├── tests
│   ├── test_providers.py
│   ├── benchmarks
│   │   ├── __init__.py
│   │   └── README.md
│   ├── test_utils.py
│   ├── test_hf_api.py
│   ├── test_exceptions.py
│   ├── test_together.py
│   └── __init__.py
├── cascadeflow
│   ├── integrations
│   │   └── langchain
│   │       ├── tests
│   │       │   └── __init__.py
│   │       ├── routers
│   │       │   └── __init__.py
│   │       ├── __init__.py
│   │       └── types.py
│   ├── limits
│   │   └── __init__.py
│   ├── tools
│   │   ├── __init__.py
│   │   ├── examples.py
│   │   ├── formats.py
│   │   ├── result.py
│   │   └── call.py
│   ├── core
│   │   ├── __init__.py
│   │   └── batch_config.py
│   ├── ml
│   │   └── __init__.py
│   ├── resilience
│   │   └── __init__.py
│   ├── dynamic_config
│   │   └── __init__.py
│   ├── utils
│   │   ├── __init__.py
│   │   └── caching.py
│   ├── guardrails
│   │   ├── __init__.py
│   │   └── manager.py
│   ├── profiles
│   │   ├── __init__.py
│   │   ├── profile_manager.py
│   │   └── tier_config.py
│   ├── interface
│   │   └── __init__.py
│   ├── streaming
│   │   └── __init__.py
│   ├── providers
│   │   ├── deepseek.py
│   │   └── __init__.py
│   ├── schema
│   │   └── __init__.py
│   └── scripts
│       ├── format_code.bat
│       └── format_code.sh
├── packages
│   ├── ml
│   │   ├── .npmignore
│   │   ├── src
│   │   │   ├── index.ts
│   │   │   └── types.ts
│   │   ├── tsconfig.json
│   │   └── package.json
│   ├── core
│   │   ├── .gitignore
│   │   ├── examples
│   │   │   ├── browser
│   │   │   │   ├── vercel-edge
│   │   │   │   │   ├── vercel.json
│   │   │   │   │   ├── package.json
│   │   │   │   │   └── api
│   │   │   │   │       └── chat.ts
│   │   │   │   └── README.md
│   │   │   ├── nodejs
│   │   │   │   ├── tsconfig.json
│   │   │   │   ├── package.json
│   │   │   │   └── test-complexity-quick.ts
│   │   │   ├── package.json
│   │   │   ├── run-example.sh
│   │   │   └── scripts
│   │   │       └── test-typescript.sh
│   │   ├── src
│   │   │   ├── config
│   │   │   │   └── index.ts
│   │   │   ├── tools
│   │   │   │   └── index.ts
│   │   │   └── streaming
│   │   │       └── index.ts
│   │   ├── tsconfig.json
│   │   ├── .eslintrc.js
│   │   ├── typedoc.json
│   │   ├── quick-perf-test.sh
│   │   ├── test-examples.sh
│   │   ├── package.json
│   │   └── tests
│   │       └── basic-test.ts
│   ├── integrations
│   │   └── n8n
│   │       ├── gulpfile.js
│   │       ├── .eslintrc.js
│   │       ├── tsconfig.json
│   │       ├── nodes
│   │       │   └── LmChatCascadeFlow
│   │       │       └── cascadeflow.svg
│   │       ├── package.json
│   │       ├── credentials
│   │       │   └── CascadeFlowApi.credentials.ts
│   │       ├── DEPRECATE_5.0.x.sh
│   │       └── TROUBLESHOOTING.md
│   └── langchain-cascadeflow
│       ├── vitest.config.ts
│       ├── tsconfig.json
│       ├── examples
│       │   ├── inspect-metadata.ts
│       │   ├── streaming-cascade.ts
│       │   └── analyze-models.ts
│       ├── package.json
│       └── src
│           ├── index.ts
│           └── types.ts
├── .github
│   ├── assets
│   │   ├── n8n-CF.png
│   │   ├── LC-logo-bright.png
│   │   ├── LC-logo-dark.png
│   │   ├── n8n-CF-domains.jpg
│   │   ├── CF_icon_dark.svg
│   │   ├── CF_icon_bright.svg
│   │   ├── CF_n8n_color.svg
│   │   ├── CF_ts_color.svg
│   │   ├── README.md
│   │   ├── CF_python_color.svg
│   │   ├── Lemony_logo_dark.svg
│   │   ├── Lemony_logo_bright.svg
│   │   └── CF_logo_dark.svg
│   ├── ISSUE_TEMPLATE
│   │   ├── milestone.md
│   │   ├── config.yml
│   │   └── question.yml
│   ├── workflows
│   │   ├── labeler.yml
│   │   └── release.yml
│   ├── dependabot.yml
│   ├── labeler.yml
│   └── CODEOWNERS
├── pnpm-workspace.yaml
├── examples
│   ├── integrations
│   │   ├── prometheus.yml
│   │   ├── grafana-datasource.yml
│   │   ├── otel-collector-config.yaml
│   │   └── docker-compose.yml
│   ├── batch_processing.py
│   ├── docker
│   │   └── multi-instance-ollama
│   │       └── docker-compose.yml
│   ├── vllm_example.py
│   ├── guardrails_usage.py
│   └── langchain_basic_usage.py
├── turbo.json
├── package.json
├── LICENSE
├── requirements.txt
├── scripts
│   └── test-typescript-examples.sh
├── docs
│   └── README.md
└── requirements-dev.txt
/tests/test_providers.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/cascadeflow/integrations/langchain/tests/__init__.py:
--------------------------------------------------------------------------------
1 | """Tests for LangChain integration."""
2 |
--------------------------------------------------------------------------------
/packages/ml/.npmignore:
--------------------------------------------------------------------------------
1 | src/
2 | tsconfig.json
3 | *.test.ts
4 | .DS_Store
5 | node_modules/
6 |
--------------------------------------------------------------------------------
/packages/core/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules/
2 | dist/
3 | *.log
4 | .DS_Store
5 | coverage/
6 | .turbo/
7 |
--------------------------------------------------------------------------------
/.github/assets/n8n-CF.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lemony-ai/cascadeflow/HEAD/.github/assets/n8n-CF.png
--------------------------------------------------------------------------------
/.github/assets/LC-logo-bright.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lemony-ai/cascadeflow/HEAD/.github/assets/LC-logo-bright.png
--------------------------------------------------------------------------------
/.github/assets/LC-logo-dark.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lemony-ai/cascadeflow/HEAD/.github/assets/LC-logo-dark.png
--------------------------------------------------------------------------------
/.github/assets/n8n-CF-domains.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lemony-ai/cascadeflow/HEAD/.github/assets/n8n-CF-domains.jpg
--------------------------------------------------------------------------------
/pnpm-workspace.yaml:
--------------------------------------------------------------------------------
1 | packages:
2 | - 'packages/*'
3 | - 'packages/integrations/*'
4 | - 'packages/core/examples'
5 |
--------------------------------------------------------------------------------
/packages/integrations/n8n/gulpfile.js:
--------------------------------------------------------------------------------
1 | const { src, dest } = require('gulp');
2 |
3 | function buildIcons() {
4 | return src('nodes/**/*.svg').pipe(dest('dist/nodes'));
5 | }
6 |
7 | exports['build:icons'] = buildIcons;
8 |
--------------------------------------------------------------------------------
/packages/core/examples/browser/vercel-edge/vercel.json:
--------------------------------------------------------------------------------
1 | {
2 | "functions": {
3 | "api/**/*.ts": {
4 | "runtime": "@vercel/node@3.0.0"
5 | }
6 | },
7 | "env": {
8 | "OPENAI_API_KEY": "@openai-api-key"
9 | }
10 | }
11 |
--------------------------------------------------------------------------------
/examples/integrations/prometheus.yml:
--------------------------------------------------------------------------------
1 | # Prometheus Configuration for cascadeflow
2 | #
3 | # Scrapes metrics from OpenTelemetry Collector
4 |
5 | global:
6 | scrape_interval: 15s
7 | evaluation_interval: 15s
8 |
9 | scrape_configs:
10 | - job_name: 'otel-collector'
11 | static_configs:
12 | - targets: ['otel-collector:8889']
13 |
--------------------------------------------------------------------------------
/examples/integrations/grafana-datasource.yml:
--------------------------------------------------------------------------------
1 | # Grafana Datasource Configuration
2 | #
3 | # Automatically configures Prometheus as a datasource
4 |
5 | apiVersion: 1
6 |
7 | datasources:
8 | - name: Prometheus
9 | type: prometheus
10 | access: proxy
11 | url: http://prometheus:9090
12 | isDefault: true
13 | editable: true
14 |
--------------------------------------------------------------------------------
/packages/ml/src/index.ts:
--------------------------------------------------------------------------------
1 | /**
2 | * @cascadeflow/ml - ML semantic detection for cascadeflow TypeScript
3 | *
4 | * Brings TypeScript to feature parity with Python's ML capabilities using Transformers.js.
5 | */
6 |
7 | export { UnifiedEmbeddingService, EmbeddingCache } from './embedding';
8 | export type { EmbeddingVector, CacheInfo } from './types';
9 |
--------------------------------------------------------------------------------
/packages/core/examples/nodejs/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "target": "ES2022",
4 | "module": "ESNext",
5 | "moduleResolution": "bundler",
6 | "lib": ["ES2022"],
7 | "strict": true,
8 | "esModuleInterop": true,
9 | "skipLibCheck": true,
10 | "forceConsistentCasingInFileNames": true,
11 | "resolveJsonModule": true,
12 | "types": ["node"]
13 | },
14 | "include": ["*.ts"],
15 | "exclude": ["node_modules"]
16 | }
17 |
--------------------------------------------------------------------------------
/packages/core/examples/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "cascadeflow-examples",
3 | "version": "0.1.0",
4 | "private": true,
5 | "type": "module",
6 | "description": "TypeScript examples for cascadeflow",
7 | "dependencies": {
8 | "@cascadeflow/core": "workspace:*",
9 | "openai": "^4.73.1",
10 | "@anthropic-ai/sdk": "^0.30.0",
11 | "groq-sdk": "^0.5.0"
12 | },
13 | "devDependencies": {
14 | "tsx": "^4.7.0",
15 | "typescript": "^5.3.3"
16 | }
17 | }
18 |
--------------------------------------------------------------------------------
/packages/langchain-cascadeflow/vitest.config.ts:
--------------------------------------------------------------------------------
1 | import { defineConfig } from 'vitest/config';
2 |
3 | export default defineConfig({
4 | test: {
5 | globals: true,
6 | environment: 'node',
7 | coverage: {
8 | provider: 'v8',
9 | reporter: ['text', 'json', 'html'],
10 | exclude: [
11 | 'node_modules/',
12 | 'dist/',
13 | 'examples/',
14 | '**/*.d.ts',
15 | '**/*.config.*',
16 | '**/types.ts',
17 | ],
18 | },
19 | },
20 | });
21 |
--------------------------------------------------------------------------------
/packages/core/examples/browser/vercel-edge/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "cascadeflow-vercel-edge-example",
3 | "version": "1.0.0",
4 | "private": true,
5 | "description": "cascadeflow Vercel Edge Function example",
6 | "type": "module",
7 | "scripts": {
8 | "dev": "vercel dev",
9 | "deploy": "vercel deploy --prod"
10 | },
11 | "dependencies": {
12 | "@cascadeflow/core": "workspace:*",
13 | "openai": "^4.73.1"
14 | },
15 | "devDependencies": {
16 | "vercel": "^37.0.0"
17 | }
18 | }
19 |
--------------------------------------------------------------------------------
/turbo.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "https://turbo.build/schema.json",
3 | "pipeline": {
4 | "build": {
5 | "dependsOn": ["^build"],
6 | "outputs": ["dist/**", ".next/**", "!.next/cache/**"]
7 | },
8 | "test": {
9 | "dependsOn": ["build"],
10 | "outputs": ["coverage/**"]
11 | },
12 | "lint": {
13 | "outputs": []
14 | },
15 | "dev": {
16 | "cache": false,
17 | "persistent": true
18 | },
19 | "clean": {
20 | "cache": false
21 | }
22 | }
23 | }
24 |
--------------------------------------------------------------------------------
/.github/assets/CF_icon_dark.svg:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/packages/core/src/config/index.ts:
--------------------------------------------------------------------------------
1 | /**
2 | * Configuration module exports
3 | */
4 |
5 | export {
6 | type DomainConfig,
7 | type DomainConfigMap,
8 | type DomainValidationMethod,
9 | DEFAULT_DOMAIN_CONFIG,
10 | BUILTIN_DOMAIN_CONFIGS,
11 | createDomainConfig,
12 | validateDomainConfig,
13 | getBuiltinDomainConfig,
14 | validationMethodToDomain,
15 | domainValidationToMethod,
16 | } from './domain-config';
17 |
18 | export {
19 | type ModelRegistryEntry,
20 | ModelRegistry,
21 | defaultModelRegistry,
22 | getModel,
23 | hasModel,
24 | } from './model-registry';
25 |
--------------------------------------------------------------------------------
/packages/integrations/n8n/.eslintrc.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 | parser: '@typescript-eslint/parser',
3 | parserOptions: {
4 | ecmaVersion: 2020,
5 | sourceType: 'module',
6 | },
7 | plugins: ['eslint-plugin-n8n-nodes-base'],
8 | extends: [
9 | 'plugin:n8n-nodes-base/nodes',
10 | 'plugin:n8n-nodes-base/credentials',
11 | 'plugin:n8n-nodes-base/community',
12 | ],
13 | rules: {
14 | // Disable conflicting rules for documentationUrl
15 | 'n8n-nodes-base/cred-class-field-documentation-url-miscased': 'off',
16 | 'n8n-nodes-base/cred-class-field-documentation-url-not-http-url': 'off',
17 | },
18 | };
19 |
--------------------------------------------------------------------------------
/packages/ml/src/types.ts:
--------------------------------------------------------------------------------
1 | /**
2 | * Type definitions for ML package
3 | */
4 |
5 | /**
6 | * Embedding vector with dimensions
7 | */
8 | export interface EmbeddingVector {
9 | /** Float32Array containing the embedding data */
10 | data: Float32Array;
11 | /** Number of dimensions (384 for BGE-small-en-v1.5) */
12 | dimensions: number;
13 | }
14 |
15 | /**
16 | * Cache information for debugging
17 | */
18 | export interface CacheInfo {
19 | /** Number of cached embeddings */
20 | size: number;
21 | /** List of cached text keys (limited to first 5 for debugging) */
22 | texts: string[];
23 | }
24 |
--------------------------------------------------------------------------------
/packages/ml/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "target": "ES2020",
4 | "module": "ESNext",
5 | "lib": ["ES2020"],
6 | "moduleResolution": "node",
7 | "resolveJsonModule": true,
8 | "declaration": true,
9 | "declarationMap": true,
10 | "sourceMap": true,
11 | "outDir": "./dist",
12 | "rootDir": "./src",
13 | "strict": true,
14 | "esModuleInterop": true,
15 | "skipLibCheck": true,
16 | "forceConsistentCasingInFileNames": true,
17 | "allowSyntheticDefaultImports": true
18 | },
19 | "include": ["src/**/*"],
20 | "exclude": ["node_modules", "dist", "**/*.test.ts"]
21 | }
22 |
--------------------------------------------------------------------------------
/packages/core/examples/run-example.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Helper script to run TypeScript examples with proper module resolution
3 | #
4 | # Usage: ./run-example.sh nodejs/basic-usage.ts
5 |
6 | set -e
7 |
8 | # Ensure we're in the core package directory
9 | cd "$(dirname "$0")/.."
10 |
11 | # Build if needed
12 | if [ ! -d "dist" ]; then
13 | echo "📦 Building @cascadeflow/core..."
14 | pnpm build
15 | fi
16 |
17 | # Load environment variables
18 | if [ -f "../../.env" ]; then
19 | set -a
20 | source ../../.env
21 | set +a
22 | fi
23 |
24 | # Run with tsx and use node_modules resolution
25 | npx tsx --conditions=import "examples/$1"
26 |
--------------------------------------------------------------------------------
/packages/core/examples/scripts/test-typescript.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Test TypeScript examples
3 |
4 | cd packages/core/examples || exit 1
5 |
6 | # Load env vars
7 | set -a
8 | source ../../../.env 2>/dev/null || true
9 | set +a
10 |
11 | PASSED=0
12 | FAILED=0
13 |
14 | for example in nodejs/*.ts streaming.ts; do
15 | [ -f "$example" ] || continue
16 | echo "Testing: $example"
17 | if npx tsx "$example" > /dev/null 2>&1; then
18 | echo "✅ PASSED"
19 | ((PASSED++))
20 | else
21 | echo "❌ FAILED"
22 | ((FAILED++))
23 | fi
24 | done
25 |
26 | echo ""
27 | echo "Passed: $PASSED, Failed: $FAILED"
28 | exit $FAILED
29 |
--------------------------------------------------------------------------------
/packages/integrations/n8n/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "target": "ES2020",
4 | "module": "commonjs",
5 | "lib": ["ES2020"],
6 | "outDir": "dist",
7 | "rootDir": ".",
8 | "declaration": true,
9 | "declarationMap": true,
10 | "sourceMap": true,
11 | "strict": true,
12 | "esModuleInterop": true,
13 | "skipLibCheck": true,
14 | "forceConsistentCasingInFileNames": true,
15 | "resolveJsonModule": true,
16 | "moduleResolution": "node",
17 | "types": ["node"]
18 | },
19 | "include": [
20 | "credentials/**/*",
21 | "nodes/**/*"
22 | ],
23 | "exclude": [
24 | "node_modules",
25 | "dist"
26 | ]
27 | }
28 |
--------------------------------------------------------------------------------
/examples/integrations/otel-collector-config.yaml:
--------------------------------------------------------------------------------
1 | # OpenTelemetry Collector Configuration for cascadeflow
2 | #
3 | # This configuration:
4 | # 1. Receives metrics via OTLP HTTP (port 4318)
5 | # 2. Exports metrics to Prometheus (port 8889)
6 |
7 | receivers:
8 | otlp:
9 | protocols:
10 | http:
11 | endpoint: 0.0.0.0:4318
12 | grpc:
13 | endpoint: 0.0.0.0:4317
14 |
15 | processors:
16 | batch:
17 | timeout: 10s
18 | send_batch_size: 1024
19 |
20 | exporters:
21 | prometheus:
22 | endpoint: "0.0.0.0:8889"
23 | namespace: cascadeflow
24 |
25 | logging:
26 | loglevel: info
27 |
28 | service:
29 | pipelines:
30 | metrics:
31 | receivers: [otlp]
32 | processors: [batch]
33 | exporters: [prometheus, logging]
34 |
--------------------------------------------------------------------------------
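
The collector above receives OTLP over HTTP on port 4318 and re-exposes metrics to Prometheus on 8889. A minimal Python sketch of an application pushing a counter into it; the `opentelemetry-sdk` and `opentelemetry-exporter-otlp-proto-http` packages are assumptions here, not dependencies declared in this repo:

```python
# Sketch: export a counter to the collector's OTLP HTTP receiver configured
# above (0.0.0.0:4318).
from opentelemetry import metrics
from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader

exporter = OTLPMetricExporter(endpoint="http://localhost:4318/v1/metrics")
provider = MeterProvider(metric_readers=[PeriodicExportingMetricReader(exporter)])
metrics.set_meter_provider(provider)

meter = metrics.get_meter("cascadeflow.demo")
requests_total = meter.create_counter("requests", description="Demo request count")
requests_total.add(1, {"model": "draft"})  # surfaces in Prometheus under the `cascadeflow` namespace
```
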
/.github/assets/CF_icon_bright.svg:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/milestone.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Milestone
3 | about: Track implementation milestones
4 | title: '[MILESTONE] '
5 | labels: milestone
6 | assignees: ''
7 | ---
8 |
9 | ## Milestone Overview
10 |
11 |
12 | ## Tasks
13 |
14 | - [ ] Task 1
15 | - [ ] Task 2
16 | - [ ] Task 3
17 |
18 | ## Acceptance Criteria
19 |
20 | - [ ] Criterion 1
21 | - [ ] Criterion 2
22 |
23 | ## Tests Required
24 |
25 | - [ ] Unit tests: X+
26 | - [ ] Integration tests: Y+
27 |
28 | ## Documentation
29 |
30 | - [ ] API documentation
31 | - [ ] Usage examples
32 | - [ ] README updates
33 |
34 | ## Estimated Duration
35 |
36 | X-Y days
37 |
--------------------------------------------------------------------------------
/packages/integrations/n8n/nodes/LmChatCascadeFlow/cascadeflow.svg:
--------------------------------------------------------------------------------
1 |
2 |
16 |
--------------------------------------------------------------------------------
/cascadeflow/integrations/langchain/routers/__init__.py:
--------------------------------------------------------------------------------
1 | """Router framework for CascadeFlow LangChain integration.
2 |
3 | This module contains the PreRouter and base router classes for
4 | intelligent query routing based on complexity detection.
5 | """
6 |
7 | from .base import (
8 | Router,
9 | RouterChain,
10 | RoutingDecision,
11 | RoutingDecisionHelper,
12 | RoutingStrategy,
13 | )
14 | from .pre_router import (
15 | PreRouter,
16 | PreRouterConfig,
17 | PreRouterStats,
18 | create_pre_router,
19 | )
20 |
21 | __all__ = [
22 | # Base router framework
23 | "Router",
24 | "RouterChain",
25 | "RoutingDecision",
26 | "RoutingDecisionHelper",
27 | "RoutingStrategy",
28 | # PreRouter
29 | "PreRouter",
30 | "PreRouterConfig",
31 | "PreRouterStats",
32 | "create_pre_router",
33 | ]
34 |
--------------------------------------------------------------------------------
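
A hypothetical usage sketch for the router exports above; only the names come from `__all__`, while the call shapes are assumptions (the real signatures live in `pre_router.py`, which is not included in this dump):

```python
# Hypothetical sketch -- names from __all__ above; call shapes are guesses.
from cascadeflow.integrations.langchain.routers import create_pre_router

router = create_pre_router()  # assumed to accept an optional PreRouterConfig
# A PreRouter is expected to produce a RoutingDecision per query, e.g.:
# decision = router.route("Summarize this contract")  # hypothetical method name
```
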
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "cascadeflow-monorepo",
3 | "version": "0.1.0",
4 | "private": true,
5 | "description": "cascadeflow monorepo - Python and TypeScript libraries",
6 | "author": {
7 | "name": "Lemony Inc.",
8 | "email": "hello@lemony.ai"
9 | },
10 | "license": "MIT",
11 | "repository": {
12 | "type": "git",
13 | "url": "https://github.com/lemony-ai/cascadeflow.git"
14 | },
15 | "scripts": {
16 | "build": "turbo run build",
17 | "dev": "turbo run dev",
18 | "test": "turbo run test",
19 | "lint": "turbo run lint",
20 | "clean": "turbo run clean"
21 | },
22 | "devDependencies": {
23 | "turbo": "^1.11.0"
24 | },
25 | "engines": {
26 | "node": ">=18.0.0",
27 | "pnpm": ">=8.0.0"
28 | },
29 | "packageManager": "pnpm@8.15.0",
30 | "dependencies": {
31 | "tsx": "^4.7.0"
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/packages/core/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "target": "ES2020",
4 | "module": "ESNext",
5 | "lib": ["ES2020"],
6 | "moduleResolution": "bundler",
7 | "resolveJsonModule": true,
8 | "allowJs": false,
9 | "checkJs": false,
10 | "outDir": "./dist",
11 | "rootDir": "./src",
12 | "removeComments": true,
13 | "declaration": true,
14 | "declarationMap": true,
15 | "sourceMap": true,
16 | "strict": true,
17 | "noUnusedLocals": false,
18 | "noUnusedParameters": false,
19 | "noImplicitReturns": true,
20 | "noFallthroughCasesInSwitch": true,
21 | "esModuleInterop": true,
22 | "skipLibCheck": true,
23 | "forceConsistentCasingInFileNames": true,
24 | "allowSyntheticDefaultImports": true
25 | },
26 | "include": ["src/**/*"],
27 | "exclude": ["node_modules", "dist", "tests"]
28 | }
29 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | blank_issues_enabled: false
2 | contact_links:
3 | - name: 📚 Documentation
4 | url: https://github.com/lemony-ai/cascadeflow/blob/main/README.md
5 | about: Check out our comprehensive documentation and guides
6 | - name: 💬 GitHub Discussions
7 | url: https://github.com/lemony-ai/cascadeflow/discussions
8 | about: Join the community discussion for questions and ideas
9 | - name: 📖 Examples
10 | url: https://github.com/lemony-ai/cascadeflow/tree/main/examples
11 | about: Browse working examples for all supported providers
12 | - name: 🐛 Known Issues
13 | url: https://github.com/lemony-ai/cascadeflow/issues?q=is%3Aissue+label%3Abug
14 | about: Check if your issue has already been reported
15 | - name: 💼 Support
16 | url: mailto:hello@lemony.ai
17 | about: Contact us for support, consulting, or enterprise inquiries
--------------------------------------------------------------------------------
/packages/langchain-cascadeflow/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "target": "ES2020",
4 | "module": "ESNext",
5 | "lib": ["ES2020"],
6 | "moduleResolution": "bundler",
7 | "resolveJsonModule": true,
8 | "allowJs": false,
9 | "checkJs": false,
10 | "outDir": "./dist",
11 | "rootDir": "./src",
12 | "removeComments": true,
13 | "declaration": true,
14 | "declarationMap": true,
15 | "sourceMap": true,
16 | "strict": true,
17 | "noUnusedLocals": false,
18 | "noUnusedParameters": false,
19 | "noImplicitReturns": true,
20 | "noFallthroughCasesInSwitch": true,
21 | "esModuleInterop": true,
22 | "skipLibCheck": true,
23 | "forceConsistentCasingInFileNames": true,
24 | "allowSyntheticDefaultImports": true
25 | },
26 | "include": ["src/**/*"],
27 | "exclude": ["node_modules", "dist", "tests"]
28 | }
29 |
--------------------------------------------------------------------------------
/packages/core/.eslintrc.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 | parser: '@typescript-eslint/parser',
3 | parserOptions: {
4 | ecmaVersion: 2020,
5 | sourceType: 'module',
6 | project: './tsconfig.json',
7 | },
8 | plugins: ['@typescript-eslint'],
9 | extends: [
10 | 'eslint:recommended',
11 | 'plugin:@typescript-eslint/recommended',
12 | ],
13 | rules: {
14 | // Type safety (warnings to allow gradual improvement)
15 | '@typescript-eslint/no-explicit-any': 'warn',
16 | '@typescript-eslint/no-unused-vars': ['warn', { argsIgnorePattern: '^_' }],
17 | '@typescript-eslint/no-var-requires': 'warn', // Allow require() for now
18 |
19 | // Code quality (warnings instead of errors)
20 | 'prefer-const': 'warn',
21 | 'no-useless-escape': 'warn',
22 | },
23 | ignorePatterns: [
24 | 'dist',
25 | 'node_modules',
26 | '*.js',
27 | 'examples',
28 | '__tests__',
29 | ],
30 | };
31 |
--------------------------------------------------------------------------------
/packages/core/examples/nodejs/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "cascadeflow-nodejs-examples",
3 | "version": "0.1.0",
4 | "private": true,
5 | "description": "Node.js examples for @cascadeflow/core",
6 | "type": "module",
7 | "scripts": {
8 | "basic": "tsx basic-usage.ts",
9 | "tools": "tsx tool-calling.ts",
10 | "multi": "tsx multi-provider.ts"
11 | },
12 | "dependencies": {
13 | "@cascadeflow/core": "workspace:*"
14 | },
15 | "devDependencies": {
16 | "tsx": "^4.7.0",
17 | "typescript": "^5.3.3",
18 | "dotenv": "^16.3.1",
19 | "@types/node": "^20.10.0"
20 | },
21 | "peerDependencies": {
22 | "openai": "^4.0.0",
23 | "@anthropic-ai/sdk": "^0.30.0",
24 | "groq-sdk": "^0.5.0"
25 | },
26 | "peerDependenciesMeta": {
27 | "openai": {
28 | "optional": true
29 | },
30 | "@anthropic-ai/sdk": {
31 | "optional": true
32 | },
33 | "groq-sdk": {
34 | "optional": true
35 | }
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/cascadeflow/limits/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Rate Limiting for cascadeflow.
3 |
4 | Provides per-user and per-tier rate limiting with a sliding-window algorithm
5 | for controlling API usage and enforcing subscription tier limits.
6 |
7 | Key components:
8 | - RateLimiter: Sliding window rate limiter
9 | - RateLimitError: Exception for rate limit violations
10 |
11 | Example usage:
12 | from cascadeflow.limits import RateLimiter
13 | from cascadeflow import UserProfile, TierLevel
14 |
15 | profile = UserProfile.from_tier(TierLevel.PRO, user_id="user_123")
16 | limiter = RateLimiter()
17 |
18 | # Check if request is allowed
19 | if await limiter.check_rate_limit(profile):
20 | # Process request
21 | result = await agent.run(query)
22 | else:
23 | # Rate limit exceeded
24 | raise RateLimitError("Rate limit exceeded")
25 | """
26 |
27 | from .rate_limiter import RateLimiter, RateLimitState, RateLimitError
28 |
29 | __all__ = [
30 | "RateLimiter",
31 | "RateLimitState",
32 | "RateLimitError",
33 | ]
34 |
--------------------------------------------------------------------------------
/cascadeflow/tools/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | cascadeflow tool calling system.
3 |
4 | This is an OPTIONAL feature - existing cascadeflow code works unchanged.
5 |
6 | Quick Start:
7 | from cascadeflow.tools import tool, ToolExecutor
8 |
9 | @tool
10 | def get_weather(city: str) -> dict:
11 | '''Get weather for a city.'''
12 | return {"temp": 22, "condition": "sunny"}
13 |
14 | executor = ToolExecutor([get_weather])
15 | result = await executor.execute(tool_call)
16 | """
17 |
18 | from .call import ToolCall, ToolCallFormat
19 | from .config import ToolConfig, create_tool_from_function, tool
20 | from .examples import example_calculator, example_get_weather
21 | from .executor import ToolExecutor
22 | from .result import ToolResult
23 |
24 | __version__ = "0.1.0"
25 |
26 | __all__ = [
27 | # Core classes
28 | "ToolConfig",
29 | "ToolCall",
30 | "ToolResult",
31 | "ToolExecutor",
32 | # Enums
33 | "ToolCallFormat",
34 | # Utilities
35 | "tool",
36 | "create_tool_from_function",
37 | # Examples
38 | "example_calculator",
39 | "example_get_weather",
40 | ]
41 |
--------------------------------------------------------------------------------
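
Continuing the quick start in the docstring above: `executor.execute()` consumes a `ToolCall`, which in practice is parsed from a model response. A hedged sketch; the `ToolCall` constructor arguments shown are assumptions, not the confirmed API (see `call.py` for the actual signature):

```python
import asyncio

from cascadeflow.tools import ToolCall, ToolExecutor, tool


@tool
def get_weather(city: str) -> dict:
    """Get weather for a city."""
    return {"temp": 22, "condition": "sunny"}


async def main() -> None:
    executor = ToolExecutor([get_weather])
    # Hypothetical constructor arguments -- check call.py for the real ones.
    call = ToolCall(name="get_weather", arguments={"city": "Berlin"})
    result = await executor.execute(call)  # returns a ToolResult
    print(result)


asyncio.run(main())
```
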
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2025 Lemony Inc.
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/examples/batch_processing.py:
--------------------------------------------------------------------------------
1 | """
2 | Example: Batch Processing with cascadeflow v0.2.1
3 |
4 | This example demonstrates batch processing capabilities.
5 | """
6 |
7 | import asyncio
8 |
9 | from cascadeflow import CascadeAgent
10 |
11 |
12 | async def main():
13 | # Create agent
14 | agent = CascadeAgent.from_env()
15 |
16 | # Simple batch processing
17 | queries = [
18 | "What is Python?",
19 | "What is JavaScript?",
20 | "What is Rust?",
21 | ]
22 |
23 | print("Processing 3 queries in batch...")
24 | result = await agent.run_batch(queries)
25 |
26 | print(f"\n✓ Success: {result.success_count}/{len(queries)}")
27 | print(f"✓ Total cost: ${result.total_cost:.4f}")
28 | print(f"✓ Average cost: ${result.average_cost:.4f}")
29 | print(f"✓ Total time: {result.total_time:.2f}s")
30 | print(f"✓ Strategy: {result.strategy_used}")
31 |
32 | for i, cascade_result in enumerate(result.results):
33 | if cascade_result:
34 | print(f"\nQuery {i+1}: {cascade_result.content[:100]}...")
35 |
36 |
37 | if __name__ == "__main__":
38 | asyncio.run(main())
39 |
--------------------------------------------------------------------------------
/cascadeflow/core/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Core cascade execution engine.
3 |
4 | This module contains:
5 | - Execution planning and strategy selection
6 | - Domain detection and model scoring
7 | - Speculative cascade implementation
8 | - Batch processing (v0.2.1+)
9 | """
10 |
11 | from .cascade import (
12 | SpeculativeCascade,
13 | SpeculativeResult,
14 | WholeResponseCascade,
15 | )
16 | from .execution import (
17 | DomainDetector,
18 | ExecutionPlan,
19 | ExecutionStrategy,
20 | LatencyAwareExecutionPlanner,
21 | ModelScorer,
22 | )
23 | from .batch_config import BatchConfig, BatchStrategy
24 | from .batch import BatchProcessor, BatchResult, BatchProcessingError
25 |
26 | __all__ = [
27 | # Execution
28 | "DomainDetector",
29 | "ExecutionPlan",
30 | "ExecutionStrategy",
31 | "LatencyAwareExecutionPlanner",
32 | "ModelScorer",
33 | # Cascade
34 | "WholeResponseCascade",
35 | "SpeculativeCascade",
36 | "SpeculativeResult",
37 | # Batch Processing (v0.2.1+)
38 | "BatchConfig",
39 | "BatchStrategy",
40 | "BatchProcessor",
41 | "BatchResult",
42 | "BatchProcessingError",
43 | ]
44 |
--------------------------------------------------------------------------------
/packages/core/src/tools/index.ts:
--------------------------------------------------------------------------------
1 | /**
2 | * Tools Module
3 | *
4 | * Universal tool configuration and execution for LLM function calling.
5 | *
6 | * @module tools
7 | */
8 |
9 | // Tool Configuration
10 | export {
11 | ToolConfig,
12 | createTool,
13 | tool,
14 | inferJsonType,
15 | buildParameterSchema,
16 | } from './config';
17 |
18 | export type {
19 | ToolFunction,
20 | ToolParameters,
21 | ToolConfigOptions,
22 | } from './config';
23 |
24 | // Tool Execution
25 | export { ToolExecutor } from './executor';
26 |
27 | // Tool Calls
28 | export { ToolCall } from './call';
29 | export type { ToolCallOptions } from './call';
30 |
31 | // Tool Results
32 | export { ToolResult } from './result';
33 | export type { ToolResultOptions } from './result';
34 |
35 | // Format Conversion
36 | export {
37 | ToolCallFormat,
38 | toOpenAIFormat,
39 | toAnthropicFormat,
40 | toOllamaFormat,
41 | toProviderFormat,
42 | getProviderFormatType,
43 | } from './formats';
44 |
45 | // Tool Validation
46 | export { ToolValidator, formatToolQualityScore } from './validator';
47 | export type { ToolQualityScore, ComplexityLevel } from './validator';
48 |
--------------------------------------------------------------------------------
/tests/benchmarks/__init__.py:
--------------------------------------------------------------------------------
1 | """Benchmark framework components."""
2 |
3 | from .base import Benchmark, BenchmarkResult, BenchmarkSummary
4 | from .metrics import CostMetrics, LatencyMetrics, QualityMetrics
5 | from .profiler import CascadeProfile, CascadeProfiler, ComponentProfile
6 | from .reporter import BenchmarkReporter
7 | from .benchmark_config import (
8 | BenchmarkConfig,
9 | BenchmarkMode,
10 | BenchmarkTargets,
11 | DomainBenchmarkConfig,
12 | DEFAULT_TARGETS,
13 | DRAFTER_MODELS,
14 | VERIFIER_MODELS,
15 | DOMAIN_CONFIGS,
16 | )
17 |
18 | __all__ = [
19 | # Base classes
20 | "Benchmark",
21 | "BenchmarkResult",
22 | "BenchmarkSummary",
23 | # Metrics
24 | "CostMetrics",
25 | "LatencyMetrics",
26 | "QualityMetrics",
27 | # Reporter
28 | "BenchmarkReporter",
29 | # Profiler
30 | "CascadeProfile",
31 | "CascadeProfiler",
32 | "ComponentProfile",
33 | # Configuration
34 | "BenchmarkConfig",
35 | "BenchmarkMode",
36 | "BenchmarkTargets",
37 | "DomainBenchmarkConfig",
38 | "DEFAULT_TARGETS",
39 | "DRAFTER_MODELS",
40 | "VERIFIER_MODELS",
41 | "DOMAIN_CONFIGS",
42 | ]
43 |
--------------------------------------------------------------------------------
/cascadeflow/ml/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | ML Module for cascadeflow
3 |
4 | Optional machine learning components for enhanced quality validation,
5 | domain detection, and complexity analysis.
6 |
7 | All ML features:
8 | - Are completely OPTIONAL (graceful degradation)
9 | - Require the `fastembed` package (pip install fastembed)
10 | - Use lightweight ONNX models (~40MB)
11 | - Provide better accuracy than rule-based heuristics
12 | - Add ~25-50ms latency (optimized)
13 |
14 | Components:
15 | - UnifiedEmbeddingService: Single embedding model for all tasks
16 | - EmbeddingCache: Request-scoped caching for performance
17 |
18 | Example:
19 | >>> from cascadeflow.ml import UnifiedEmbeddingService
20 | >>>
21 | >>> # Initialize (lazy loads model)
22 | >>> embedder = UnifiedEmbeddingService()
23 | >>>
24 | >>> if embedder.is_available:
25 | ... similarity = embedder.similarity("query", "response")
26 | ... print(f"Similarity: {similarity:.2%}")
27 | """
28 |
29 | from .embedding import (
30 | UnifiedEmbeddingService,
31 | EmbeddingCache,
32 | )
33 |
34 | __all__ = [
35 | "UnifiedEmbeddingService",
36 | "EmbeddingCache",
37 | ]
38 |
39 | __version__ = "0.7.0" # ML integration
40 |
--------------------------------------------------------------------------------
/cascadeflow/resilience/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Resilience patterns for CascadeFlow.
3 |
4 | Provides production-grade resilience features:
5 | - Circuit Breaker: Prevent cascading failures
6 | - Health monitoring: Track provider health
7 |
8 | Example:
9 | >>> from cascadeflow.resilience import CircuitBreaker, CircuitState
10 | >>>
11 | >>> # Create circuit breaker for a provider
12 | >>> breaker = CircuitBreaker(
13 | ... failure_threshold=5,
14 | ... recovery_timeout=30.0,
15 | ... half_open_max_calls=3
16 | ... )
17 | >>>
18 | >>> # Check if provider is available
19 | >>> if breaker.can_execute():
20 | ... try:
21 | ... result = await provider.complete(...)
22 | ... breaker.record_success()
23 | ... except Exception as e:
24 | ... breaker.record_failure(e)
25 | """
26 |
27 | from .circuit_breaker import (
28 | CircuitBreaker,
29 | CircuitBreakerConfig,
30 | CircuitBreakerRegistry,
31 | CircuitState,
32 | get_circuit_breaker,
33 | )
34 |
35 | __all__ = [
36 | "CircuitBreaker",
37 | "CircuitBreakerConfig",
38 | "CircuitBreakerRegistry",
39 | "CircuitState",
40 | "get_circuit_breaker",
41 | ]
42 |
--------------------------------------------------------------------------------
/cascadeflow/dynamic_config/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Dynamic Configuration Management for CascadeFlow.
3 |
4 | Provides runtime configuration updates without service restart:
5 | - ConfigManager: Central config management with event system
6 | - ConfigWatcher: File watching for automatic config reload
7 | - Thread-safe configuration updates
8 | - Event callbacks for config changes
9 |
10 | Example:
11 | >>> from cascadeflow.config import ConfigManager, ConfigWatcher
12 | >>>
13 | >>> # Create manager with initial config
14 | >>> manager = ConfigManager(config_path="cascadeflow.yaml")
15 | >>>
16 | >>> # Register callback for config changes
17 | >>> @manager.on_change("quality_threshold")
18 | ... def on_threshold_change(old, new):
19 | ... print(f"Threshold changed: {old} -> {new}")
20 | >>>
21 | >>> # Enable file watching for auto-reload
22 | >>> watcher = ConfigWatcher(manager, interval=5.0)
23 | >>> watcher.start()
24 | >>>
25 | >>> # Manual config update
26 | >>> manager.update(quality_threshold=0.85)
27 | """
28 |
29 | from .manager import (
30 | ConfigManager,
31 | ConfigChangeEvent,
32 | ConfigSection,
33 | )
34 | from .watcher import (
35 | ConfigWatcher,
36 | )
37 |
38 | __all__ = [
39 | "ConfigManager",
40 | "ConfigChangeEvent",
41 | "ConfigSection",
42 | "ConfigWatcher",
43 | ]
44 |
--------------------------------------------------------------------------------
/cascadeflow/utils/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Utility functions and helpers for cascadeflow.
3 |
4 | This module provides:
5 | - Logging and formatting utilities (helpers.py)
6 | - Response caching (caching.py)
7 | - Convenience presets for quick setup (presets.py)
8 | """
9 |
10 | # Caching
11 | from .caching import ResponseCache
12 |
13 | # Helpers (was utils.py)
14 | from .helpers import (
15 | calculate_cosine_similarity,
16 | estimate_tokens,
17 | format_cost,
18 | get_env_or_raise,
19 | parse_model_identifier,
20 | setup_logging,
21 | truncate_text,
22 | )
23 |
24 | # Presets (v0.2.0 - function-based presets)
25 | from .presets import (
26 | auto_agent,
27 | get_balanced_agent,
28 | get_cost_optimized_agent,
29 | get_development_agent,
30 | get_quality_optimized_agent,
31 | get_speed_optimized_agent,
32 | )
33 |
34 | __all__ = [
35 | # Helpers
36 | "setup_logging",
37 | "format_cost",
38 | "estimate_tokens",
39 | "truncate_text",
40 | "calculate_cosine_similarity",
41 | "get_env_or_raise",
42 | "parse_model_identifier",
43 | # Caching
44 | "ResponseCache",
45 | # Presets (v0.2.0 - function-based)
46 | "get_cost_optimized_agent",
47 | "get_balanced_agent",
48 | "get_speed_optimized_agent",
49 | "get_quality_optimized_agent",
50 | "get_development_agent",
51 | "auto_agent",
52 | ]
53 |
--------------------------------------------------------------------------------
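
A short sketch of the helpers re-exported above; the expected outputs follow `tests/test_utils.py` (four decimal places for `format_cost`, roughly `len(text) // 4` tokens with a minimum of 1 for `estimate_tokens`):

```python
from cascadeflow.utils import estimate_tokens, format_cost

print(format_cost(0.002))  # $0.0020
print(format_cost(10.0))   # $10.0000
print(estimate_tokens("This is a longer sentence with multiple words"))  # 11 (45 chars // 4)
```
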
/.github/workflows/labeler.yml:
--------------------------------------------------------------------------------
1 | name: Auto Labeler
2 |
3 | on:
4 | pull_request:
5 | types: [opened, synchronize, reopened]
6 | issues:
7 | types: [opened]
8 |
9 | permissions:
10 | contents: read
11 | pull-requests: write
12 | issues: write
13 |
14 | jobs:
15 | label-pr:
16 | name: Label Pull Requests
17 | runs-on: ubuntu-latest
18 | if: github.event_name == 'pull_request'
19 |
20 | steps:
21 | - name: Checkout code
22 | uses: actions/checkout@v4
23 |
24 | - name: Label based on changed files
25 | uses: actions/labeler@v5
26 | with:
27 | repo-token: ${{ secrets.GITHUB_TOKEN }}
28 | configuration-path: .github/labeler.yml
29 |
30 | label-size:
31 | name: Label PR Size
32 | runs-on: ubuntu-latest
33 | if: github.event_name == 'pull_request'
34 |
35 | steps:
36 | - name: Label PR by size
37 | uses: codelytv/pr-size-labeler@v1
38 | with:
39 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
40 | xs_label: 'size/xs'
41 | xs_max_size: 10
42 | s_label: 'size/s'
43 | s_max_size: 100
44 | m_label: 'size/m'
45 | m_max_size: 500
46 | l_label: 'size/l'
47 | l_max_size: 1000
48 | xl_label: 'size/xl'
49 | message_if_xl: 'This PR is extremely large. Consider splitting it into smaller PRs.'
--------------------------------------------------------------------------------
/.github/assets/CF_n8n_color.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/cascadeflow/guardrails/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Guardrails for cascadeflow - Content Safety and Compliance.
3 |
4 | Provides basic content moderation and PII detection for production safety.
5 |
6 | Key components:
7 | - ContentModerator: Detect harmful content
8 | - PIIDetector: Detect personally identifiable information
9 | - GuardrailsManager: Centralized guardrails management
10 |
11 | Example usage:
12 | from cascadeflow.guardrails import GuardrailsManager
13 | from cascadeflow import UserProfile, TierLevel
14 |
15 | profile = UserProfile.from_tier(
16 | TierLevel.PRO,
17 | user_id="user_123",
18 | enable_content_moderation=True,
19 | enable_pii_detection=True
20 | )
21 |
22 | manager = GuardrailsManager()
23 |
24 | # Check content before processing
25 | safe, violations = await manager.check_content(
26 | text="User input here",
27 | profile=profile
28 | )
29 |
30 | if not safe:
31 | raise GuardrailViolation(f"Content blocked: {violations}")
32 | """
33 |
34 | from .content_moderator import ContentModerator, ModerationResult
35 | from .pii_detector import PIIDetector, PIIMatch
36 | from .manager import GuardrailsManager, GuardrailViolation
37 |
38 | __all__ = [
39 | "ContentModerator",
40 | "ModerationResult",
41 | "PIIDetector",
42 | "PIIMatch",
43 | "GuardrailsManager",
44 | "GuardrailViolation",
45 | ]
46 |
--------------------------------------------------------------------------------
/tests/test_utils.py:
--------------------------------------------------------------------------------
1 | """Tests for utility functions."""
2 |
3 | import pytest
4 |
5 | from cascadeflow.utils import estimate_tokens, format_cost
6 |
7 |
8 | def test_format_cost_zero():
9 | """Test formatting zero cost."""
10 | assert format_cost(0.0) == "$0.0000"
11 |
12 |
13 | def test_format_cost_small():
14 | """Test formatting small costs."""
15 | assert format_cost(0.002) == "$0.0020"
16 | assert format_cost(0.00001) == "$0.0000"
17 |
18 |
19 | def test_format_cost_medium():
20 | """Test formatting medium costs."""
21 | assert format_cost(0.5) == "$0.5000"
22 | assert format_cost(1.5) == "$1.5000"
23 |
24 |
25 | def test_format_cost_large():
26 | """Test formatting large costs."""
27 | assert format_cost(10.0) == "$10.0000"
28 | assert format_cost(100.5) == "$100.5000"
29 |
30 |
31 | def test_estimate_tokens_empty():
32 | """Test with empty string."""
33 | assert estimate_tokens("") == 1 # Minimum 1
34 |
35 |
36 | def test_estimate_tokens_short():
37 | """Test with short text."""
38 | tokens = estimate_tokens("Hello")
39 | assert tokens > 0
40 |
41 |
42 | def test_estimate_tokens_long():
43 | """Test with longer text."""
44 | text = "This is a longer sentence with multiple words"
45 | tokens = estimate_tokens(text)
46 | assert tokens == len(text) // 4
47 |
48 |
49 | if __name__ == "__main__":
50 | pytest.main([__file__, "-v"])
51 |
--------------------------------------------------------------------------------
/cascadeflow/profiles/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | User Profile System for cascadeflow.
3 |
4 | This module provides a comprehensive user profile system for managing
5 | thousands of users with different subscription tiers, limits, and preferences.
6 |
7 | Key components:
8 | - TierConfig: Predefined subscription tiers (FREE, STARTER, PRO, BUSINESS, ENTERPRISE)
9 | - UserProfile: Multi-dimensional user profile (identity, tier, limits, preferences, guardrails, telemetry)
10 | - UserProfileManager: Profile management at scale with caching and database integration
11 |
12 | Example usage:
13 | from cascadeflow.profiles import UserProfile, TierLevel
14 | from cascadeflow import CascadeAgent
15 |
16 | # Create profile from tier preset
17 | profile = UserProfile.from_tier(TierLevel.PRO, user_id="user_123")
18 |
19 | # Create agent from profile
20 | agent = CascadeAgent.from_profile(profile)
21 |
22 | # Use profile manager for scaling
23 | from cascadeflow.profiles import UserProfileManager
24 |
25 | manager = UserProfileManager(cache_ttl_seconds=300)
26 | profile = await manager.get_profile("user_123")
27 | """
28 |
29 | from .tier_config import TierConfig, TierLevel, TIER_PRESETS
30 | from .user_profile import UserProfile
31 | from .profile_manager import UserProfileManager
32 |
33 | __all__ = [
34 | # Tier system
35 | "TierConfig",
36 | "TierLevel",
37 | "TIER_PRESETS",
38 | # User profiles
39 | "UserProfile",
40 | # Profile management
41 | "UserProfileManager",
42 | ]
43 |
--------------------------------------------------------------------------------
/cascadeflow/interface/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | cascadeflow Interface Module
3 | ============================
4 |
5 | User-facing interface components for visual feedback and UI.
6 |
7 | Current Components:
8 | -------------------
9 | - VisualConsumer: Base visual feedback system
10 | - TerminalVisualConsumer: Terminal streaming with indicators
11 | - SilentConsumer: No-output consumer for testing
12 |
13 | Future Extensions:
14 | ------------------
15 | - TerminalUI: Enhanced terminal with Rich formatting
16 | - Progress bars, tables, live updates
17 | - Color schemes, themes
18 | - ASCII art, spinners
19 |
20 | - JupyterUI: Jupyter notebook widgets
21 | - IPython display integration
22 | - Interactive widgets
23 | - Plotly/Matplotlib charts
24 | - Real-time metrics
25 |
26 | - WebUI: Web dashboard interface
27 | - FastAPI/Flask endpoints
28 | - WebSocket streaming
29 | - React/Vue components
30 | - REST API
31 |
32 | - Formatters: Output formatting
33 | - JSON exporter
34 | - Markdown formatter
35 | - CSV exporter
36 | - HTML reports
37 |
38 | Usage:
39 | ------
40 | ```python
41 | from cascadeflow.interface import TerminalVisualConsumer
42 |
43 | # Create consumer
44 | consumer = TerminalVisualConsumer(enable_visual=True)
45 |
46 | # Use with agent
47 | result = await agent.run_streaming(query)
48 | ```
49 | """
50 |
51 | from .visual_consumer import (
52 | SilentConsumer,
53 | TerminalVisualConsumer,
54 | )
55 |
56 | __all__ = [
57 | "TerminalVisualConsumer",
58 | "SilentConsumer",
59 | ]
60 |
61 | __version__ = "2.2.0"
62 |
--------------------------------------------------------------------------------
/tests/benchmarks/README.md:
--------------------------------------------------------------------------------
1 | ### Benchmark Suite
2 |
3 | Professional benchmarks to validate CascadeFlow performance across real-world use cases.
4 |
5 | #### Datasets
6 |
7 | 1. **HumanEval** - Code generation (164 programming problems)
8 | 2. **Bitext Customer Support** - Customer service Q&A (27,000+ examples)
9 | 3. **Banking77** - Banking intent classification (13,000+ examples)
10 | 4. **GSM8K** - Grade school math reasoning (8,500+ problems)
11 |
12 | #### Metrics
13 |
14 | Each benchmark measures:
15 | - **Cost savings** vs. always-powerful-model baseline
16 | - **Quality maintenance** (accuracy/pass rate)
17 | - **Latency** improvements
18 | - **Escalation rates** (drafter acceptance %)
19 |
20 | #### Running Benchmarks
21 |
22 | ```bash
23 | # Run a single benchmark
24 | python -m benchmarks.datasets.humaneval
25 |
26 | # Run all benchmarks
27 | python -m benchmarks.run_all
28 |
29 | # View results
30 | ls benchmarks/results/
31 | ```
32 |
33 | #### Output
34 |
35 | - **JSON**: Detailed results for analysis
36 | - **CSV**: Tabular data for Excel/graphs
37 | - **Markdown**: Human-readable reports with ROI calculations
38 |
39 | #### Structure
40 |
41 | ```
42 | benchmarks/
43 | ├── base.py # Abstract benchmark class
44 | ├── metrics.py # Cost/latency/quality calculations
45 | ├── reporter.py # Report generation
46 | ├── humaneval.py # Code generation benchmark
47 | ├── customer_support.py # Customer service Q&A
48 | ├── banking77.py # Banking intent classification
49 | ├── gsm8k.py # Math reasoning
50 | └── results/ # Output directory
51 | ```
52 |
53 | All benchmarks extend the `Benchmark` base class.
54 |
--------------------------------------------------------------------------------
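
To add a dataset, extend `Benchmark` as noted above. A hypothetical skeleton; the class names come from `tests/benchmarks/__init__.py`, but the abstract hooks to override are defined in `base.py`, which is not shown here:

```python
from benchmarks.base import Benchmark, BenchmarkResult


class MyDatasetBenchmark(Benchmark):
    """Skeleton for a custom dataset benchmark (illustrative only)."""

    # base.py defines the real abstract methods; a run()-style hook that
    # yields BenchmarkResult objects per sample is assumed here.
```
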
/.github/assets/CF_ts_color.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/test_hf_api.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import os
3 |
4 | import httpx
5 |
6 |
7 | async def test_hf():
8 | token = os.getenv("HF_TOKEN")
9 |
10 | # Test different models
11 | models = ["distilgpt2", "gpt2", "openai-community/gpt2"]
12 |
13 | for model in models:
14 | print(f"\nTesting: {model}")
15 | print("=" * 50)
16 |
17 | try:
18 | async with httpx.AsyncClient(timeout=30.0) as client:
19 | # Updated to new HuggingFace Inference Providers API endpoint
20 | # Old: https://api-inference.huggingface.co (deprecated Jan 2025)
21 | # New: https://router.huggingface.co/hf-inference (as of Nov 2025)
22 | response = await client.post(
23 | f"https://router.huggingface.co/hf-inference/models/{model}",
24 | headers={"Authorization": f"Bearer {token}"},
25 | json={"inputs": "Hello"},
26 | )
27 |
28 | print(f"Status: {response.status_code}")
29 |
30 | if response.status_code == 200:
31 | data = response.json()
32 | print(f"✅ Success: {data}")
33 | return model # Return working model
34 | else:
35 | print(f"❌ Error: {response.text[:200]}")
36 |
37 | except Exception as e:
38 | print(f"❌ Exception: {e}")
39 |
40 | return None
41 |
42 |
43 | if __name__ == "__main__":
44 | working_model = asyncio.run(test_hf())
45 | if working_model:
46 | print(f"\n✅ Use this model in tests: {working_model}")
47 | else:
48 | print("\n❌ No models working - HF API may be down or account issue")
49 |
--------------------------------------------------------------------------------
/tests/test_exceptions.py:
--------------------------------------------------------------------------------
1 | """Tests for custom exceptions."""
2 |
3 | import pytest
4 |
5 | from cascadeflow import (
6 | BudgetExceededError,
7 | ModelError,
8 | ProviderError,
9 | QualityThresholdError,
10 | cascadeflowError,
11 | )
12 |
13 |
14 | def test_base_exception():
15 | """Test base cascadeflowError."""
16 | error = cascadeflowError("Test error")
17 | assert "Test error" in str(error)
18 |
19 |
20 | def test_budget_exceeded_error():
21 | """Test BudgetExceededError creation and attributes."""
22 | error = BudgetExceededError("Budget exceeded", remaining=0.5)
23 |
24 | assert "Budget exceeded" in str(error)
25 | assert error.remaining == 0.5
26 | assert isinstance(error, cascadeflowError)
27 |
28 |
29 | def test_quality_threshold_error():
30 | """Test QualityThresholdError."""
31 | error = QualityThresholdError("Quality too low")
32 |
33 | assert "Quality too low" in str(error)
34 | assert isinstance(error, cascadeflowError)
35 |
36 |
37 | def test_provider_error():
38 | """Test ProviderError with provider attribute."""
39 | error = ProviderError("API failed", provider="openai")
40 |
41 | assert "API failed" in str(error)
42 | assert error.provider == "openai"
43 | assert isinstance(error, cascadeflowError)
44 |
45 |
46 | def test_model_error():
47 | """Test ModelError with model and provider attributes."""
48 | error = ModelError("Model failed", model="gpt-4", provider="openai")
49 |
50 | assert "Model failed" in str(error)
51 | assert error.model == "gpt-4"
52 | assert error.provider == "openai"
53 | assert isinstance(error, cascadeflowError)
54 |
55 |
56 | if __name__ == "__main__":
57 | pytest.main([__file__, "-v"])
58 |
--------------------------------------------------------------------------------
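
A small sketch of consuming these exceptions in application code; the attributes used (`remaining`, `provider`) are exactly those asserted in the tests above:

```python
from cascadeflow import BudgetExceededError, ProviderError, cascadeflowError


def describe_error(error: cascadeflowError) -> str:
    """Map a cascadeflow error to a short operator-facing message."""
    if isinstance(error, BudgetExceededError):
        return f"Budget exhausted (${error.remaining:.2f} remaining)"
    if isinstance(error, ProviderError):
        return f"Provider '{error.provider}' failed: {error}"
    return str(error)
```
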
/packages/core/typedoc.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "https://typedoc.org/schema.json",
3 | "entryPoints": ["src/index.ts"],
4 | "out": "docs/api",
5 | "plugin": [],
6 | "exclude": [
7 | "**/*.test.ts",
8 | "**/__tests__/**",
9 | "**/node_modules/**"
10 | ],
11 | "excludePrivate": true,
12 | "excludeProtected": false,
13 | "excludeInternal": false,
14 | "includeVersion": true,
15 | "sort": ["source-order"],
16 | "kindSortOrder": [
17 | "Class",
18 | "Interface",
19 | "Function",
20 | "Variable",
21 | "TypeAlias",
22 | "Enum"
23 | ],
24 | "categorizeByGroup": true,
25 | "categoryOrder": [
26 | "Core",
27 | "Models",
28 | "Quality",
29 | "Streaming",
30 | "Tools",
31 | "Providers",
32 | "Utilities",
33 | "*"
34 | ],
35 | "readme": "README.md",
36 | "name": "@cascadeflow/core",
37 | "navigationLinks": {
38 | "GitHub": "https://github.com/lemony-ai/cascadeflow",
39 | "Examples": "https://github.com/lemony-ai/cascadeflow/tree/main/packages/core/examples",
40 | "Python Docs": "https://github.com/lemony-ai/cascadeflow/tree/main/docs"
41 | },
42 | "searchInComments": true,
43 | "validation": {
44 | "notExported": false,
45 | "invalidLink": false,
46 | "notDocumented": false
47 | },
48 | "skipErrorChecking": true,
49 | "visibilityFilters": {
50 | "protected": false,
51 | "private": false,
52 | "inherited": true,
53 | "external": false
54 | },
55 | "githubPages": true,
56 | "gitRevision": "main",
57 | "sourceLinkTemplate": "https://github.com/lemony-ai/cascadeflow/blob/{gitRevision}/packages/core/{path}#L{line}",
58 | "theme": "default",
59 | "hideGenerator": false,
60 | "basePath": ".",
61 | "treatWarningsAsErrors": false
62 | }
63 |
--------------------------------------------------------------------------------
/examples/docker/multi-instance-ollama/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: '3.8'
2 |
3 | services:
4 | # Draft model - Fast, small model for initial responses
5 | # Uses GPU 0 for quick inference
6 | ollama-draft:
7 | image: ollama/ollama:latest
8 | container_name: ollama-draft
9 | ports:
10 | - "11434:11434"
11 | volumes:
12 | - ollama-draft-data:/root/.ollama
13 | environment:
14 | - OLLAMA_HOST=0.0.0.0:11434
15 | deploy:
16 | resources:
17 | reservations:
18 | devices:
19 | - driver: nvidia
20 | device_ids: ['0'] # Use first GPU
21 | capabilities: [gpu]
22 | restart: unless-stopped
23 | healthcheck:
24 | test: ["CMD", "curl", "-f", "http://localhost:11434/api/tags"]
25 | interval: 30s
26 | timeout: 10s
27 | retries: 3
28 |
29 | # Verifier model - Powerful, large model for quality checks
30 | # Uses GPU 1 for accurate responses
31 | ollama-verifier:
32 | image: ollama/ollama:latest
33 | container_name: ollama-verifier
34 | ports:
35 | - "11435:11434" # Different external port
36 | volumes:
37 | - ollama-verifier-data:/root/.ollama
38 | environment:
39 | - OLLAMA_HOST=0.0.0.0:11434
40 | deploy:
41 | resources:
42 | reservations:
43 | devices:
44 | - driver: nvidia
45 | device_ids: ['1'] # Use second GPU
46 | capabilities: [gpu]
47 | restart: unless-stopped
48 | healthcheck:
49 | test: ["CMD", "curl", "-f", "http://localhost:11434/api/tags"]
50 | interval: 30s
51 | timeout: 10s
52 | retries: 3
53 |
54 | volumes:
55 | ollama-draft-data:
56 | driver: local
57 | ollama-verifier-data:
58 | driver: local
59 |
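60 | # Quickstart sketch (model names below are examples, not requirements):
61 | #   docker compose up -d
62 | #   docker exec ollama-draft ollama pull llama3.2:1b
63 | #   docker exec ollama-verifier ollama pull llama3.1:8b
64 | # Then point cascadeflow's draft model at http://localhost:11434 and the
65 | # verifier model at http://localhost:11435 (the port mapped above).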
--------------------------------------------------------------------------------
/packages/ml/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "@cascadeflow/ml",
3 | "version": "0.6.5",
4 | "description": "ML semantic detection for cascadeflow TypeScript - Feature parity with Python",
5 | "author": {
6 | "name": "Lemony Inc.",
7 | "email": "hello@lemony.ai"
8 | },
9 | "license": "MIT",
10 | "repository": {
11 | "type": "git",
12 | "url": "https://github.com/lemony-ai/cascadeflow.git",
13 | "directory": "packages/ml"
14 | },
15 | "main": "./dist/index.js",
16 | "module": "./dist/index.mjs",
17 | "types": "./dist/index.d.ts",
18 | "exports": {
19 | ".": {
20 | "types": "./dist/index.d.ts",
21 | "import": "./dist/index.mjs",
22 | "require": "./dist/index.js"
23 | }
24 | },
25 | "files": [
26 | "dist",
27 | "README.md"
28 | ],
29 | "scripts": {
30 | "build": "tsup src/index.ts --format cjs,esm --dts --clean",
31 | "dev": "tsup src/index.ts --format cjs,esm --dts --watch",
32 | "test": "vitest run",
33 | "test:watch": "vitest",
34 | "lint": "eslint src --ext .ts",
35 | "typecheck": "tsc --noEmit",
36 | "clean": "rm -rf dist"
37 | },
38 | "keywords": [
39 | "ai",
40 | "ml",
41 | "embeddings",
42 | "semantic-search",
43 | "transformers",
44 | "bge",
45 | "cascadeflow",
46 | "domain-detection",
47 | "semantic-validation"
48 | ],
49 | "dependencies": {
50 | "@xenova/transformers": "^2.17.2"
51 | },
52 | "devDependencies": {
53 | "@types/node": "^20.10.0",
54 | "eslint": "^8.55.0",
55 | "@typescript-eslint/eslint-plugin": "^6.15.0",
56 | "@typescript-eslint/parser": "^6.15.0",
57 | "tsup": "^8.0.1",
58 | "typescript": "^5.3.3",
59 | "vitest": "^1.0.4"
60 | },
61 | "engines": {
62 | "node": ">=18.0.0"
63 | }
64 | }
65 |
--------------------------------------------------------------------------------
/cascadeflow/streaming/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | cascadeflow Streaming Module
3 | =============================
4 |
5 | Provides real-time streaming for both text and tool-calling cascades.
6 |
7 | Modules:
8 | - base: Text streaming (StreamManager, StreamEvent)
9 | - tools: Tool streaming (ToolStreamManager, ToolStreamEvent)
10 | - utils: Shared utilities (ProgressiveJSONParser, etc.)
11 |
12 | Usage:
13 | # Text streaming
14 | from cascadeflow.streaming import StreamManager, StreamEvent, StreamEventType
15 |
16 | manager = StreamManager(cascade)
17 | async for event in manager.stream(query):
18 | if event.type == StreamEventType.CHUNK:
19 | print(event.content, end='')
20 |
21 | # Tool streaming
22 |     from cascadeflow.streaming import ToolStreamManager, ToolStreamEvent, ToolStreamEventType
23 |
24 | tool_manager = ToolStreamManager(cascade)
25 | async for event in tool_manager.stream(query, tools=tools):
26 | if event.type == ToolStreamEventType.TOOL_CALL_START:
27 | print(f"[Calling: {event.tool_call['name']}]")
28 | """
29 |
30 | # Text streaming
31 | from .base import (
32 | StreamEvent,
33 | StreamEventType,
34 | StreamManager,
35 | )
36 |
37 | # Tool streaming
38 | from .tools import (
39 | ToolStreamEvent,
40 | ToolStreamEventType,
41 | ToolStreamManager,
42 | )
43 |
44 | # Utilities
45 | from .utils import (
46 | JSONParseState,
47 | ProgressiveJSONParser,
48 | )
49 |
50 | __all__ = [
51 | # Text streaming
52 | "StreamEventType",
53 | "StreamEvent",
54 | "StreamManager",
55 | # Tool streaming
56 | "ToolStreamEventType",
57 | "ToolStreamEvent",
58 | "ToolStreamManager",
59 | # Utilities
60 | "ProgressiveJSONParser",
61 | "JSONParseState",
62 | ]
63 |
64 | __version__ = "2.0.0"
65 |
--------------------------------------------------------------------------------
/packages/langchain-cascadeflow/examples/inspect-metadata.ts:
--------------------------------------------------------------------------------
1 | /**
2 | * Metadata Inspection Script
3 | *
4 | * Checks what metadata is actually being injected into responses
5 | */
6 |
7 | import { ChatOpenAI } from '@langchain/openai';
8 | import { withCascade } from '../src/index.js';
9 |
10 | async function main() {
11 | const drafter = new ChatOpenAI({ model: 'gpt-4o-mini', temperature: 0.7 });
12 | const verifier = new ChatOpenAI({ model: 'gpt-4o', temperature: 0.7 });
13 |
14 | const cascadeModel = withCascade({
15 | drafter,
16 | verifier,
17 | qualityThreshold: 0.7,
18 | enableCostTracking: true,
19 | });
20 |
21 | console.log('Testing metadata injection...\n');
22 |
23 | // Test with simple query
24 | const result = await cascadeModel.invoke('What is 2+2?');
25 |
26 | console.log('=== Response Structure ===');
27 | console.log('Content:', result.content);
28 | console.log('\n=== Additional Kwargs ===');
29 | console.log(JSON.stringify(result.additional_kwargs, null, 2));
30 |
31 | console.log('\n=== Response Metadata ===');
32 | console.log(JSON.stringify(result.response_metadata, null, 2));
33 |
34 | console.log('\n=== Last Cascade Result ===');
35 | const stats = cascadeModel.getLastCascadeResult();
36 | console.log(JSON.stringify(stats, null, 2));
37 |
38 | // Test calling _generate directly to see llmOutput
39 | console.log('\n\n=== Testing _generate directly ===');
40 | const { HumanMessage } = await import('@langchain/core/messages');
41 | const chatResult = await cascadeModel._generate([new HumanMessage('What is the capital of France?')], {});
42 |
43 | console.log('llmOutput:', JSON.stringify(chatResult.llmOutput, null, 2));
44 | console.log('\nGeneration text:', chatResult.generations[0].text);
45 | }
46 |
47 | main().catch(console.error);
48 |
--------------------------------------------------------------------------------
/cascadeflow/tools/examples.py:
--------------------------------------------------------------------------------
1 | """
2 | Example tools for cascadeflow.
3 |
4 | Save this as: cascadeflow/tools/examples.py
5 |
6 | Provides ready-to-use example tools for testing and documentation.
7 | """
8 |
9 | from typing import Any
10 |
11 |
12 | def example_calculator(operation: str, x: float, y: float) -> float:
13 | """
14 | Perform basic arithmetic operations.
15 |
16 | Args:
17 | operation: Operation to perform (add, subtract, multiply, divide)
18 | x: First number
19 | y: Second number
20 |
21 | Returns:
22 | Result of the operation
23 |
24 | Raises:
25 | ValueError: If operation is not recognized
26 | """
27 | operations = {
28 | "add": lambda a, b: a + b,
29 | "subtract": lambda a, b: a - b,
30 | "multiply": lambda a, b: a * b,
31 | "divide": lambda a, b: a / b if b != 0 else float("inf"),
32 | }
33 |
34 | if operation not in operations:
35 | raise ValueError(
36 |             f"Unknown operation: {operation}. Valid operations: {', '.join(operations.keys())}"
37 | )
38 |
39 | return operations[operation](x, y)
40 |
41 |
42 | def example_get_weather(location: str, unit: str = "celsius") -> dict[str, Any]:
43 | """
44 | Get current weather for a location (mock implementation).
45 |
46 | Args:
47 | location: City name
48 | unit: Temperature unit (celsius or fahrenheit)
49 |
50 | Returns:
51 | Weather data dictionary with temperature, condition, and humidity
52 | """
53 | # Mock implementation - returns fixed data
54 | temp = 22 if unit == "celsius" else 72
55 |
56 | return {
57 | "location": location,
58 | "temperature": temp,
59 | "unit": unit,
60 | "condition": "sunny",
61 | "humidity": 65,
62 | }
63 |
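64 | 
65 | if __name__ == "__main__":
66 |     # Smoke test (illustrative): both tools are pure/mock, so this is safe to run.
67 |     print(example_calculator("add", 2, 3))  # -> 5
68 |     print(example_get_weather("Berlin"))  # -> mock weather dict for Berlin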
--------------------------------------------------------------------------------
/packages/core/src/streaming/index.ts:
--------------------------------------------------------------------------------
1 | /**
2 | * Streaming Module Index
3 | *
4 | * Exports all streaming-related functionality:
5 | * - StreamManager for cascade streaming
6 | * - ToolStreamManager for tool-calling streaming
7 | * - Utilities for JSON parsing, validation, confidence estimation
8 | * - Event types and interfaces
9 | */
10 |
11 | // Re-export from main streaming file
12 | export {
13 | StreamEventType,
14 | createStreamEvent,
15 | isChunkEvent,
16 | isCompleteEvent,
17 | isErrorEvent,
18 | collectStream,
19 | collectResult,
20 | } from '../streaming';
21 | export type {
22 | StreamEvent,
23 | StreamEventData,
24 | StreamChunk,
25 | StreamOptions,
26 | } from '../streaming';
27 |
28 | // Stream Manager
29 | export {
30 | StreamManager,
31 | createStreamManager,
32 | } from './stream-manager';
33 | export type {
34 | StreamManagerConfig,
35 | StreamOptions as StreamManagerOptions,
36 | } from './stream-manager';
37 |
38 | // Tool Stream Manager
39 | export {
40 | ToolStreamManager,
41 | createToolStreamManager,
42 | ToolStreamEventType,
43 | } from './tool-stream-manager';
44 | export type {
45 | ToolStreamEvent,
46 | ToolStreamManagerConfig,
47 | ToolStreamOptions,
48 | } from './tool-stream-manager';
49 |
50 | // Utilities
51 | export {
52 | ProgressiveJSONParser,
53 | ToolCallValidator,
54 | JSONParseState,
55 | estimateConfidenceFromLogprobs,
56 | estimateConfidenceFromContent,
57 | estimateTokens,
58 | } from './utils';
59 | export type {
60 | ParseResult,
61 | } from './utils';
62 |
63 | // Event Formatter
64 | export {
65 | EventFormatter,
66 | createEventFormatter,
67 | getDefaultFormatter,
68 | quickFormat,
69 | VISUAL_ICONS,
70 | COLORS,
71 | } from './event-formatter';
72 | export type {
73 | EventFormatterConfig,
74 | } from './event-formatter';
75 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # CascadeFlow Production Requirements
2 | # Minimal core dependencies only - provider SDKs are optional extras
3 |
4 | # ============================================================================
5 | # CORE DEPENDENCIES (Always Required)
6 | # ============================================================================
7 |
8 | # Data validation and settings
9 | pydantic>=2.0.0
10 |
11 | # HTTP client for API calls
12 | httpx>=0.25.0
13 |
14 | # Token counting and cost estimation
15 | tiktoken>=0.5.0
16 |
17 | # Terminal output and logging
18 | rich>=13.0.0
19 |
20 |
21 | # ============================================================================
22 | # PROVIDER INSTALLATION (Optional - Use Extras)
23 | # ============================================================================
24 |
25 | # Install specific providers as needed:
26 | # pip install cascadeflow[openai] → Adds openai>=1.0.0
27 | # pip install cascadeflow[anthropic] → Adds anthropic>=0.8.0
28 | # pip install cascadeflow[groq] → Adds groq>=0.4.0
29 | # pip install cascadeflow[providers] → Adds OpenAI + Anthropic + Groq
30 | # pip install cascadeflow[all] → Everything
31 |
32 | # FREE/LOCAL OPTIONS (No Python packages needed):
33 | # Ollama → Just HTTP to localhost:11434
34 | # vLLM → Can use HTTP to vLLM server (or install cascadeflow[vllm])
35 |
36 |
37 | # ============================================================================
38 | # INSTALLATION EXAMPLES
39 | # ============================================================================
40 |
41 | # Minimal (core only):
42 | # pip install cascadeflow
43 | #
44 | # With OpenAI:
45 | # pip install cascadeflow[openai]
46 | #
47 | # With common providers:
48 | # pip install cascadeflow[providers]
49 | #
50 | # Everything:
51 | # pip install cascadeflow[all]
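52 | 
53 | # Runtime check (illustrative sketch, standard library only): confirm an
54 | # optional provider SDK is installed before constructing a provider:
55 | #   import importlib.util
56 | #   if importlib.util.find_spec("openai") is None:
57 | #       raise RuntimeError("Missing SDK - install with: pip install cascadeflow[openai]")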
--------------------------------------------------------------------------------
/packages/integrations/n8n/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "@cascadeflow/n8n-nodes-cascadeflow",
3 | "version": "0.6.7",
4 | "description": "n8n node for cascadeflow - Smart AI model cascading with 40-85% cost savings",
5 | "keywords": [
6 | "n8n-community-node-package",
7 | "n8n",
8 | "cascadeflow",
9 | "ai",
10 | "llm",
11 | "cost-optimization"
12 | ],
13 | "license": "MIT",
14 | "homepage": "https://github.com/lemony-ai/cascadeflow",
15 | "author": {
16 | "name": "Lemony Inc.",
17 | "email": "hello@lemony.ai"
18 | },
19 | "repository": {
20 | "type": "git",
21 | "url": "https://github.com/lemony-ai/cascadeflow.git",
22 | "directory": "packages/integrations/n8n"
23 | },
24 | "scripts": {
25 | "build": "tsc && gulp build:icons",
26 | "dev": "tsc --watch",
27 | "format": "prettier nodes credentials --write",
28 | "lint": "eslint \"nodes/**/*.ts\" \"credentials/**/*.ts\" package.json",
29 | "lintfix": "eslint \"nodes/**/*.ts\" \"credentials/**/*.ts\" package.json --fix",
30 | "prepublishOnly": "npm run build && npm run lint"
31 | },
32 | "files": [
33 | "dist"
34 | ],
35 | "n8n": {
36 | "n8nNodesApiVersion": 1,
37 | "credentials": [
38 | "dist/credentials/CascadeFlowApi.credentials.js"
39 | ],
40 | "nodes": [
41 | "dist/nodes/LmChatCascadeFlow/LmChatCascadeFlow.node.js"
42 | ]
43 | },
44 | "devDependencies": {
45 | "@types/node": "^20.10.0",
46 | "@typescript-eslint/parser": "^6.0.0",
47 | "eslint": "^8.42.0",
48 | "eslint-plugin-n8n-nodes-base": "^1.11.0",
49 | "gulp": "^4.0.2",
50 | "n8n-workflow": "^1.0.0",
51 | "prettier": "^2.7.1",
52 | "typescript": "^5.1.6"
53 | },
54 | "peerDependencies": {
55 | "n8n-workflow": "*"
56 | },
57 | "dependencies": {
58 | "@langchain/core": "^0.3.0",
59 | "@cascadeflow/core": "^0.6.0"
60 | }
61 | }
62 |
--------------------------------------------------------------------------------
/packages/core/examples/browser/vercel-edge/api/chat.ts:
--------------------------------------------------------------------------------
1 | /**
2 | * Vercel Edge Function for cascadeflow
3 | *
4 | * This edge function runs globally on Vercel's network for low-latency AI inference.
5 | *
6 | * Deploy: vercel deploy
7 | * Test locally: vercel dev
8 | */
9 |
10 | import { CascadeAgent } from '@cascadeflow/core';
11 |
12 | export const config = {
13 | runtime: 'edge',
14 | };
15 |
16 | export default async function handler(req: Request) {
17 | // Only allow POST requests
18 | if (req.method !== 'POST') {
19 | return new Response('Method not allowed', { status: 405 });
20 | }
21 |
22 | try {
23 | const { query, options } = await req.json();
24 |
25 | if (!query || typeof query !== 'string') {
26 | return new Response('Invalid query', { status: 400 });
27 | }
28 |
29 | // Create cascade agent
30 | const agent = new CascadeAgent({
31 | models: [
32 | {
33 | name: 'gpt-4o-mini',
34 | provider: 'openai',
35 | cost: 0.00015,
36 | apiKey: process.env.OPENAI_API_KEY,
37 | },
38 | {
39 | name: 'gpt-4o',
40 | provider: 'openai',
41 | cost: 0.00625,
42 | apiKey: process.env.OPENAI_API_KEY,
43 | },
44 | ],
45 | });
46 |
47 | // Run cascade
48 | const result = await agent.run(query, options);
49 |
50 | // Return result
51 | return new Response(JSON.stringify(result), {
52 | status: 200,
53 | headers: {
54 | 'Content-Type': 'application/json',
55 | 'Access-Control-Allow-Origin': '*', // Adjust for production
56 | },
57 | });
58 | } catch (error: any) {
59 | console.error('Edge function error:', error);
60 | return new Response(
61 | JSON.stringify({
62 | error: error.message || 'Internal server error',
63 | }),
64 | {
65 | status: 500,
66 | headers: { 'Content-Type': 'application/json' },
67 | }
68 | );
69 | }
70 | }
71 |
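72 | // Local smoke test (illustrative) once `vercel dev` is running on its
73 | // default port 3000:
74 | //   curl -X POST http://localhost:3000/api/chat \
75 | //     -H 'Content-Type: application/json' \
76 | //     -d '{"query": "What is 2+2?"}'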
--------------------------------------------------------------------------------
/examples/vllm_example.py:
--------------------------------------------------------------------------------
1 | """
2 | vLLM provider example.
3 |
4 | Demonstrates using vLLM for high-performance local inference.
5 |
6 | Prerequisites:
7 | 1. Install vLLM: pip install vllm
8 | 2. Start vLLM server:
9 | python -m vllm.entrypoints.openai.api_server \
10 |            --model meta-llama/Meta-Llama-3-8B-Instruct \
11 | --host 0.0.0.0 \
12 | --port 8000
13 | """
14 |
15 | import asyncio
16 |
17 | from cascadeflow.providers.vllm import VLLMProvider
18 |
19 |
20 | async def main():
21 | """Test vLLM provider."""
22 |
23 | print("vLLM Provider Test\n")
24 |
25 | # Initialize provider
26 | provider = VLLMProvider(base_url="http://localhost:8000/v1")
27 |
28 | try:
29 | # List available models
30 | print("Checking available models...")
31 | models = await provider.list_models()
32 | print(f"Available models: {models}\n")
33 |
34 | if not models:
35 | print("No models found. Make sure vLLM server is running.")
36 | return
37 |
38 | # Use first available model
39 | model = models[0]
40 | print(f"Using model: {model}\n")
41 |
42 | # Test completion
43 | print("Testing completion...")
44 | result = await provider.complete(
45 | prompt="Explain AI in one sentence", model=model, max_tokens=100
46 | )
47 |
48 | print(f"Response: {result.content}")
49 | print(f"Tokens: {result.tokens_used}")
50 | print(f"Latency: {result.latency_ms:.0f}ms")
51 | print(f"Cost: ${result.cost:.4f} (self-hosted)")
52 |
53 | except Exception as e:
54 | print(f"Error: {e}")
55 | print("\nMake sure vLLM server is running:")
56 | print(" python -m vllm.entrypoints.openai.api_server \\")
57 |         print("    --model meta-llama/Meta-Llama-3-8B-Instruct \\")
58 | print(" --host 0.0.0.0 --port 8000")
59 |
60 | finally:
61 | await provider.client.aclose()
62 |
63 |
64 | if __name__ == "__main__":
65 | asyncio.run(main())
66 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 | # Python dependencies - DISABLED: No automatic PRs
4 | - package-ecosystem: "pip"
5 | directory: "/"
6 | schedule:
7 | interval: "weekly"
8 | day: "monday"
9 | open-pull-requests-limit: 0
10 | labels:
11 | - "dependencies"
12 | - "lang: python"
13 | commit-message:
14 | prefix: "chore(deps)"
15 | include: "scope"
16 |
17 | # TypeScript/npm dependencies (root) - DISABLED: No automatic PRs
18 | - package-ecosystem: "npm"
19 | directory: "/"
20 | schedule:
21 | interval: "weekly"
22 | day: "monday"
23 | open-pull-requests-limit: 0
24 | labels:
25 | - "dependencies"
26 | - "lang: typescript"
27 | commit-message:
28 | prefix: "chore(deps)"
29 | include: "scope"
30 |
31 | # TypeScript Core package - DISABLED: No automatic PRs
32 | - package-ecosystem: "npm"
33 | directory: "/packages/core"
34 | schedule:
35 | interval: "weekly"
36 | day: "monday"
37 | open-pull-requests-limit: 0
38 | labels:
39 | - "dependencies"
40 | - "lang: typescript"
41 | - "core"
42 | commit-message:
43 | prefix: "chore(deps-core)"
44 | include: "scope"
45 |
46 | # n8n Integration package - DISABLED: No automatic PRs
47 | - package-ecosystem: "npm"
48 | directory: "/packages/integrations/n8n"
49 | schedule:
50 | interval: "weekly"
51 | day: "monday"
52 | open-pull-requests-limit: 0
53 | labels:
54 | - "dependencies"
55 | - "integration: n8n"
56 | commit-message:
57 | prefix: "chore(deps-n8n)"
58 | include: "scope"
59 |
60 | # GitHub Actions - DISABLED: No automatic PRs
61 | - package-ecosystem: "github-actions"
62 | directory: "/"
63 | schedule:
64 | interval: "weekly"
65 | day: "monday"
66 | open-pull-requests-limit: 0
67 | labels:
68 | - "dependencies"
69 | - "ci/cd"
70 | commit-message:
71 | prefix: "chore(deps-actions)"
72 | include: "scope"
73 |
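74 | # Note: open-pull-requests-limit: 0 keeps each ecosystem registered while
75 | # preventing automatic version-update PRs; per GitHub's docs the limit does
76 | # not apply to security updates. To re-enable version PRs, raise the limit:
77 | #   open-pull-requests-limit: 5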
--------------------------------------------------------------------------------
/.github/assets/README.md:
--------------------------------------------------------------------------------
1 | # cascadeflow Assets
2 |
3 | This directory contains brand assets and logos for cascadeflow used across documentation and READMEs.
4 |
5 | ## Logo Files
6 |
7 | ### Main Logos
8 |
9 | - **`CF_logo_bright.svg`** - cascadeflow logo for light mode/bright backgrounds
10 | - **`CF_logo_dark.svg`** - cascadeflow logo for dark mode/dark backgrounds
11 |
12 | ### Platform Icons
13 |
14 | - **`CF_python_color.svg`** - Python platform icon (color)
15 | - **`CF_ts_color.svg`** - TypeScript platform icon (color)
16 | - **`CF_n8n_color.svg`** - n8n integration icon (color)
17 |
18 | ## Usage
19 |
20 | ### Main Logo with Dark/Light Mode Support
21 |
22 | ```markdown
23 | <picture>
24 |   <source media="(prefers-color-scheme: dark)" srcset=".github/assets/CF_logo_dark.svg">
25 |   <source media="(prefers-color-scheme: light)" srcset=".github/assets/CF_logo_bright.svg">
26 |   <img src=".github/assets/CF_logo_bright.svg" alt="cascadeflow" width="80%" style="margin: 20px auto;">
27 | </picture>
28 | ```
29 |
30 | ### Inline Platform Icons
31 |
32 | ```markdown
33 | <img src=".github/assets/CF_python_color.svg" alt="Python" width="24">
34 | <img src=".github/assets/CF_ts_color.svg" alt="TypeScript" width="24">
35 | <img src=".github/assets/CF_n8n_color.svg" alt="n8n" width="24">
36 | ```
37 |
38 | ## Current Usage
39 |
40 | These assets are used in:
41 |
42 | - **Main README** (`/README.md`) - Logo header + navigation icons
43 | - **TypeScript README** (`/packages/core/README.md`) - Logo header + TypeScript icon
44 | - **n8n Integration README** (`/packages/integrations/n8n/README.md`) - Logo header + n8n icon
45 |
46 | ## Brand Guidelines
47 |
48 | - **Logo Usage**: The main logo should be displayed at 80% width with responsive scaling for README headers
49 | - **Logo Styling**: Use `margin: 20px auto;` for proper spacing and centering
50 | - **Icon Usage**: Platform icons should be used at 20-24px for inline navigation
51 | - **Colors**: All icons use official brand colors (Python blue/yellow, TypeScript blue, n8n pink/purple)
52 | - **File Format**: All assets are SVG for scalability and quality at any size
53 |
--------------------------------------------------------------------------------
/packages/core/examples/nodejs/test-complexity-quick.ts:
--------------------------------------------------------------------------------
1 | /**
2 | * Quick test of complexity detection improvements
3 | */
4 |
5 | import { ComplexityDetector } from '@cascadeflow/core';
6 |
7 | const detector = new ComplexityDetector();
8 |
9 | const testQueries = [
10 | { q: "What color is the sky?", expected: "trivial" },
11 | { q: "What's the capital of France?", expected: "trivial" },
12 | { q: "Translate 'hello' to Spanish", expected: "simple" },
13 | { q: "Explain the difference between lists and tuples in Python", expected: "moderate" },
14 | { q: "Write a function to reverse a string in Python", expected: "moderate" },
15 | { q: "Explain quantum entanglement and its implications for quantum computing in detail", expected: "expert" },
16 | { q: "Design a microservices architecture for a large-scale e-commerce platform with high availability", expected: "expert" },
17 | { q: "Analyze the philosophical implications of consciousness and free will in the context of determinism", expected: "expert" },
18 | ];
19 |
20 | console.log('='.repeat(80));
21 | console.log('COMPLEXITY DETECTION TEST');
22 | console.log('='.repeat(80));
23 | console.log();
24 |
25 | let correct = 0;
26 | let total = testQueries.length;
27 |
28 | for (const test of testQueries) {
29 | const result = detector.detect(test.q, true);
30 | const match = result.complexity === test.expected ? '✅' : '❌';
31 |
32 | if (result.complexity === test.expected) correct++;
33 |
34 | console.log(`${match} Query: ${test.q}`);
35 | console.log(` Expected: ${test.expected}, Got: ${result.complexity} (conf: ${result.confidence.toFixed(2)})`);
36 |
37 | if (result.metadata?.technicalTerms && result.metadata.technicalTerms.length > 0) {
38 | console.log(` Technical Terms: ${result.metadata.technicalTerms.join(', ')}`);
39 | }
40 | if (result.metadata?.domains && result.metadata.domains.size > 0) {
41 | console.log(` Domains: ${Array.from(result.metadata.domains).join(', ')}`);
42 | }
43 | console.log();
44 | }
45 |
46 | console.log('='.repeat(80));
47 | console.log(`ACCURACY: ${correct}/${total} (${((correct/total)*100).toFixed(1)}%)`);
48 | console.log('='.repeat(80));
49 |
--------------------------------------------------------------------------------
/cascadeflow/core/batch_config.py:
--------------------------------------------------------------------------------
1 | """Batch processing configuration for cascadeflow."""
2 |
3 | from dataclasses import dataclass, field
4 | from enum import Enum
5 | from typing import Any, Optional
6 |
7 |
8 | class BatchStrategy(str, Enum):
9 | """Batch processing strategy"""
10 |
11 | LITELLM_NATIVE = "litellm_native" # Use LiteLLM batch API (preferred)
12 | SEQUENTIAL = "sequential" # Sequential with concurrency control
13 | AUTO = "auto" # Auto-detect best strategy
14 |
15 |
16 | @dataclass
17 | class BatchConfig:
18 | """
19 | Configuration for batch processing.
20 |
21 | Example:
22 | config = BatchConfig(
23 | batch_size=10,
24 | max_parallel=3,
25 | timeout_per_query=30.0,
26 | strategy=BatchStrategy.AUTO
27 | )
28 | """
29 |
30 | # Batch settings
31 | batch_size: int = 10
32 | """Maximum number of queries in a single batch"""
33 |
34 | max_parallel: int = 3
35 | """Maximum number of parallel requests (fallback mode)"""
36 |
37 | timeout_per_query: float = 30.0
38 | """Timeout per query in seconds"""
39 |
40 | total_timeout: Optional[float] = None
41 | """Total timeout for entire batch (default: timeout_per_query * batch_size)"""
42 |
43 | # Strategy
44 | strategy: BatchStrategy = BatchStrategy.AUTO
45 | """Batch processing strategy"""
46 |
47 | # Error handling
48 | stop_on_error: bool = False
49 | """Stop processing batch if any query fails"""
50 |
51 | retry_failed: bool = True
52 | """Retry failed queries once"""
53 |
54 | # Cost & quality
55 | track_cost: bool = True
56 | """Track cost for each query in batch"""
57 |
58 | validate_quality: bool = True
59 | """Validate quality for each query in batch"""
60 |
61 | # Advanced
62 | preserve_order: bool = True
63 | """Preserve query order in results"""
64 |
65 | metadata: dict[str, Any] = field(default_factory=dict)
66 | """Custom metadata for batch"""
67 |
68 | def __post_init__(self):
69 | if self.total_timeout is None:
70 | self.total_timeout = self.timeout_per_query * self.batch_size
71 |
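72 | 
73 | # Illustrative check of the __post_init__ derivation above: when total_timeout
74 | # is omitted, it defaults to timeout_per_query * batch_size.
75 | #   config = BatchConfig(batch_size=5, timeout_per_query=10.0)
76 | #   assert config.total_timeout == 50.0  # 10.0s per query x 5 queries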
--------------------------------------------------------------------------------
/.github/assets/CF_python_color.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.github/labeler.yml:
--------------------------------------------------------------------------------
1 | # Labeler configuration for automatic PR labeling
2 |
3 | # Python
4 | 'lang: python':
5 | - changed-files:
6 | - any-glob-to-any-file:
7 | - 'cascadeflow/**/*.py'
8 | - 'tests/**/*.py'
9 | - 'examples/**/*.py'
10 | - 'pyproject.toml'
11 | - 'requirements*.txt'
12 |
13 | # TypeScript
14 | 'lang: typescript':
15 | - changed-files:
16 | - any-glob-to-any-file:
17 | - 'packages/**/*.ts'
18 | - 'packages/**/*.tsx'
19 | - 'packages/**/tsconfig.json'
20 | - 'packages/**/package.json'
21 |
22 | # n8n
23 | 'integration: n8n':
24 | - changed-files:
25 | - any-glob-to-any-file:
26 | - 'packages/integrations/n8n/**/*'
27 |
28 | # Documentation
29 | 'documentation':
30 | - changed-files:
31 | - any-glob-to-any-file:
32 | - '**/*.md'
33 | - 'docs/**/*'
34 |
35 | # Workflows
36 | 'ci/cd':
37 | - changed-files:
38 | - any-glob-to-any-file:
39 | - '.github/workflows/**/*'
40 | - '.github/**/*.yml'
41 |
42 | # Dependencies
43 | 'dependencies':
44 | - changed-files:
45 | - any-glob-to-any-file:
46 | - 'package.json'
47 | - 'pnpm-lock.yaml'
48 | - 'requirements*.txt'
49 | - 'pyproject.toml'
50 |
51 | # Tests
52 | 'tests':
53 | - changed-files:
54 | - any-glob-to-any-file:
55 | - 'tests/**/*'
56 | - 'packages/**/__tests__/**/*'
57 | - 'packages/**/*.test.ts'
58 | - 'packages/**/*.spec.ts'
59 |
60 | # Examples
61 | 'examples':
62 | - changed-files:
63 | - any-glob-to-any-file:
64 | - 'examples/**/*'
65 | - 'packages/**/examples/**/*'
66 |
67 | # Core
68 | 'core':
69 | - changed-files:
70 | - any-glob-to-any-file:
71 | - 'cascadeflow/**/*.py'
72 | - 'packages/core/src/**/*'
73 |
74 | # Providers
75 | 'providers':
76 | - changed-files:
77 | - any-glob-to-any-file:
78 | - 'cascadeflow/providers/**/*'
79 | - 'packages/core/src/providers/**/*'
80 |
81 | # Configuration
82 | 'configuration':
83 | - changed-files:
84 | - any-glob-to-any-file:
85 | - 'turbo.json'
86 | - 'pnpm-workspace.yaml'
87 | - '.eslintrc*'
88 | - '.prettierrc*'
89 | - 'ruff.toml'
90 | - 'pyproject.toml'
91 |
--------------------------------------------------------------------------------
/packages/integrations/n8n/credentials/CascadeFlowApi.credentials.ts:
--------------------------------------------------------------------------------
1 | import {
2 | IAuthenticateGeneric,
3 | ICredentialTestRequest,
4 | ICredentialType,
5 | INodeProperties,
6 | } from 'n8n-workflow';
7 |
8 | export class CascadeFlowApi implements ICredentialType {
9 | name = 'cascadeFlowApi';
10 | displayName = 'Cascadeflow API';
11 | documentationUrl = 'https://github.com/lemony-ai/cascadeflow';
12 | properties: INodeProperties[] = [
13 | {
14 | displayName: 'OpenAI API Key',
15 | name: 'openaiApiKey',
16 | type: 'string',
17 | typeOptions: { password: true },
18 | default: '',
19 | description: 'API key for OpenAI (GPT-4, GPT-4o, etc.)',
20 | placeholder: 'sk-...',
21 | },
22 | {
23 | displayName: 'Anthropic API Key',
24 | name: 'anthropicApiKey',
25 | type: 'string',
26 | typeOptions: { password: true },
27 | default: '',
28 | description: 'API key for Anthropic (Claude models)',
29 | placeholder: 'sk-ant-...',
30 | },
31 | {
32 | displayName: 'Groq API Key',
33 | name: 'groqApiKey',
34 | type: 'string',
35 | typeOptions: { password: true },
36 | default: '',
37 | description: 'API key for Groq (fast Llama inference)',
38 | placeholder: 'gsk_...',
39 | },
40 | {
41 | displayName: 'Together AI API Key',
42 | name: 'togetherApiKey',
43 | type: 'string',
44 | typeOptions: { password: true },
45 | default: '',
46 | description: 'API key for Together AI',
47 | placeholder: '',
48 | },
49 | {
50 | displayName: 'HuggingFace API Key',
51 | name: 'huggingfaceApiKey',
52 | type: 'string',
53 | typeOptions: { password: true },
54 | default: '',
55 | description: 'API key for HuggingFace Inference',
56 | placeholder: 'hf_...',
57 | },
58 | ];
59 |
60 | authenticate: IAuthenticateGeneric = {
61 | type: 'generic',
62 | properties: {},
63 | };
64 |
65 | test: ICredentialTestRequest = {
66 | request: {
67 | baseURL: 'https://api.openai.com/v1',
68 | url: '/models',
69 | method: 'GET',
70 | headers: {
71 | Authorization: '=Bearer {{$credentials.openaiApiKey}}',
72 | },
73 | },
74 | };
75 | }
76 |
--------------------------------------------------------------------------------
/packages/langchain-cascadeflow/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "@cascadeflow/langchain",
3 | "version": "0.6.5",
4 | "description": "LangChain integration for cascadeflow - Add intelligent cost optimization to your LangChain models",
5 | "author": {
6 | "name": "Lemony Inc.",
7 | "email": "hello@lemony.ai"
8 | },
9 | "license": "MIT",
10 | "repository": {
11 | "type": "git",
12 | "url": "https://github.com/lemony-ai/cascadeflow.git",
13 | "directory": "packages/langchain-cascadeflow"
14 | },
15 | "main": "./dist/index.js",
16 | "module": "./dist/index.mjs",
17 | "types": "./dist/index.d.ts",
18 | "exports": {
19 | ".": {
20 | "types": "./dist/index.d.ts",
21 | "import": "./dist/index.mjs",
22 | "require": "./dist/index.js"
23 | }
24 | },
25 | "files": [
26 | "dist",
27 | "examples",
28 | "README.md"
29 | ],
30 | "scripts": {
31 | "build": "tsup src/index.ts --format cjs,esm --dts --clean",
32 | "dev": "tsup src/index.ts --format cjs,esm --dts --watch",
33 | "test": "vitest run",
34 | "test:watch": "vitest",
35 | "lint": "eslint src --ext .ts",
36 | "typecheck": "tsc --noEmit",
37 | "clean": "rm -rf dist"
38 | },
39 | "keywords": [
40 | "langchain",
41 | "cascadeflow",
42 | "ai",
43 | "llm",
44 | "cost-optimization",
45 | "model-routing",
46 | "cascade",
47 | "typescript"
48 | ],
49 | "dependencies": {
50 | "@cascadeflow/core": "workspace:^",
51 | "@cascadeflow/ml": "workspace:^",
52 | "@langchain/anthropic": "^1.0.1",
53 | "@langchain/google-genai": "^1.0.1"
54 | },
55 | "peerDependencies": {
56 | "@langchain/core": "^0.3.0",
57 | "langchain": "^0.3.0"
58 | },
59 | "peerDependenciesMeta": {
60 | "langchain": {
61 | "optional": true
62 | }
63 | },
64 | "devDependencies": {
65 | "@langchain/core": "^0.3.24",
66 | "@langchain/openai": "^0.3.17",
67 | "@types/node": "^20.10.0",
68 | "@typescript-eslint/eslint-plugin": "^6.15.0",
69 | "@typescript-eslint/parser": "^6.15.0",
70 | "eslint": "^8.55.0",
71 | "langchain": "^0.3.13",
72 | "openai": "^4.73.1",
73 | "tsup": "^8.0.1",
74 | "typescript": "^5.3.3",
75 | "vitest": "^1.0.4"
76 | },
77 | "engines": {
78 | "node": ">=18.0.0"
79 | }
80 | }
81 |
--------------------------------------------------------------------------------
/packages/integrations/n8n/DEPRECATE_5.0.x.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Script to deprecate incorrect 5.0.x versions of @cascadeflow/n8n-nodes-cascadeflow
4 | # Run this AFTER publishing v0.5.0 to npm
5 |
6 | echo "Deprecating versions 5.0.1 through 5.0.7..."
7 | echo ""
8 | echo "⚠️ Make sure you are authenticated to npm with the correct account"
9 | echo "⚠️ Run: npm whoami"
10 | echo ""
11 | read -p "Press Enter to continue or Ctrl+C to cancel..."
12 |
13 | # Deprecate each version with a pointer to the corrected release line
14 | MSG="Version numbering error. Please uninstall and install v0.5.0 or later. See: https://github.com/lemony-ai/cascadeflow/tree/main/packages/integrations/n8n#version-migration"
15 | 
16 | for version in 5.0.1 5.0.2 5.0.3 5.0.4 5.0.5 5.0.6 5.0.7; do
17 |     npm deprecate "@cascadeflow/n8n-nodes-cascadeflow@${version}" "$MSG"
18 | done
19 | 
20 | echo ""
21 | echo "✅ All 5.0.x versions deprecated successfully!"
22 | echo ""
23 | echo "Users will see deprecation warnings when installing these versions."
24 | echo "npm will still recommend v0.5.0 as the latest stable version."
33 |
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
1 | name: Release
2 |
3 | on:
4 | push:
5 | tags:
6 | - 'v*.*.*' # Trigger on version tags like v0.1.0, v1.0.0
7 |
8 | permissions:
9 | contents: write
10 |
11 | jobs:
12 | create-release:
13 | name: Create GitHub Release
14 | runs-on: ubuntu-latest
15 |
16 | steps:
17 | - name: Checkout code
18 | uses: actions/checkout@v4
19 | with:
20 | fetch-depth: 0 # Get all history for changelog
21 |
22 | - name: Set up Python
23 | uses: actions/setup-python@v5
24 | with:
25 | python-version: '3.11'
26 |
27 | - name: Install dependencies
28 | run: |
29 | python -m pip install --upgrade pip
30 | pip install build twine
31 |
32 | - name: Get version from tag
33 | id: get_version
34 | run: echo "VERSION=${GITHUB_REF#refs/tags/v}" >> $GITHUB_OUTPUT
35 |
36 | - name: Build package
37 | run: python -m build
38 |
39 | - name: Generate changelog
40 | id: changelog
41 | run: |
42 | # Get commits since last tag
43 | PREV_TAG=$(git describe --tags --abbrev=0 HEAD^ 2>/dev/null || echo "")
44 | if [ -z "$PREV_TAG" ]; then
45 | CHANGELOG=$(git log --pretty=format:"- %s (%h)" --reverse)
46 | else
47 | CHANGELOG=$(git log ${PREV_TAG}..HEAD --pretty=format:"- %s (%h)" --reverse)
48 | fi
49 |
50 | # Write to file for multiline handling
51 | echo "$CHANGELOG" > changelog.txt
52 |
53 |       - name: Create Release
54 |         id: create_release
55 |         uses: actions/create-release@v1
56 |         env:
57 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
58 |         with:
59 |           tag_name: ${{ github.ref }}
60 |           release_name: Release ${{ steps.get_version.outputs.VERSION }}
61 |           body_path: changelog.txt
62 |           draft: false
63 |           prerelease: false
64 | 
65 |       - name: Upload Release Assets
66 |         uses: actions/upload-release-asset@v1
67 |         env:
68 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
69 |         with:
70 |           upload_url: ${{ steps.create_release.outputs.upload_url }}
71 |           asset_path: ./dist/cascadeflow-${{ steps.get_version.outputs.VERSION }}.tar.gz
72 |           asset_name: cascadeflow-${{ steps.get_version.outputs.VERSION }}.tar.gz
73 |           asset_content_type: application/gzip
--------------------------------------------------------------------------------
/examples/integrations/docker-compose.yml:
--------------------------------------------------------------------------------
1 | # Docker Compose for cascadeflow OpenTelemetry + Grafana Stack
2 | #
3 | # This stack includes:
4 | # - OpenTelemetry Collector (receives metrics)
5 | # - Prometheus (stores metrics)
6 | # - Grafana (visualizes metrics)
7 | #
8 | # Usage:
9 | # docker-compose up -d # Start stack
10 | # docker-compose down # Stop stack
11 | # docker-compose logs -f # View logs
12 | #
13 | # Access:
14 | # - Grafana: http://localhost:3000 (admin/admin)
15 | # - Prometheus: http://localhost:9090
16 |
17 | version: '3.8'
18 |
19 | services:
20 | # OpenTelemetry Collector - Receives metrics from cascadeflow
21 | otel-collector:
22 | image: otel/opentelemetry-collector:latest
23 | container_name: cascadeflow-otel-collector
24 | command: ["--config=/etc/otel-collector-config.yaml"]
25 | volumes:
26 | - ./otel-collector-config.yaml:/etc/otel-collector-config.yaml
27 | ports:
28 | - "4318:4318" # OTLP HTTP receiver
29 | - "4317:4317" # OTLP gRPC receiver
30 | - "8888:8888" # Prometheus metrics exposed by the collector
31 | - "8889:8889" # Prometheus exporter metrics
32 | networks:
33 | - cascadeflow
34 |
35 | # Prometheus - Stores metrics
36 | prometheus:
37 | image: prom/prometheus:latest
38 | container_name: cascadeflow-prometheus
39 | command:
40 | - '--config.file=/etc/prometheus/prometheus.yml'
41 | - '--storage.tsdb.path=/prometheus'
42 | - '--web.console.libraries=/usr/share/prometheus/console_libraries'
43 | - '--web.console.templates=/usr/share/prometheus/consoles'
44 | volumes:
45 | - ./prometheus.yml:/etc/prometheus/prometheus.yml
46 | - prometheus-data:/prometheus
47 | ports:
48 | - "9090:9090"
49 | networks:
50 | - cascadeflow
51 | depends_on:
52 | - otel-collector
53 |
54 | # Grafana - Visualizes metrics
55 | grafana:
56 | image: grafana/grafana:latest
57 | container_name: cascadeflow-grafana
58 | environment:
59 | - GF_SECURITY_ADMIN_PASSWORD=admin
60 | - GF_SECURITY_ADMIN_USER=admin
61 | - GF_USERS_ALLOW_SIGN_UP=false
62 | volumes:
63 | - grafana-data:/var/lib/grafana
64 | - ./grafana-datasource.yml:/etc/grafana/provisioning/datasources/datasource.yml
65 | ports:
66 | - "3000:3000"
67 | networks:
68 | - cascadeflow
69 | depends_on:
70 | - prometheus
71 |
72 | networks:
73 | cascadeflow:
74 | driver: bridge
75 |
76 | volumes:
77 | prometheus-data:
78 | grafana-data:
79 |
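80 | # To point an application at this collector, the standard OpenTelemetry SDK
81 | # environment variable works (whether cascadeflow reads it depends on how its
82 | # exporter is configured):
83 | #   OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318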
--------------------------------------------------------------------------------
/cascadeflow/providers/deepseek.py:
--------------------------------------------------------------------------------
1 | """DeepSeek provider implementation.
2 |
3 | DeepSeek uses an OpenAI-compatible API, making it easy to integrate.
4 | The provider extends the OpenAI provider with DeepSeek-specific configuration.
5 |
6 | Environment Variables:
7 | DEEPSEEK_API_KEY: Your DeepSeek API key
8 |
9 | Models:
10 | - deepseek-coder: Specialized for code generation and understanding
11 | - deepseek-chat: General-purpose chat model
12 |
13 | Example:
14 | >>> from cascadeflow import CascadeAgent, ModelConfig
15 | >>> agent = CascadeAgent(
16 | ... models=[
17 | ... ModelConfig(name="deepseek-coder", provider="deepseek", cost=0.00014),
18 | ... ]
19 | ... )
20 | """
21 |
22 | import os
23 | from typing import Optional
24 |
25 | from .openai import OpenAIProvider
26 |
27 |
28 | class DeepSeekProvider(OpenAIProvider):
29 | """
30 | DeepSeek provider using OpenAI-compatible API.
31 |
32 | DeepSeek models are particularly strong at:
33 | - Code generation and understanding
34 | - Mathematical reasoning
35 | - General chat
36 |
37 | The API is fully compatible with OpenAI's API format.
38 | All methods are inherited from OpenAIProvider - only the
39 | base URL and API key source are different.
40 | """
41 |
42 | # DeepSeek API base URL
43 | BASE_URL = "https://api.deepseek.com"
44 |
45 | def __init__(
46 | self,
47 | api_key: Optional[str] = None,
48 | base_url: Optional[str] = None,
49 | **kwargs,
50 | ):
51 | """
52 | Initialize DeepSeek provider.
53 |
54 | Args:
55 | api_key: DeepSeek API key (defaults to DEEPSEEK_API_KEY env var)
56 | base_url: Custom base URL (defaults to DeepSeek API)
57 | **kwargs: Additional OpenAI provider options
58 | """
59 | # Get API key from environment if not provided
60 | deepseek_api_key = api_key or os.getenv("DEEPSEEK_API_KEY")
61 |
62 | if not deepseek_api_key:
63 | raise ValueError(
64 | "DeepSeek API key not found. "
65 | "Set DEEPSEEK_API_KEY environment variable or pass api_key parameter."
66 | )
67 |
68 | # Initialize parent OpenAI provider with DeepSeek API key
69 | super().__init__(api_key=deepseek_api_key, **kwargs)
70 |
71 | # Override base URL to use DeepSeek API
72 | self.base_url = base_url or self.BASE_URL
73 |
74 | @property
75 | def name(self) -> str:
76 | """Provider name."""
77 | return "deepseek"
78 |
--------------------------------------------------------------------------------
/cascadeflow/integrations/langchain/__init__.py:
--------------------------------------------------------------------------------
1 | """CascadeFlow LangChain Integration.
2 |
3 | Transparent wrapper for LangChain chat models with intelligent cascade logic
4 | for cost optimization.
5 |
6 | Example:
7 | >>> from langchain_openai import ChatOpenAI
8 |     >>> from cascadeflow.integrations.langchain import CascadeFlow
9 | >>>
10 | >>> drafter = ChatOpenAI(model='gpt-4o-mini')
11 | >>> verifier = ChatOpenAI(model='gpt-4o')
12 | >>>
13 | >>> cascade = CascadeFlow(
14 | ... drafter=drafter,
15 | ... verifier=verifier,
16 | ... quality_threshold=0.7
17 | ... )
18 | >>>
19 | >>> result = await cascade.ainvoke("What is TypeScript?")
20 | """
21 |
22 | from .wrapper import CascadeFlow, with_cascade
23 | from .types import CascadeConfig, CascadeResult, CostMetadata, TokenUsage
24 | from .utils import (
25 | calculate_quality,
26 | calculate_cost,
27 | calculate_savings,
28 | create_cost_metadata,
29 | extract_token_usage,
30 | MODEL_PRICING,
31 | )
32 |
33 | # Model discovery utilities - optional feature
34 | from .models import (
35 | MODEL_PRICING_REFERENCE,
36 | analyze_cascade_pair,
37 | suggest_cascade_pairs,
38 | discover_cascade_pairs,
39 | analyze_model,
40 | compare_models,
41 | find_best_cascade_pair,
42 | validate_cascade_pair,
43 | extract_model_name,
44 | get_provider,
45 | )
46 | from .cost_tracking import (
47 | BudgetTracker,
48 | CostHistory,
49 | CostEntry,
50 | track_costs,
51 | )
52 | from .langchain_callbacks import (
53 | CascadeFlowCallbackHandler,
54 | get_cascade_callback,
55 | )
56 |
57 | __all__ = [
58 | # Main classes
59 | "CascadeFlow",
60 | "with_cascade",
61 | # Types
62 | "CascadeConfig",
63 | "CascadeResult",
64 | "CostMetadata",
65 | "TokenUsage",
66 | # Utilities
67 | "calculate_quality",
68 | "calculate_cost",
69 | "calculate_savings",
70 | "create_cost_metadata",
71 | "extract_token_usage",
72 | "MODEL_PRICING",
73 | # Model discovery
74 | "MODEL_PRICING_REFERENCE",
75 | "analyze_cascade_pair",
76 | "suggest_cascade_pairs",
77 | "discover_cascade_pairs",
78 | "analyze_model",
79 | "compare_models",
80 | "find_best_cascade_pair",
81 | "validate_cascade_pair",
82 | "extract_model_name",
83 | "get_provider",
84 | # Cost tracking (Python-specific features)
85 | "BudgetTracker",
86 | "CostHistory",
87 | "CostEntry",
88 | "track_costs",
89 | # LangChain callback handlers
90 | "CascadeFlowCallbackHandler",
91 | "get_cascade_callback",
92 | ]
93 |
--------------------------------------------------------------------------------
/packages/core/quick-perf-test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Quick Performance Test - 3 runs for cost efficiency
3 | # Simplified version for faster analysis
4 |
5 | set -e
6 |
7 | RUNS=3
8 | OUTPUT_DIR="/tmp/cascadeflow-quickperf-$(date +%Y%m%d-%H%M%S)"
9 | mkdir -p "$OUTPUT_DIR"
10 |
11 | echo "╔═══════════════════════════════════════════════════════════════╗"
12 | echo "║ cascadeflow Quick Performance Test (3 runs) ║"
13 | echo "╚═══════════════════════════════════════════════════════════════╝"
14 | echo ""
15 |
16 | # Load environment
17 | if [ -f "../../.env" ]; then
18 | set -a
19 | source ../../.env
20 | set +a
21 | fi
22 |
23 | declare -a savings_array
24 | declare -a cost_array
25 |
26 | for i in {1..3}; do
27 | echo ""
28 | echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
29 | echo "Run $i/3"
30 | echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
31 |
32 | OUTPUT_FILE="$OUTPUT_DIR/run-$i.log"
33 |
34 | if npx tsx examples/nodejs/basic-usage.ts > "$OUTPUT_FILE" 2>&1; then
35 | echo "✓ Run $i completed"
36 |
37 | # Extract savings percentage
38 | savings=$(grep "💰 SAVINGS:" "$OUTPUT_FILE" | awk '{print $3}' | sed 's/[($%)]//g' || echo "0")
39 | cost=$(grep "Total Cost:" "$OUTPUT_FILE" | awk '{print $3}' | sed 's/\$//g' || echo "0")
40 |
41 | savings_array+=("$savings")
42 | cost_array+=("$cost")
43 |
44 | echo " Savings: ${savings}%"
45 | echo " Cost: \$${cost}"
46 | else
47 | echo "✗ Run $i failed"
48 | fi
49 |
50 | # Delay between runs
51 | if [ $i -lt 3 ]; then
52 | sleep 3
53 | fi
54 | done
55 |
56 | echo ""
57 | echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
58 | echo "📊 RESULTS"
59 | echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
60 |
61 | # Calculate average
62 | sum=0
63 | count=0
64 | for val in "${savings_array[@]}"; do
65 | if [ -n "$val" ]; then
66 | sum=$(echo "$sum + $val" | bc)
67 | count=$((count + 1))
68 | fi
69 | done
70 |
71 | if [ $count -gt 0 ]; then
72 | avg=$(echo "scale=2; $sum / $count" | bc)
73 | else
74 | avg="0"
75 | fi
76 |
77 | echo ""
78 | echo "Average Savings: ${avg}%"
79 | echo "Target: 40-60%"
80 | if (( $(echo "$avg >= 40" | bc -l) )); then
81 | echo "Status: ✓ Target achieved!"
82 | else
83 | echo "Status: ⚠ Below target"
84 | fi
85 |
86 | echo ""
87 | echo "Individual runs:"
88 | for i in {1..3}; do
89 | if [ -n "${savings_array[$i-1]}" ]; then
90 | echo " Run $i: ${savings_array[$i-1]}% (Cost: \$${cost_array[$i-1]})"
91 | fi
92 | done
93 |
94 | echo ""
95 | echo "Logs: $OUTPUT_DIR"
96 | echo ""
97 |
--------------------------------------------------------------------------------
/.github/assets/Lemony_logo_dark.svg:
--------------------------------------------------------------------------------
1 |
2 |
23 |
--------------------------------------------------------------------------------
/.github/assets/Lemony_logo_bright.svg:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/packages/langchain-cascadeflow/src/index.ts:
--------------------------------------------------------------------------------
1 | /**
2 | * @module @cascadeflow/langchain
3 | *
4 | * LangChain integration for cascadeflow - Add intelligent cost optimization
5 | * to your existing LangChain models without reconfiguration.
6 | *
7 | * @example
8 | * ```typescript
9 | * import { withCascade } from '@cascadeflow/langchain';
10 | * import { ChatOpenAI } from '@langchain/openai';
11 | *
12 | * // Your existing models
13 | * const drafter = new ChatOpenAI({ model: 'gpt-4o-mini' });
14 | * const verifier = new ChatOpenAI({ model: 'gpt-4o' });
15 | *
16 | * // Wrap with cascade (2 lines!)
17 | * const cascadeModel = withCascade({
18 | * drafter,
19 | * verifier,
20 | * qualityThreshold: 0.7
21 | * });
22 | *
23 | * // Use like any LangChain model - all features preserved!
24 | * const result = await cascadeModel.invoke("What is TypeScript?");
25 | * console.log(result);
26 | *
27 | * // Check cascade stats
28 | * const stats = cascadeModel.getLastCascadeResult();
29 | * console.log(`Saved: ${stats.savingsPercentage}%`);
30 | * ```
31 | */
32 |
33 | export { CascadeFlow } from './wrapper.js';
34 | export type { CascadeConfig, CascadeResult, CostMetadata } from './types.js';
35 | export * from './utils.js';
36 | export { analyzeCascadePair, suggestCascadePairs } from './helpers.js';
37 | export type { CascadeAnalysis } from './helpers.js';
38 |
39 | // Routers and complexity detection
40 | export { PreRouter, createPreRouter } from './routers/pre-router.js';
41 | export type { PreRouterConfig, PreRouterStats } from './routers/pre-router.js';
42 | export { Router, RoutingStrategy, RoutingDecisionHelper, RouterChain } from './routers/base.js';
43 | export type { RoutingDecision } from './routers/base.js';
44 | export { ComplexityDetector } from './complexity.js';
45 | export type { QueryComplexity, ComplexityResult } from './complexity.js';
46 |
47 | // Model discovery (works with YOUR models!)
48 | export {
49 | MODEL_PRICING_REFERENCE,
50 | discoverCascadePairs,
51 | analyzeModel,
52 | compareModels,
53 | findBestCascadePair,
54 | validateCascadePair,
55 | } from './models.js';
56 |
57 | import { CascadeFlow } from './wrapper.js';
58 | import type { CascadeConfig } from './types.js';
59 |
60 | /**
61 | * Convenient helper to create a CascadeFlow model
62 | *
63 | * @param config - Cascade configuration with drafter/verifier models
64 | * @returns A wrapped model that cascades from drafter to verifier
65 | *
66 | * @example
67 | * ```typescript
68 | * const model = withCascade({
69 | * drafter: new ChatOpenAI({ model: 'gpt-4o-mini' }),
70 | * verifier: new ChatOpenAI({ model: 'gpt-4o' }),
71 | * qualityThreshold: 0.7
72 | * });
73 | * ```
74 | */
75 | export function withCascade(config: CascadeConfig): CascadeFlow {
76 | return new CascadeFlow(config);
77 | }
78 |
--------------------------------------------------------------------------------
/examples/guardrails_usage.py:
--------------------------------------------------------------------------------
1 | """
2 | Example: Guardrails (Content Moderation + PII Detection) v0.2.1
3 |
4 | Demonstrates content safety and PII detection for production use.
5 | """
6 |
7 | import asyncio
8 |
9 | from cascadeflow import (
10 | GuardrailsManager,
11 | TierLevel,
12 | UserProfile,
13 | )
14 |
15 |
16 | async def main():
17 | print("=" * 60)
18 | print("cascadeflow v0.2.1 - Guardrails")
19 | print("=" * 60)
20 |
21 | # Create profile with guardrails enabled
22 | profile = UserProfile.from_tier(
23 | TierLevel.PRO,
24 | user_id="secure_user",
25 | enable_content_moderation=True,
26 | enable_pii_detection=True,
27 | )
28 |
29 | print("\nUser profile:")
30 | print(f" Tier: {profile.tier.name}")
31 | print(f" Content moderation: {profile.enable_content_moderation}")
32 | print(f" PII detection: {profile.enable_pii_detection}")
33 |
34 | # Initialize guardrails manager
35 | manager = GuardrailsManager()
36 |
37 | # Example 1: Safe content
38 | print("\n1. Safe content check")
39 | print("-" * 60)
40 | safe_text = "What is the capital of France?"
41 | result = await manager.check_content(safe_text, profile)
42 | print(f"Text: {safe_text}")
43 | print(f"Safe: {result.is_safe}")
44 |
45 | # Example 2: PII detection
46 | print("\n2. PII detection")
47 | print("-" * 60)
48 | pii_text = "My email is john.doe@example.com and phone is 555-123-4567"
49 | result = await manager.check_content(pii_text, profile)
50 | print(f"Text: {pii_text}")
51 | print(f"Safe: {result.is_safe}")
52 | if result.pii_detected:
53 | print(f"PII detected: {len(result.pii_detected)} matches")
54 | for match in result.pii_detected:
55 | print(f" - {match.pii_type}: {match.value}")
56 |
57 | # Example 3: PII redaction
58 | print("\n3. PII redaction")
59 | print("-" * 60)
60 | redacted_text, matches = await manager.redact_pii(pii_text, profile)
61 | print(f"Original: {pii_text}")
62 | print(f"Redacted: {redacted_text}")
63 |
64 | # Example 4: Disable guardrails
65 | print("\n4. Disabled guardrails")
66 | print("-" * 60)
67 | no_guards_profile = UserProfile.from_tier(
68 | TierLevel.FREE,
69 | user_id="basic_user",
70 | enable_content_moderation=False,
71 | enable_pii_detection=False,
72 | )
73 | result = await manager.check_content(pii_text, no_guards_profile)
74 | print(f"Content moderation: {no_guards_profile.enable_content_moderation}")
75 | print(f"PII detection: {no_guards_profile.enable_pii_detection}")
76 | print(f"Result: {result.is_safe} (guardrails disabled)")
77 |
78 | print("\n" + "=" * 60)
79 | print("Guardrails examples completed!")
80 | print("=" * 60)
81 |
82 |
83 | if __name__ == "__main__":
84 | asyncio.run(main())
85 |
--------------------------------------------------------------------------------
/cascadeflow/integrations/langchain/types.py:
--------------------------------------------------------------------------------
1 | """Type definitions for CascadeFlow LangChain integration."""
2 |
3 | from typing import Optional, TypedDict
4 |
5 |
6 | class TokenUsage(TypedDict):
7 | """Token usage tracking."""
8 |
9 | input: int
10 | output: int
11 |
12 |
13 | class CostMetadata(TypedDict, total=False):
14 | """Cost tracking metadata for cascade execution.
15 |
16 | Attributes:
17 | drafter_tokens: Token usage for drafter model
18 | verifier_tokens: Token usage for verifier model (optional)
19 | drafter_cost: Cost of drafter execution in USD
20 | verifier_cost: Cost of verifier execution in USD
21 | total_cost: Total cost in USD
22 | savings_percentage: Savings percentage vs. always using verifier
23 | model_used: Which model was used ('drafter' or 'verifier')
24 | accepted: Whether drafter response was accepted
25 | drafter_quality: Quality score of drafter response (0-1)
26 | """
27 |
28 | drafter_tokens: TokenUsage
29 | verifier_tokens: Optional[TokenUsage]
30 | drafter_cost: float
31 | verifier_cost: float
32 | total_cost: float
33 | savings_percentage: float
34 | model_used: str
35 | accepted: bool
36 | drafter_quality: float
37 |
38 |
39 | class CascadeResult(TypedDict):
40 | """Result of cascade execution.
41 |
42 | Attributes:
43 | content: Final response content
44 | model_used: Which model was used ('drafter' or 'verifier')
45 | accepted: Whether drafter response was accepted
46 | drafter_quality: Quality score of drafter response (0-1)
47 | drafter_cost: Cost of drafter execution in USD
48 | verifier_cost: Cost of verifier execution in USD
49 | total_cost: Total cost in USD
50 | savings_percentage: Savings percentage vs. always using verifier
51 | latency_ms: Total latency in milliseconds
52 | """
53 |
54 | content: str
55 | model_used: str
56 | accepted: bool
57 | drafter_quality: float
58 | drafter_cost: float
59 | verifier_cost: float
60 | total_cost: float
61 | savings_percentage: float
62 | latency_ms: float
63 |
64 |
65 | class CascadeConfig(TypedDict, total=False):
66 | """Configuration for cascade behavior.
67 |
68 | Attributes:
69 | quality_threshold: Quality threshold for accepting drafter responses (0-1)
70 | enable_cost_tracking: Enable automatic cost tracking
71 | cost_tracking_provider: Cost tracking provider ('langsmith' or 'cascadeflow')
72 | enable_pre_router: Enable pre-routing based on query complexity
73 | cascade_complexities: Complexity levels that should use cascade
74 | """
75 |
76 | quality_threshold: float
77 | enable_cost_tracking: bool
78 | cost_tracking_provider: str
79 | enable_pre_router: bool
80 | cascade_complexities: list[str]
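81 | 
82 | # Illustrative: CascadeConfig is declared total=False, so any subset of keys
83 | # type-checks:
84 | #   config: CascadeConfig = {"quality_threshold": 0.7, "enable_pre_router": True}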
81 |
--------------------------------------------------------------------------------
/packages/langchain-cascadeflow/examples/streaming-cascade.ts:
--------------------------------------------------------------------------------
1 | /**
2 | * Streaming Cascade Example
3 | *
4 | * Demonstrates real-time streaming with CascadeFlow:
5 | * 1. Stream drafter optimistically (user sees output immediately)
6 | * 2. Check quality after drafter completes
7 | * 3. If quality insufficient, show switch message and stream verifier
8 | */
9 |
10 | import { ChatOpenAI } from '@langchain/openai';
11 | import { withCascade } from '../src/index.js';
12 |
13 | async function main() {
14 | console.log('🌊 CascadeFlow Streaming Example\n');
15 |
16 | // Configure cascade with drafter and verifier
17 | const cascade = withCascade({
18 | drafter: new ChatOpenAI({
19 | model: 'gpt-4o-mini',
20 | temperature: 0.7,
21 | }),
22 | verifier: new ChatOpenAI({
23 | model: 'gpt-4o',
24 | temperature: 0.7,
25 | }),
26 | qualityThreshold: 0.7,
27 | });
28 |
29 | // Example 1: Simple query (likely accepted by drafter)
30 |   console.log('Example 1: Simple Query (likely accepted by drafter)\n');
31 | console.log('Q: What is 2+2?\n');
32 | console.log('A: ');
33 |
34 | const stream1 = await cascade.stream('What is 2+2?');
35 |
36 | for await (const chunk of stream1) {
37 | const content = typeof chunk.content === 'string' ? chunk.content : '';
38 | process.stdout.write(content);
39 | }
40 |
41 | console.log('\n\n---\n');
42 |
43 | // Example 2: Complex query (likely escalated to verifier)
44 | console.log('Example 2: Complex Query (may escalate)\n');
45 | console.log('Q: Explain quantum entanglement and its implications for quantum computing\n');
46 | console.log('A: ');
47 |
48 | const stream2 = await cascade.stream(
49 | 'Explain quantum entanglement and its implications for quantum computing'
50 | );
51 |
52 | for await (const chunk of stream2) {
53 | const content = typeof chunk.content === 'string' ? chunk.content : '';
54 | process.stdout.write(content);
55 | }
56 |
57 | console.log('\n\n---\n');
58 |
59 |   // Example 3: Ambiguous query (likely escalates to verifier)
60 | console.log('Example 3: Ambiguous Query (likely escalates)\n');
61 | console.log('Q: Tell me about it\n');
62 | console.log('A: ');
63 |
64 | const stream3 = await cascade.stream('Tell me about it');
65 |
66 | for await (const chunk of stream3) {
67 | const content = typeof chunk.content === 'string' ? chunk.content : '';
68 | process.stdout.write(content);
69 | }
70 |
71 | console.log('\n\n---\n');
72 |
73 | // Show final cascade statistics
74 | const stats = cascade.getLastCascadeResult();
75 | if (stats) {
76 | console.log('\n📊 Cascade Statistics:');
77 | console.log(` Model Used: ${stats.modelUsed}`);
78 | console.log(` Drafter Quality: ${stats.drafterQuality.toFixed(2)}`);
79 | console.log(` Accepted: ${stats.accepted}`);
80 | console.log(` Latency: ${stats.latencyMs}ms`);
81 | }
82 | }
83 |
84 | main().catch(console.error);
85 |
--------------------------------------------------------------------------------
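The example above relies on optimistic streaming: drafter chunks are emitted immediately, the completed draft is scored, and only on a failing score does output switch to the verifier. A conceptual Python sketch of that control flow, with hypothetical `stream_drafter`, `stream_verifier`, and `score_quality` helpers standing in for the real implementation (none of these names are part of the cascadeflow API):

```python
# Conceptual sketch only -- the three helper callables are hypothetical.
from typing import AsyncIterator, Awaitable, Callable

async def optimistic_stream(
    prompt: str,
    stream_drafter: Callable[[str], AsyncIterator[str]],
    stream_verifier: Callable[[str], AsyncIterator[str]],
    score_quality: Callable[[str], Awaitable[float]],
    threshold: float = 0.7,
) -> AsyncIterator[str]:
    # 1. Stream the drafter immediately so the user sees output right away.
    draft_chunks: list[str] = []
    async for chunk in stream_drafter(prompt):
        draft_chunks.append(chunk)
        yield chunk

    # 2. Score the completed draft.
    quality = await score_quality("".join(draft_chunks))

    # 3. If quality is insufficient, announce the switch and stream the verifier.
    if quality < threshold:
        yield "\n[switching to verifier]\n"
        async for chunk in stream_verifier(prompt):
            yield chunk
```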
/scripts/test-typescript-examples.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Test TypeScript examples with proper workspace setup
3 | #
4 | # This script:
5 | # 1. Ensures the package is built
6 | # 2. Loads environment variables
7 | # 3. Runs each TypeScript example
8 | # 4. Reports results
9 |
10 | set -e
11 |
12 | echo "====================================="
13 | echo "TypeScript Examples Testing"
14 | echo "====================================="
15 |
16 | # Navigate to examples directory
17 | cd "$(dirname "$0")/../packages/core/examples" || exit 1
18 |
19 | # Build parent package if needed
20 | if [ ! -d "../dist" ]; then
21 | echo "📦 Building @cascadeflow/core..."
22 | cd ..
23 | pnpm build
24 | cd examples
25 | fi
26 |
27 | # Load environment variables
28 | if [ -f "../../../.env" ]; then
29 | set -a
30 | source ../../../.env
31 | set +a
32 | echo "✅ Environment variables loaded"
33 | else
34 | echo "⚠️ No .env file found"
35 | fi
36 |
37 | echo ""
38 | echo "Running TypeScript examples..."
39 | echo "-------------------------------------"
40 |
41 | PASSED=0
42 | FAILED=0
43 |
44 | # Test Node.js examples
45 | NODE_EXAMPLES=(
46 | "nodejs/basic-usage.ts"
47 | "nodejs/tool-calling.ts"
48 | "nodejs/multi-provider.ts"
49 | "nodejs/production-patterns.ts"
50 | )
51 |
52 | for example in "${NODE_EXAMPLES[@]}"; do
53 | echo ""
54 | echo "Testing: $example"
55 |
56 | if npx tsx "$example" > /dev/null 2>&1; then
57 | echo "✅ PASSED"
58 |         PASSED=$((PASSED+1))  # not ((PASSED++)): that returns exit code 1 when the count is 0, aborting the script under set -e
59 | else
60 | echo "❌ FAILED"
61 | # Show error
62 | npx tsx "$example" 2>&1 | tail -20
63 |         FAILED=$((FAILED+1))
64 | fi
65 | done
66 |
67 | # Test streaming example
68 | if [ -f "streaming.ts" ]; then
69 | echo ""
70 | echo "Testing: streaming.ts"
71 |
72 | if npx tsx streaming.ts > /dev/null 2>&1; then
73 | echo "✅ PASSED"
74 |         PASSED=$((PASSED+1))
75 | else
76 | echo "❌ FAILED"
77 | npx tsx streaming.ts 2>&1 | tail -20
78 |         FAILED=$((FAILED+1))
79 | fi
80 | fi
81 |
82 | # Validate browser example (compile only)
83 | if [ -f "browser/vercel-edge/api/chat.ts" ]; then
84 | echo ""
85 | echo "Validating: browser/vercel-edge/api/chat.ts"
86 |
87 | # Just check if it compiles
88 | if npx tsc --noEmit browser/vercel-edge/api/chat.ts 2>/dev/null; then
89 | echo "✅ VALIDATED (compiles)"
90 |         PASSED=$((PASSED+1))
91 | else
92 | echo "⚠️ VALIDATION WARNING"
93 | npx tsc --noEmit browser/vercel-edge/api/chat.ts 2>&1 | tail -10
94 | # Don't count as failure - browser examples need special env
95 | fi
96 | fi
97 |
98 | echo ""
99 | echo "====================================="
100 | echo "Summary:"
101 | echo " ✅ Passed: $PASSED"
102 | echo " ❌ Failed: $FAILED"
103 | echo "====================================="
104 |
105 | exit $FAILED
106 |
--------------------------------------------------------------------------------
/packages/core/test-examples.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Comprehensive example test runner
3 | # Tests TypeScript examples with proper environment setup
4 |
5 | set -e
6 |
7 | # Colors for output
8 | RED='\033[0;31m'
9 | GREEN='\033[0;32m'
10 | YELLOW='\033[1;33m'
11 | NC='\033[0m' # No Color
12 |
13 | # Load environment variables from root .env
14 | if [ -f "../../.env" ]; then
15 | echo "📋 Loading environment from ../../.env"
16 | set -a
17 | source ../../.env
18 | set +a
19 | else
20 | echo "⚠️ Warning: ../../.env not found"
21 | fi
22 |
23 | # Track results
24 | PASSED=0
25 | FAILED=0
26 | SKIPPED=0
27 | declare -a FAILED_EXAMPLES
28 |
29 | # Function to test an example
30 | test_example() {
31 | local example=$1
32 | local timeout=${2:-60}
33 |
34 | echo ""
35 | echo "=========================================="
36 | echo "Testing: $example"
37 | echo "=========================================="
38 |
39 | if [ ! -f "examples/nodejs/$example" ]; then
40 | echo -e "${YELLOW}⏭️ SKIPPED${NC}: File not found"
41 |         SKIPPED=$((SKIPPED+1))  # not ((SKIPPED++)): returns exit code 1 when the count is 0, aborting under set -e
42 | return
43 | fi
44 |
45 | # Run example (no timeout on macOS)
46 | if npx tsx "examples/nodejs/$example" > "/tmp/test-$example.log" 2>&1; then
47 | echo -e "${GREEN}✅ PASSED${NC}"
48 |         PASSED=$((PASSED+1))
49 | # Show last 20 lines of output
50 | echo "--- Last 20 lines of output ---"
51 | tail -20 "/tmp/test-$example.log"
52 | else
53 | EXIT_CODE=$?
54 | echo -e "${RED}❌ FAILED${NC} (exit code: $EXIT_CODE)"
55 |         FAILED=$((FAILED+1))
56 | FAILED_EXAMPLES+=("$example")
57 | # Show last 50 lines for debugging
58 | echo "--- Last 50 lines of output ---"
59 | tail -50 "/tmp/test-$example.log"
60 | fi
61 | }
62 |
63 | # Phase 1: Test remaining untested examples
64 | echo "🚀 PHASE 1: Testing Remaining Examples"
65 | echo "========================================"
66 |
67 | test_example "cost-tracking.ts" 60
68 | test_example "free-models-cascade.ts" 60
69 | test_example "multi-instance-ollama.ts" 120
70 | test_example "multi-instance-vllm.ts" 120
71 | test_example "production-patterns.ts" 90
72 | test_example "reasoning-models.ts" 120
73 | test_example "semantic-quality.ts" 90
74 | test_example "user-profiles-workflows.ts" 60
75 | test_example "factory-methods.ts" 60
76 |
77 | # Summary
78 | echo ""
79 | echo "=========================================="
80 | echo "📊 TEST SUMMARY"
81 | echo "=========================================="
82 | echo -e "${GREEN}Passed: $PASSED${NC}"
83 | echo -e "${RED}Failed: $FAILED${NC}"
84 | echo -e "${YELLOW}Skipped: $SKIPPED${NC}"
85 | echo ""
86 |
87 | if [ $FAILED -gt 0 ]; then
88 | echo "Failed examples:"
89 | for example in "${FAILED_EXAMPLES[@]}"; do
90 | echo " ❌ $example"
91 | done
92 | echo ""
93 | echo "Logs available in /tmp/test-*.log"
94 | exit 1
95 | else
96 | echo "✅ All tests passed!"
97 | exit 0
98 | fi
99 |
--------------------------------------------------------------------------------
/packages/core/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "@cascadeflow/core",
3 | "version": "0.6.5",
4 | "description": "cascadeflow TypeScript library - Smart AI model cascading for cost optimization",
5 | "author": {
6 | "name": "Lemony Inc.",
7 | "email": "hello@lemony.ai"
8 | },
9 | "license": "MIT",
10 | "repository": {
11 | "type": "git",
12 | "url": "https://github.com/lemony-ai/cascadeflow.git",
13 | "directory": "packages/core"
14 | },
15 | "main": "./dist/index.js",
16 | "module": "./dist/index.mjs",
17 | "types": "./dist/index.d.ts",
18 | "exports": {
19 | ".": {
20 | "import": "./dist/index.mjs",
21 | "require": "./dist/index.js",
22 | "types": "./dist/index.d.ts"
23 | }
24 | },
25 | "files": [
26 | "dist",
27 | "README.md"
28 | ],
29 | "scripts": {
30 | "build": "tsup src/index.ts --format cjs,esm --dts --clean",
31 | "dev": "tsup src/index.ts --format cjs,esm --dts --watch",
32 | "test": "vitest run",
33 | "test:watch": "vitest",
34 | "lint": "eslint src --ext .ts",
35 | "typecheck": "tsc --noEmit",
36 | "typecheck:examples": "tsc --noEmit -p examples/nodejs/tsconfig.json",
37 | "docs": "typedoc",
38 | "docs:watch": "typedoc --watch",
39 | "clean": "rm -rf dist"
40 | },
41 | "keywords": [
42 | "ai",
43 | "llm",
44 | "cost-optimization",
45 | "model-routing",
46 | "cascade",
47 | "openai",
48 | "anthropic",
49 | "groq",
50 | "huggingface",
51 | "ollama",
52 | "vllm",
53 | "together",
54 | "typescript",
55 | "browser",
56 | "edge-functions"
57 | ],
58 | "dependencies": {
59 | "@cascadeflow/ml": "^0.6.5"
60 | },
61 | "peerDependencies": {
62 | "@anthropic-ai/sdk": "^0.30.0",
63 | "@huggingface/inference": "^2.8.0",
64 | "@xenova/transformers": "^2.17.2",
65 | "groq-sdk": "^0.5.0",
66 | "openai": "^4.0.0"
67 | },
68 | "peerDependenciesMeta": {
69 | "openai": {
70 | "optional": true
71 | },
72 | "@anthropic-ai/sdk": {
73 | "optional": true
74 | },
75 | "groq-sdk": {
76 | "optional": true
77 | },
78 | "@huggingface/inference": {
79 | "optional": true
80 | },
81 | "@xenova/transformers": {
82 | "optional": true
83 | }
84 | },
85 | "devDependencies": {
86 | "@anthropic-ai/sdk": "^0.30.0",
87 | "@huggingface/inference": "^2.8.0",
88 | "@types/express": "^5.0.5",
89 | "@types/node": "^20.10.0",
90 | "@typescript-eslint/eslint-plugin": "^6.15.0",
91 | "@typescript-eslint/parser": "^6.15.0",
92 | "eslint": "^8.55.0",
93 | "express": "^5.1.0",
94 | "groq-sdk": "^0.5.0",
95 | "openai": "^4.73.1",
96 | "tsup": "^8.0.1",
97 | "typedoc": "^0.28.14",
98 | "typescript": "^5.3.3",
99 | "vitest": "^1.0.4",
100 | "zod": "^4.1.12"
101 | },
102 | "engines": {
103 | "node": ">=18.0.0"
104 | }
105 | }
106 |
--------------------------------------------------------------------------------
/packages/core/tests/basic-test.ts:
--------------------------------------------------------------------------------
1 | /**
2 | * Basic integration test for cascadeflow TypeScript library
3 | *
4 | * This test validates:
5 | * - Package imports work correctly
6 | * - OpenAI provider initializes
7 | * - Cascade logic executes
8 | * - Real API calls work
9 | * - Cost tracking functions
10 | */
11 |
12 | import { describe, it, expect } from 'vitest';
13 | import { CascadeAgent } from '../src';
14 |
15 | // Check for API key
16 | const apiKey = process.env.OPENAI_API_KEY;
17 | const hasApiKey = !!apiKey;
18 |
19 | describe('Basic Integration Tests', () => {
20 | it.skipIf(!hasApiKey)('should run basic cascade with OpenAI', async () => {
21 | console.log('🧪 Testing cascadeflow TypeScript Library\n');
22 |
23 | console.log('✅ OpenAI API key found');
24 | console.log(` Key: ${apiKey!.substring(0, 10)}...${apiKey!.substring(apiKey!.length - 4)}\n`);
25 |
26 | // Create agent with two-tier cascade
27 | console.log('📦 Creating CascadeAgent...');
28 | const agent = new CascadeAgent({
29 | models: [
30 | {
31 | name: 'gpt-4o-mini',
32 | provider: 'openai',
33 | cost: 0.00015,
34 | apiKey,
35 | },
36 | {
37 | name: 'gpt-4o',
38 | provider: 'openai',
39 | cost: 0.00625,
40 | apiKey,
41 | },
42 | ],
43 | });
44 |
45 | console.log(`✅ Agent created with ${agent.getModelCount()} models`);
46 | console.log(` Models: ${agent.getModels().map(m => m.name).join(' → ')}\n`);
47 |
48 | // Test 1: Simple query (should use draft model)
49 | console.log('🔍 Test 1: Simple query (expect draft model)');
50 | console.log(' Query: "What is TypeScript?"\n');
51 |
52 | const startTime = Date.now();
53 | const result1 = await agent.run('What is TypeScript?');
54 | const elapsed = Date.now() - startTime;
55 |
56 | console.log('📊 Result:');
57 | console.log(` Model used: ${result1.modelUsed}`);
58 | console.log(` Response: ${result1.content.substring(0, 100)}...`);
59 | console.log(` Cost: $${result1.totalCost.toFixed(6)}`);
60 | console.log(` Latency: ${elapsed}ms`);
61 | console.log(` Cascaded: ${result1.cascaded ? 'Yes' : 'No'}`);
62 | console.log(` Draft accepted: ${result1.draftAccepted ? 'Yes' : 'No'}`);
63 |
64 | if (result1.savingsPercentage !== undefined) {
65 | console.log(` Savings: ${result1.savingsPercentage.toFixed(1)}%`);
66 | }
67 |
68 | console.log('');
69 |
70 | // Validate result
71 | expect(result1.content).toBeTruthy();
72 | expect(result1.content.length).toBeGreaterThan(0);
73 | expect(result1.totalCost).toBeGreaterThan(0);
74 |
75 | console.log('✅ Test 1 passed!\n');
76 |
77 | // Summary
78 | console.log('═══════════════════════════════════════════');
79 | console.log('🎉 All tests passed!');
80 | console.log('═══════════════════════════════════════════');
81 | console.log(`Total cost: $${result1.totalCost.toFixed(6)}`);
82 | console.log(`Total time: ${elapsed}ms`);
83 | console.log('');
84 | }, 30000); // 30 second timeout for API calls
85 | });
86 |
--------------------------------------------------------------------------------
/cascadeflow/schema/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Data schemas and configuration for cascadeflow.
3 |
4 | This module contains:
5 | - Configuration dataclasses (ModelConfig, CascadeConfig, etc.)
6 | - Domain configuration (DomainConfig, DomainValidationMethod)
7 | - Model registry (ModelRegistry, ModelRegistryEntry)
8 | - Result dataclasses (CascadeResult)
9 | - Custom exceptions
10 | """
11 |
12 | from .config import (
13 | DEFAULT_TIERS,
14 | EXAMPLE_WORKFLOWS,
15 | CascadeConfig,
16 | LatencyProfile,
17 | ModelConfig,
18 | OptimizationWeights,
19 | UserTier,
20 | WorkflowProfile,
21 | )
22 | from .domain_config import (
23 | BUILTIN_DOMAIN_CONFIGS,
24 | DomainConfig,
25 | DomainValidationMethod,
26 | create_domain_config,
27 | get_builtin_domain_config,
28 | # Domain string constants (avoid circular imports)
29 | DOMAIN_CODE,
30 | DOMAIN_DATA,
31 | DOMAIN_STRUCTURED,
32 | DOMAIN_RAG,
33 | DOMAIN_CONVERSATION,
34 | DOMAIN_TOOL,
35 | DOMAIN_CREATIVE,
36 | DOMAIN_SUMMARY,
37 | DOMAIN_TRANSLATION,
38 | DOMAIN_MATH,
39 | DOMAIN_SCIENCE,
40 | DOMAIN_MEDICAL,
41 | DOMAIN_LEGAL,
42 | DOMAIN_FINANCIAL,
43 | DOMAIN_GENERAL,
44 | )
45 | from .exceptions import (
46 | AuthenticationError,
47 | BudgetExceededError,
48 | cascadeflowError,
49 | ConfigError,
50 | ModelError,
51 | ProviderError,
52 | QualityThresholdError,
53 | RateLimitError,
54 | RoutingError,
55 | TimeoutError,
56 | ToolExecutionError,
57 | ValidationError,
58 | )
59 | from .model_registry import (
60 | ModelRegistry,
61 | ModelRegistryEntry,
62 | get_default_registry,
63 | get_model,
64 | has_model,
65 | )
66 | from .result import CascadeResult
67 |
68 | __all__ = [
69 | # Configuration
70 | "ModelConfig",
71 | "CascadeConfig",
72 | "UserTier",
73 | "WorkflowProfile",
74 | "LatencyProfile",
75 | "OptimizationWeights",
76 | "DEFAULT_TIERS",
77 | "EXAMPLE_WORKFLOWS",
78 | # Domain Configuration
79 | "DomainConfig",
80 | "DomainValidationMethod",
81 | "BUILTIN_DOMAIN_CONFIGS",
82 | "create_domain_config",
83 | "get_builtin_domain_config",
84 | # Domain string constants
85 | "DOMAIN_CODE",
86 | "DOMAIN_DATA",
87 | "DOMAIN_STRUCTURED",
88 | "DOMAIN_RAG",
89 | "DOMAIN_CONVERSATION",
90 | "DOMAIN_TOOL",
91 | "DOMAIN_CREATIVE",
92 | "DOMAIN_SUMMARY",
93 | "DOMAIN_TRANSLATION",
94 | "DOMAIN_MATH",
95 | "DOMAIN_SCIENCE",
96 | "DOMAIN_MEDICAL",
97 | "DOMAIN_LEGAL",
98 | "DOMAIN_FINANCIAL",
99 | "DOMAIN_GENERAL",
100 | # Model Registry
101 | "ModelRegistry",
102 | "ModelRegistryEntry",
103 | "get_default_registry",
104 | "get_model",
105 | "has_model",
106 | # Exceptions
107 | "cascadeflowError",
108 | "ConfigError",
109 | "ProviderError",
110 | "AuthenticationError",
111 | "TimeoutError",
112 | "ModelError",
113 | "BudgetExceededError",
114 | "RateLimitError",
115 | "QualityThresholdError",
116 | "RoutingError",
117 | "ValidationError",
118 | "ToolExecutionError",
119 | # Results
120 | "CascadeResult",
121 | ]
122 |
--------------------------------------------------------------------------------
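A short usage sketch of the re-exports above; the lookup signatures are assumptions based on the names (`get_builtin_domain_config` keyed by a `DOMAIN_*` string, `get_model`/`has_model` keyed by model name):

```python
from cascadeflow.schema import (
    DOMAIN_CODE,
    get_builtin_domain_config,
    get_default_registry,
    get_model,
    has_model,
)

# Assumption: builtin domain configs are keyed by the DOMAIN_* string constants.
code_config = get_builtin_domain_config(DOMAIN_CODE)

# Assumption: the registry helpers look models up by name.
registry = get_default_registry()
if has_model("gpt-4o-mini"):
    entry = get_model("gpt-4o-mini")
```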
/cascadeflow/providers/__init__.py:
--------------------------------------------------------------------------------
1 | """Provider implementations for cascadeflow."""
2 |
3 | import logging
4 | from typing import Optional
5 |
6 | from .anthropic import AnthropicProvider
7 | from .base import PROVIDER_CAPABILITIES, BaseProvider, ModelResponse
8 | from .deepseek import DeepSeekProvider
9 | from .groq import GroqProvider
10 | from .huggingface import HuggingFaceProvider
11 | from .ollama import OllamaProvider
12 | from .openai import OpenAIProvider
13 | from .openrouter import OpenRouterProvider
14 | from .together import TogetherProvider
15 | from .vllm import VLLMProvider
16 |
17 | logger = logging.getLogger(__name__)
18 |
19 |
20 | # Provider registry - simple dict mapping
21 | PROVIDER_REGISTRY = {
22 | "openai": OpenAIProvider,
23 | "anthropic": AnthropicProvider,
24 | "ollama": OllamaProvider,
25 | "groq": GroqProvider,
26 | "vllm": VLLMProvider,
27 | "huggingface": HuggingFaceProvider,
28 | "together": TogetherProvider,
29 | "openrouter": OpenRouterProvider,
30 | "deepseek": DeepSeekProvider,
31 | }
32 |
33 |
34 | # Optional convenience functions (can be removed if not needed)
35 |
36 |
37 | def get_provider(provider_name: str) -> Optional[BaseProvider]:
38 | """
39 | Get initialized provider instance.
40 |
41 | Convenience function - handles initialization and errors gracefully.
42 |
43 | Args:
44 | provider_name: Name of provider (e.g., 'openai', 'anthropic')
45 |
46 | Returns:
47 | Provider instance or None if initialization fails
48 | """
49 | if provider_name not in PROVIDER_REGISTRY:
50 | logger.warning(f"Unknown provider: {provider_name}")
51 | return None
52 |
53 | try:
54 | provider_class = PROVIDER_REGISTRY[provider_name]
55 | provider = provider_class()
56 | logger.debug(f"Initialized {provider_name} provider")
57 | return provider
58 | except Exception as e:
59 | logger.debug(f"Could not initialize {provider_name}: {e}")
60 | return None
61 |
62 |
63 | def get_available_providers() -> dict[str, BaseProvider]:
64 | """
65 | Get all providers that can be initialized (have API keys set).
66 |
67 | Useful for auto-discovery of available providers.
68 |
69 | Returns:
70 | Dict of provider_name -> provider_instance
71 | """
72 | providers = {}
73 |
74 | for provider_name in PROVIDER_REGISTRY.keys():
75 | provider = get_provider(provider_name)
76 | if provider is not None:
77 | providers[provider_name] = provider
78 |
79 | if providers:
80 | logger.info(f"Available providers: {', '.join(providers.keys())}")
81 | else:
82 | logger.warning("No providers available. Check API keys in .env")
83 |
84 | return providers
85 |
86 |
87 | # Exports
88 | __all__ = [
89 | "BaseProvider",
90 | "ModelResponse",
91 | "PROVIDER_CAPABILITIES",
92 | "OpenAIProvider",
93 | "AnthropicProvider",
94 | "OllamaProvider",
95 | "GroqProvider",
96 | "VLLMProvider",
97 | "HuggingFaceProvider",
98 | "TogetherProvider",
99 | "OpenRouterProvider",
100 | "DeepSeekProvider",
101 | "PROVIDER_REGISTRY",
102 | "get_provider",
103 | "get_available_providers",
104 | ]
105 |
--------------------------------------------------------------------------------
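The registry functions above make provider discovery a two-liner; a brief sketch (which providers initialize depends on the API keys present in the environment):

```python
from cascadeflow.providers import get_available_providers, get_provider

# Returns None (with a debug log) instead of raising when the key is missing.
openai_provider = get_provider("openai")
if openai_provider is None:
    print("OPENAI_API_KEY not set or provider failed to initialize")

# Auto-discover everything that can be initialized from the environment.
available = get_available_providers()
print(f"Ready to use: {sorted(available)}")
```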
/tests/test_together.py:
--------------------------------------------------------------------------------
1 | """Tests for Together.ai provider."""
2 |
3 | import os
4 | from unittest.mock import MagicMock, patch
5 |
6 | import pytest
7 |
8 | from cascadeflow.providers.base import ModelResponse
9 | from cascadeflow.providers.together import TogetherProvider
10 |
11 |
12 | @pytest.fixture
13 | def mock_env():
14 | """Mock environment variables."""
15 | with patch.dict(os.environ, {"TOGETHER_API_KEY": "test_key"}):
16 | yield
17 |
18 |
19 | @pytest.fixture
20 | def together_provider(mock_env):
21 | """Create Together.ai provider for testing."""
22 | return TogetherProvider()
23 |
24 |
25 | @pytest.fixture
26 | def mock_together_response():
27 | """Mock successful Together.ai API response."""
28 | return {
29 | "choices": [{"message": {"content": "This is a test response."}, "finish_reason": "stop"}],
30 | "usage": {"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30},
31 | }
32 |
33 |
34 | class TestTogetherProvider:
35 | """Tests for Together.ai provider."""
36 |
37 | def test_init_with_api_key(self):
38 | """Test initialization with explicit API key."""
39 | provider = TogetherProvider(api_key="explicit_key")
40 | assert provider.api_key == "explicit_key"
41 |
42 | def test_init_from_env(self, mock_env):
43 | """Test initialization from environment variable."""
44 | provider = TogetherProvider()
45 | assert provider.api_key == "test_key"
46 |
47 | def test_init_no_api_key(self):
48 | """Test initialization fails without API key."""
49 | with patch.dict(os.environ, {}, clear=True):
50 | with pytest.raises(ValueError, match="Together.ai API key not found"):
51 | TogetherProvider()
52 |
53 | @pytest.mark.asyncio
54 | async def test_complete_success(self, together_provider, mock_together_response):
55 | """Test successful completion."""
56 | with patch.object(together_provider.client, "post") as mock_post:
57 | mock_response = MagicMock()
58 | mock_response.json.return_value = mock_together_response
59 | mock_response.raise_for_status = MagicMock()
60 | mock_post.return_value = mock_response
61 |
62 | result = await together_provider.complete(
63 | prompt="Test prompt", model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"
64 | )
65 |
66 | assert isinstance(result, ModelResponse)
67 | assert result.content == "This is a test response."
68 | assert result.model == "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"
69 | assert result.provider == "together"
70 | assert result.tokens_used == 30
71 |
72 | def test_estimate_cost_8b(self, together_provider):
73 | """Test cost estimation for 8B model."""
74 | cost = together_provider.estimate_cost(1000, "Llama-3.1-8B-Instruct-Turbo")
75 | # Uses blended pricing
76 | assert 0.00015 < cost < 0.00025 # Approximately $0.0002 per 1K tokens
77 |
78 | def test_estimate_cost_70b(self, together_provider):
79 | """Test cost estimation for 70B model."""
80 | cost = together_provider.estimate_cost(1000, "Llama-3.1-70B-Instruct-Turbo")
81 | # Uses blended pricing
82 | assert 0.0007 < cost < 0.0010 # Approximately $0.0008 per 1K tokens
83 |
84 |
85 | if __name__ == "__main__":
86 | pytest.main([__file__, "-v"])
87 |
--------------------------------------------------------------------------------
/cascadeflow/tools/formats.py:
--------------------------------------------------------------------------------
1 | """
2 | Provider format conversion utilities for cascadeflow tools.
3 |
4 | Handles conversion between different provider tool formats.
5 | """
6 |
7 | import logging
8 | from enum import Enum
9 | from typing import Any
10 |
11 | logger = logging.getLogger(__name__)
12 |
13 |
14 | class ToolCallFormat(Enum):
15 | """Tool call format by provider."""
16 |
17 | OPENAI = "openai" # OpenAI, Groq, Together
18 | ANTHROPIC = "anthropic" # Claude
19 | OLLAMA = "ollama" # Ollama
20 | VLLM = "vllm" # vLLM
21 | HUGGINGFACE = "huggingface" # Via Inference Providers
22 |
23 |
24 | def to_openai_format(name: str, description: str, parameters: dict[str, Any]) -> dict[str, Any]:
25 | """
26 | Convert to OpenAI tool format.
27 |
28 |     Used by: OpenAI, Groq, Together, vLLM, HuggingFace
29 | """
30 | return {
31 | "type": "function",
32 | "function": {"name": name, "description": description, "parameters": parameters},
33 | }
34 |
35 |
36 | def to_anthropic_format(name: str, description: str, parameters: dict[str, Any]) -> dict[str, Any]:
37 | """
38 | Convert to Anthropic tool format.
39 |
40 | Key difference: Uses 'input_schema' instead of 'parameters'
41 | """
42 | return {
43 | "name": name,
44 | "description": description,
45 | "input_schema": parameters, # Anthropic uses input_schema
46 | }
47 |
48 |
49 | def to_ollama_format(name: str, description: str, parameters: dict[str, Any]) -> dict[str, Any]:
50 | """Convert to Ollama tool format (same as OpenAI)."""
51 | return to_openai_format(name, description, parameters)
52 |
53 |
54 | def to_provider_format(
55 | provider: str, name: str, description: str, parameters: dict[str, Any]
56 | ) -> dict[str, Any]:
57 | """
58 | Convert to provider-specific format.
59 |
60 | Args:
61 | provider: Provider name (openai, anthropic, ollama, groq, together, vllm)
62 | name: Tool name
63 | description: Tool description
64 | parameters: Tool parameters (JSON schema)
65 |
66 | Returns:
67 | Tool schema in provider's expected format
68 | """
69 | provider_lower = provider.lower()
70 |
71 | if provider_lower in ("openai", "groq", "together", "vllm", "huggingface"):
72 | return to_openai_format(name, description, parameters)
73 | elif provider_lower == "anthropic":
74 | return to_anthropic_format(name, description, parameters)
75 | elif provider_lower == "ollama":
76 | return to_ollama_format(name, description, parameters)
77 | else:
78 | # Default to OpenAI format (most common)
79 | logger.warning(f"Unknown provider '{provider}', using OpenAI format")
80 | return to_openai_format(name, description, parameters)
81 |
82 |
83 | def get_provider_format_type(provider: str) -> ToolCallFormat:
84 | """
85 | Get the format type for a provider.
86 |
87 | Args:
88 | provider: Provider name
89 |
90 | Returns:
91 | ToolCallFormat enum value
92 | """
93 | provider_lower = provider.lower()
94 |
95 | if provider_lower in ("openai", "groq", "together", "vllm", "huggingface"):
96 | return ToolCallFormat.OPENAI
97 | elif provider_lower == "anthropic":
98 | return ToolCallFormat.ANTHROPIC
99 | elif provider_lower == "ollama":
100 | return ToolCallFormat.OLLAMA
101 | else:
102 | return ToolCallFormat.OPENAI # Default
103 |
--------------------------------------------------------------------------------
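A quick sketch showing the same JSON-schema tool rendered for two providers; note Anthropic's top-level `input_schema` key versus the OpenAI-style nested `function` object:

```python
from cascadeflow.tools.formats import get_provider_format_type, to_provider_format

parameters = {
    "type": "object",
    "properties": {"city": {"type": "string", "description": "City name"}},
    "required": ["city"],
}

openai_tool = to_provider_format("openai", "get_weather", "Get current weather", parameters)
anthropic_tool = to_provider_format("anthropic", "get_weather", "Get current weather", parameters)

assert openai_tool["function"]["parameters"] == parameters
assert anthropic_tool["input_schema"] == parameters

# Unknown providers default to the OpenAI format (to_provider_format also logs a warning).
print(get_provider_format_type("some-new-provider"))  # ToolCallFormat.OPENAI
```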
/packages/integrations/n8n/TROUBLESHOOTING.md:
--------------------------------------------------------------------------------
1 | # n8n Integration Troubleshooting
2 |
3 | ## Issue: Seeing old model names in logs after reconnecting
4 |
5 | ### Root Cause
6 | n8n caches the node instance. When you disconnect/reconnect models, the old `CascadeChatModel` instance may still have references to previous models.
7 |
8 | ### Solution
9 | 1. **Stop the workflow** in n8n
10 | 2. **Restart the workflow** (or restart n8n if that doesn't work)
11 | 3. **Look for initialization log**:
12 | ```
13 | 🚀 CascadeFlow initialized
14 | PORT MAPPING:
15 | ├─ TOP port (labeled "Verifier") → VERIFIER model: lazy-loaded (will fetch only if needed)
16 | └─ BOTTOM port (labeled "Drafter") → DRAFTER model: [type] ([name])
17 | ```
18 |
19 | This shows which models are ACTUALLY connected.
20 |
21 | ### Verifying Correct Operation
22 |
23 | **Expected logs when drafter is accepted:**
24 | ```
25 | 🎯 CascadeFlow: Trying drafter model (from BOTTOM port): ollama (gemma3:1b)
26 | 📊 Simple quality check: confidence=0.75
27 |
28 | ┌─────────────────────────────────────────┐
29 | │ ✅ FLOW: DRAFTER ACCEPTED (FAST PATH) │
30 | └─────────────────────────────────────────┘
31 | Model used: ollama (gemma3:1b)
32 | Confidence: 0.75 (threshold: 0.64)
33 | ```
34 |
35 | **Expected logs when verifier is triggered:**
36 | ```
37 | 🎯 CascadeFlow: Trying drafter model (from BOTTOM port): ollama (gemma3:1b)
38 | 📊 Simple quality check: confidence=0.50
39 |
40 | ┌────────────────────────────────────────────────┐
41 | │ ⚠️ FLOW: ESCALATED TO VERIFIER (SLOW PATH) │
42 | └────────────────────────────────────────────────┘
43 | 🔄 Loading verifier model from TOP port (labeled "Verifier")...
44 | ✓ Verifier model loaded: ollama (mistral:7b-instruct)
45 | ✅ Verifier completed successfully
46 | Model used: ollama (mistral:7b-instruct)
47 | ```
48 |
49 | ## Issue: "Only drafts getting accepted"
50 |
51 | ### Is this a problem?
52 | **NO - This is correct behavior!**
53 |
54 | With quality threshold 0.64:
55 | - If drafter produces good responses → Quality check passes → Use cheap model (SAVE MONEY ✅)
56 | - If drafter produces poor responses → Quality check fails → Escalate to verifier
57 |
58 | ### When to adjust threshold
59 |
60 | **See 100% drafter acceptance?**
61 | - Your drafter is doing well for these queries
62 | - Consider raising the threshold to 0.70-0.80 if you want stricter quality (more escalations)
63 |
64 | **See 100% verifier escalation?**
65 | - Drafter quality too low for these queries
66 | - Lower the threshold to 0.50-0.55 to accept more drafts
67 | - Or use a better drafter model
68 |
69 | ### Testing Verifier Triggering
70 |
71 | To force verifier usage, try:
72 | 1. Raise the quality threshold to 0.90 (very strict)
73 | 2. Ask complex questions that drafter struggles with
74 | 3. Use a weaker drafter model
75 |
76 | ## Checking Model Connections
77 |
78 | **Initialization log location:**
79 | - n8n workflow logs (when workflow starts)
80 | - Server console logs (if running n8n manually)
81 |
82 | **Per-request logs:**
83 | - Show in n8n execution logs
84 | - Show actual model used: `Model used: [type] ([name])`
85 |
86 | ## Common Mistakes
87 |
88 | ❌ **Connecting models to wrong ports**
89 | - TOP port = Verifier (expensive, high quality)
90 | - BOTTOM port = Drafter (cheap, tried first)
91 |
92 | ❌ **Not restarting workflow after changing connections**
93 | - Must restart for new models to be loaded
94 |
95 | ❌ **Expecting verifier to be called every time**
96 | - Verifier is ONLY called when drafter quality < threshold
97 | - This is the cost-saving feature!
98 |
--------------------------------------------------------------------------------
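The threshold behavior described above boils down to a single comparison. A hypothetical sketch (not the actual n8n node code) that reproduces the two log flows shown earlier:

```python
# Hypothetical illustration of the accept/escalate decision -- not the node's source.
def choose_model(drafter_confidence: float, threshold: float = 0.64) -> str:
    """Higher threshold -> stricter check -> more escalations to the verifier."""
    return "drafter" if drafter_confidence >= threshold else "verifier"

assert choose_model(0.75) == "drafter"   # fast path: matches the "DRAFTER ACCEPTED" log
assert choose_model(0.50) == "verifier"  # slow path: matches the "ESCALATED TO VERIFIER" log
```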
/.github/ISSUE_TEMPLATE/question.yml:
--------------------------------------------------------------------------------
1 | name: ❓ Question or Discussion
2 | description: Ask a question or start a discussion about cascadeflow
3 | title: "[Question]: "
4 | labels: ["question"]
5 | assignees: []
6 |
7 | body:
8 | - type: markdown
9 | attributes:
10 | value: |
11 | Thanks for your interest in cascadeflow!
12 |
13 | **Before asking, please check:**
14 | - [Documentation](https://github.com/lemony-ai/cascadeflow/blob/main/README.md)
15 | - [Existing issues](https://github.com/lemony-ai/cascadeflow/issues)
16 | - [Discussions](https://github.com/lemony-ai/cascadeflow/discussions)
17 |
18 | - type: dropdown
19 | id: topic
20 | attributes:
21 | label: Topic area
22 | description: What is your question about?
23 | options:
24 | - Getting started / Installation
25 | - Configuration / Setup
26 | - Provider integration (OpenAI, Anthropic, etc.)
27 | - Routing strategies
28 | - Cost optimization
29 | - Quality estimation
30 | - Performance / Speed
31 | - Tool calling / Functions
32 | - Streaming
33 | - Error handling
34 | - Best practices
35 | - Use case / Architecture
36 | - Comparison with alternatives
37 | - Contributing
38 | - Other
39 | validations:
40 | required: true
41 |
42 | - type: textarea
43 | id: question
44 | attributes:
45 | label: Your question
46 | description: What would you like to know?
47 | placeholder: |
48 | I'm trying to understand how to...
49 |
50 | What's the best way to...
51 |
52 | How does X compare to Y...
53 | validations:
54 | required: true
55 |
56 | - type: textarea
57 | id: context
58 | attributes:
59 | label: Context
60 | description: Provide any relevant context about your use case or what you're trying to achieve
61 | placeholder: |
62 | I'm building an application that...
63 | My goal is to...
64 | I've tried...
65 | validations:
66 | required: false
67 |
68 | - type: textarea
69 | id: code
70 | attributes:
71 | label: Code example (if applicable)
72 | description: Share relevant code to help us understand your question
73 | placeholder: |
74 | ```python
75 | from cascadeflow import CascadeAgent
76 |
77 | # Your code here
78 | ```
79 | render: python
80 | validations:
81 | required: false
82 |
83 | - type: textarea
84 | id: attempted
85 | attributes:
86 | label: What have you tried?
87 | description: Have you already attempted to solve this or looked anywhere for answers?
88 | placeholder: |
89 | - I've read the documentation at...
90 | - I've tried the following approach...
91 | - I've searched for...
92 | validations:
93 | required: false
94 |
95 | - type: input
96 | id: version
97 | attributes:
98 | label: cascadeflow version (if relevant)
99 | placeholder: e.g., 0.1.0
100 | validations:
101 | required: false
102 |
103 | - type: checkboxes
104 | id: checks
105 | attributes:
106 | label: Pre-submission checklist
107 | options:
108 | - label: I have searched existing issues and discussions
109 | required: true
110 | - label: I have checked the documentation
111 | required: true
112 | - label: This is not a bug report (use bug report template instead)
113 | required: true
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | """Quick test to verify all imports work after __init__.py update."""
2 |
3 |
4 | def test_core_imports():
5 | """Test core configuration imports."""
6 | from cascadeflow import CascadeAgent, CascadeConfig, CascadeResult, ModelConfig, UserTier
7 |
8 | print("✓ Core imports working")
9 |
10 |
11 | def test_day42_config_imports():
12 | """Test Day 4.2 configuration imports."""
13 | from cascadeflow import (
14 | DEFAULT_TIERS,
15 | EXAMPLE_WORKFLOWS,
16 | LatencyProfile,
17 | OptimizationWeights,
18 | WorkflowProfile,
19 | )
20 |
21 | print("✓ Day 4.2 config imports working")
22 | print(f" - Found {len(DEFAULT_TIERS)} default tiers")
23 | print(f" - Found {len(EXAMPLE_WORKFLOWS)} example workflows")
24 |
25 |
26 | def test_intelligence_imports():
27 | """Test intelligence layer imports."""
28 | from cascadeflow import (
29 | ComplexityDetector,
30 | DomainDetector,
31 | ExecutionPlan,
32 | ExecutionStrategy,
33 | LatencyAwareExecutionPlanner,
34 | ModelScorer,
35 | QueryComplexity,
36 | )
37 |
38 | print("✓ Intelligence layer imports working")
39 |
40 |
41 | def test_speculative_imports():
42 | """Test speculative cascade imports."""
43 | from cascadeflow import (
44 | DeferralStrategy,
45 | FlexibleDeferralRule,
46 | SpeculativeCascade,
47 | SpeculativeResult,
48 | )
49 |
50 | print("✓ Speculative cascade imports working")
51 |
52 |
53 | def test_features_imports():
54 | """Test supporting features imports."""
55 | from cascadeflow import (
56 | CallbackData,
57 | CallbackEvent,
58 | CallbackManager,
59 | CascadePresets,
60 | ResponseCache,
61 | StreamManager,
62 | )
63 |
64 | print("✓ Supporting features imports working")
65 |
66 |
67 | def test_providers_imports():
68 | """Test provider imports."""
69 | from cascadeflow import PROVIDER_REGISTRY, BaseProvider, ModelResponse
70 |
71 | print("✓ Provider imports working")
72 |
73 |
74 | def test_utils_imports():
75 | """Test utility imports."""
76 | from cascadeflow import estimate_tokens, format_cost, setup_logging
77 |
78 | print("✓ Utility imports working")
79 |
80 |
81 | def test_exceptions_imports():
82 | """Test exception imports."""
83 | from cascadeflow import (
84 | BudgetExceededError,
85 | cascadeflowError,
86 | ConfigError,
87 | ModelError,
88 | ProviderError,
89 | QualityThresholdError,
90 | RateLimitError,
91 | RoutingError,
92 | ValidationError,
93 | )
94 |
95 | print("✓ Exception imports working")
96 |
97 |
98 | def test_version():
99 | """Test version info."""
100 | from cascadeflow import __version__
101 |
102 | print(f"✓ Version: {__version__}")
103 | assert __version__ == "0.4.2"
104 |
105 |
106 | if __name__ == "__main__":
107 | print("Testing cascadeflow imports...\n")
108 |
109 | try:
110 | test_core_imports()
111 | test_day42_config_imports()
112 | test_intelligence_imports()
113 | test_speculative_imports()
114 | test_features_imports()
115 | test_providers_imports()
116 | test_utils_imports()
117 | test_exceptions_imports()
118 | test_version()
119 |
120 | print("\n✅ All imports successful!")
121 |
122 | except ImportError as e:
123 | print(f"\n❌ Import failed: {e}")
124 | import traceback
125 |
126 | traceback.print_exc()
127 |
--------------------------------------------------------------------------------
/packages/langchain-cascadeflow/examples/analyze-models.ts:
--------------------------------------------------------------------------------
1 | /**
2 | * Model Analysis Example
3 | *
4 | * Demonstrates how to analyze and validate cascade configurations
5 | * using your existing LangChain model instances.
6 | */
7 |
8 | import { ChatOpenAI } from '@langchain/openai';
9 | import { analyzeCascadePair, suggestCascadePairs } from '../src/index.js';
10 |
11 | async function main() {
12 | console.log('=== CascadeFlow Model Analysis Demo ===\n');
13 |
14 | // Example 1: Analyze a specific drafter/verifier pair
15 | console.log('--- Example 1: Analyze OpenAI Cascade Pair ---');
16 |
17 | const drafterOpenAI = new ChatOpenAI({ model: 'gpt-4o-mini' });
18 | const verifierOpenAI = new ChatOpenAI({ model: 'gpt-4o' });
19 |
20 | const analysis1 = analyzeCascadePair(drafterOpenAI, verifierOpenAI);
21 |
22 | console.log(`Drafter: ${analysis1.drafterModel}`);
23 | console.log(`Verifier: ${analysis1.verifierModel}`);
24 | console.log(`\nPricing (per 1M tokens):`);
25 | console.log(` Drafter: $${analysis1.drafterCost.input} input / $${analysis1.drafterCost.output} output`);
26 | console.log(` Verifier: $${analysis1.verifierCost.input} input / $${analysis1.verifierCost.output} output`);
27 | console.log(`\nEstimated Savings: ${analysis1.estimatedSavings.toFixed(1)}%`);
28 | console.log(`Configuration Valid: ${analysis1.valid ? '✅' : '❌'}`);
29 | console.log(`Recommendation: ${analysis1.recommendation}`);
30 |
31 | if (analysis1.warnings.length > 0) {
32 | console.log(`\nWarnings:`);
33 | analysis1.warnings.forEach(w => console.log(` ⚠️ ${w}`));
34 | }
35 | console.log('\n');
36 |
37 | // Example 2: Detect misconfiguration (drafter more expensive than verifier)
38 | console.log('--- Example 2: Detect Misconfiguration ---');
39 |
40 | const expensiveDrafter = new ChatOpenAI({ model: 'gpt-4o' });
41 | const cheapVerifier = new ChatOpenAI({ model: 'gpt-4o-mini' });
42 |
43 | const analysis2 = analyzeCascadePair(expensiveDrafter, cheapVerifier);
44 |
45 | console.log(`Drafter: ${analysis2.drafterModel}`);
46 | console.log(`Verifier: ${analysis2.verifierModel}`);
47 | console.log(`Configuration Valid: ${analysis2.valid ? '✅' : '❌'}`);
48 | console.log(`Recommendation: ${analysis2.recommendation}`);
49 |
50 | if (analysis2.warnings.length > 0) {
51 | console.log(`\nWarnings:`);
52 | analysis2.warnings.forEach(w => console.log(` ⚠️ ${w}`));
53 | }
54 | console.log('\n');
55 |
56 | // Example 3: Suggest optimal pairs from available models
57 | console.log('--- Example 3: Suggest Optimal Cascade Pairs ---');
58 |
59 | const availableModels = [
60 | new ChatOpenAI({ model: 'gpt-4o-mini' }),
61 | new ChatOpenAI({ model: 'gpt-4o' }),
62 | new ChatOpenAI({ model: 'gpt-3.5-turbo' }),
63 | new ChatOpenAI({ model: 'gpt-4-turbo' }),
64 | ];
65 |
66 | console.log(`Analyzing ${availableModels.length} available models...\n`);
67 |
68 | const suggestions = suggestCascadePairs(availableModels);
69 |
70 | console.log(`Found ${suggestions.length} viable cascade configurations:\n`);
71 |
72 | suggestions.slice(0, 5).forEach((suggestion, idx) => {
73 |     const { analysis } = suggestion; // drafter/verifier instances are also available on the suggestion
74 | console.log(`${idx + 1}. ${analysis.drafterModel} → ${analysis.verifierModel}`);
75 | console.log(` Estimated Savings: ${analysis.estimatedSavings.toFixed(1)}%`);
76 | console.log(` ${analysis.recommendation}`);
77 | console.log();
78 | });
79 |
80 | console.log('=== Analysis Complete ===');
81 | console.log('\n💡 Use analyzeCascadePair() to validate your cascade configuration');
82 | console.log('💡 Use suggestCascadePairs() to find optimal pairs from your models');
83 | }
84 |
85 | main().catch(console.error);
86 |
--------------------------------------------------------------------------------
/packages/langchain-cascadeflow/src/types.ts:
--------------------------------------------------------------------------------
1 | import type { PreRouter } from './routers/pre-router.js';
2 | import type { QueryComplexity } from './complexity.js';
3 |
4 | /**
5 | * Configuration for the CascadeFlow wrapper
6 | */
7 | export interface CascadeConfig {
8 | /**
9 | * The drafter model (cheap, fast) - tries first
10 | */
11 | drafter: any; // BaseChatModel from @langchain/core
12 |
13 | /**
14 | * The verifier model (expensive, accurate) - used when quality is insufficient
15 | */
16 | verifier: any; // BaseChatModel from @langchain/core
17 |
18 | /**
19 | * Quality threshold for accepting drafter responses (0-1)
20 | * @default 0.7
21 | */
22 | qualityThreshold?: number;
23 |
24 | /**
25 | * Enable automatic cost tracking
26 | * @default true
27 | */
28 | enableCostTracking?: boolean;
29 |
30 | /**
31 | * Cost tracking provider
32 | * - 'langsmith': Use LangSmith's server-side cost calculation (default, requires LANGSMITH_API_KEY)
33 | * - 'cascadeflow': Use CascadeFlow's built-in pricing table (no external dependencies)
34 | * @default 'langsmith'
35 | */
36 | costTrackingProvider?: 'langsmith' | 'cascadeflow';
37 |
38 | /**
39 | * Custom quality validator function
40 | * Returns confidence score between 0-1
41 | */
42 |   qualityValidator?: (response: any) => Promise<number> | number;
43 |
44 | /**
45 | * Enable pre-routing based on query complexity
46 | * When enabled, 'hard' and 'expert' queries skip the drafter and go directly to the verifier
47 | * @default true
48 | */
49 | enablePreRouter?: boolean;
50 |
51 | /**
52 | * Custom PreRouter instance for advanced routing control
53 | * If not provided, a default PreRouter will be created when enablePreRouter is true
54 | */
55 | preRouter?: PreRouter;
56 |
57 | /**
58 | * Complexity levels that should use cascade (try drafter first)
59 | * Queries with other complexity levels go directly to verifier
60 | * @default ['trivial', 'simple', 'moderate']
61 | */
62 | cascadeComplexities?: QueryComplexity[];
63 | }
64 |
65 | /**
66 | * Cascade execution result with cost metadata
67 | */
68 | export interface CascadeResult {
69 | /**
70 | * The final response content
71 | */
72 | content: string;
73 |
74 | /**
75 | * Model that provided the final response ('drafter' | 'verifier')
76 | */
77 | modelUsed: 'drafter' | 'verifier';
78 |
79 | /**
80 | * Quality score of the drafter response (0-1)
81 | */
82 | drafterQuality?: number;
83 |
84 | /**
85 | * Whether the drafter response was accepted
86 | */
87 | accepted: boolean;
88 |
89 | /**
90 | * Cost of the drafter call
91 | */
92 | drafterCost: number;
93 |
94 | /**
95 | * Cost of the verifier call (0 if not used)
96 | */
97 | verifierCost: number;
98 |
99 | /**
100 | * Total cost of the cascade
101 | */
102 | totalCost: number;
103 |
104 | /**
105 | * Cost savings percentage (0-100)
106 | */
107 | savingsPercentage: number;
108 |
109 | /**
110 | * Latency in milliseconds
111 | */
112 | latencyMs: number;
113 | }
114 |
115 | /**
116 | * Internal cost calculation metadata
117 | */
118 | export interface CostMetadata {
119 | drafterTokens: {
120 | input: number;
121 | output: number;
122 | };
123 | verifierTokens?: {
124 | input: number;
125 | output: number;
126 | };
127 | drafterCost: number;
128 | verifierCost: number;
129 | totalCost: number;
130 | savingsPercentage: number;
131 | modelUsed: 'drafter' | 'verifier';
132 | accepted: boolean;
133 | drafterQuality?: number;
134 | }
135 |
--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 | # cascadeflow Documentation
2 |
3 | Welcome to cascadeflow documentation! 🌊
4 |
5 | ## 📖 Quick Links
6 |
7 | - [Installation Guide](INSTALLATION.md)
8 | - [Quick Start Guide](guides/quickstart.md)
9 |
10 | ## 🚀 Getting Started
11 |
12 | ### Core Concepts
13 | - [Quickstart](guides/quickstart.md) - Get started with cascadeflow in 5 minutes
14 | - [Providers](guides/providers.md) - Configure and use different AI providers (OpenAI, Anthropic, Groq, Ollama, etc.)
15 | - [Presets](guides/presets.md) - Use built-in presets for common use cases
16 |
17 | ### Core Features
18 | - [Streaming](guides/streaming.md) - Stream responses from cascade agents
19 | - [Tools](guides/tools.md) - Function calling and tool usage with cascades
20 | - [Cost Tracking](guides/cost_tracking.md) - Track and analyze API costs across queries
21 |
22 | ## 🏭 Production & Advanced
23 |
24 | ### Production Deployment
25 | - [Production Guide](guides/production.md) - Best practices for production deployments
26 | - [Performance Guide](guides/performance.md) - Optimize cascade performance and latency
27 | - [FastAPI Integration](guides/fastapi.md) - Integrate cascadeflow with FastAPI applications
28 |
29 | ### Advanced Topics
30 | - [Custom Cascades](guides/custom_cascade.md) - Build custom cascade strategies
31 | - [Custom Validation](guides/custom_validation.md) - Implement custom quality validators
32 | - [Edge Device Deployment](guides/edge_device.md) - Deploy cascades on edge devices (Jetson, etc.)
33 | - [Browser/Edge Runtime](guides/browser_cascading.md) - Run cascades in browser or edge environments
34 |
35 | ### Integrations
36 | - [n8n Integration](guides/n8n_integration.md) - Use cascadeflow in n8n workflows
37 |
38 | ## 📚 Examples
39 |
40 | Comprehensive working code samples:
41 |
42 | **Python Examples:** [examples/](../examples/)
43 | - Basic usage, preset usage, multi-provider
44 | - Tool execution, streaming, cost tracking
45 | - Production patterns, FastAPI integration
46 | - Edge device deployment, vLLM integration
47 | - Custom cascades and validation
48 |
49 | **TypeScript Examples:** [packages/core/examples/](../packages/core/examples/)
50 | - Basic usage, tool calling, multi-provider
51 | - Streaming responses
52 | - Production patterns
53 | - Browser/Vercel Edge deployment
54 |
55 | ## 🤝 Need Help?
56 |
57 | - 📖 [GitHub Discussions](https://github.com/lemony-ai/cascadeflow/discussions) - Q&A and community support
58 | - 🐛 [GitHub Issues](https://github.com/lemony-ai/cascadeflow/issues) - Bug reports and feature requests
59 | - 📧 [Email Support](mailto:hello@lemony.ai) - Direct support
60 |
61 | ## 📦 API Reference
62 |
63 | Comprehensive API documentation for all classes and methods:
64 |
65 | - **[API Overview](./api/README.md)** - Complete API reference for Python and TypeScript
66 | - **Python API**
67 | - [CascadeAgent](./api/python/agent.md) - Main agent class
68 | - [ModelConfig](./api/python/config.md) - Model and cascade configuration
69 | - [CascadeResult](./api/python/result.md) - Result object with 30+ diagnostic fields
70 | - **TypeScript API**
71 | - See [TypeScript Package](../packages/core/README.md) for API documentation
72 |
73 | See also: Comprehensive examples in [/examples](../examples/) directory
74 |
75 | ## 🏗️ Architecture & Contributing
76 |
77 | For contributors and advanced users:
78 |
79 | - **[Architecture Guide](ARCHITECTURE.md)** - Detailed architecture, data flow, and code organization
80 | - **[Contributing Guide](../CONTRIBUTING.md)** - How to contribute to cascadeflow
81 |
82 | The architecture guide covers:
83 | - Directory structure (monorepo layout)
84 | - Core components and design patterns
85 | - Data flow and execution paths
86 | - Adding new providers, quality checks, and routing strategies
87 | - Testing strategy and development workflow
88 |
--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
1 | # cascadeflow Development Requirements
2 | # Everything needed for development, testing, and contributing
3 |
4 | # ============================================================================
5 | # CORE DEPENDENCIES
6 | # ============================================================================
7 |
8 | -r requirements.txt
9 |
10 |
11 | # ============================================================================
12 | # PROVIDER SDKs (For Testing All Providers)
13 | # ============================================================================
14 |
15 | # API-based providers (require API keys)
16 | openai>=1.0.0
17 | anthropic>=0.8.0
18 | groq>=0.4.0
19 | huggingface-hub>=0.19.0
20 | together>=0.2.0
21 |
22 | # Local inference (optional - can also use HTTP)
23 | vllm>=0.2.0
24 |
25 | # Note: Ollama doesn't need a Python package - uses HTTP
26 |
27 |
28 | # ============================================================================
29 | # TESTING
30 | # ============================================================================
31 |
32 | pytest>=7.4.0
33 | pytest-asyncio>=0.21.0
34 | pytest-cov>=4.1.0
35 | pytest-mock>=3.12.0
36 |
37 | # Environment variable management for tests
38 | python-dotenv>=1.0.0
39 |
40 |
41 | # ============================================================================
42 | # CODE QUALITY
43 | # ============================================================================
44 |
45 | # Formatting
46 | black>=23.0.0
47 | isort>=5.12.0
48 |
49 | # Linting
50 | ruff>=0.1.0
51 |
52 | # Type checking
53 | mypy>=1.5.0
54 |
55 | # Pre-commit hooks
56 | pre-commit>=3.5.0
57 |
58 |
59 | # ============================================================================
60 | # SECURITY SCANNING
61 | # ============================================================================
62 |
63 | # Python security linter
64 | bandit>=1.7.0
65 |
66 | # Check for known vulnerabilities in dependencies
67 | safety>=2.3.0
68 |
69 | # Audit Python packages for known vulnerabilities
70 | pip-audit>=2.4.0
71 |
72 |
73 | # ============================================================================
74 | # DEVELOPMENT UTILITIES
75 | # ============================================================================
76 |
77 | # Rich terminal output (for development/debugging)
78 | rich>=13.0.0
79 |
80 | # Web framework for API examples
81 | fastapi>=0.104.0
82 | uvicorn>=0.24.0
83 |
84 | # HTTP client (for health checks in examples)
85 | httpx>=0.25.0
86 |
87 | # Type stubs
88 | types-requests>=2.31.0
89 |
90 |
91 | # ============================================================================
92 | # DOCUMENTATION (Optional)
93 | # ============================================================================
94 |
95 | # Uncomment if building docs:
96 | # mkdocs>=1.5.0
97 | # mkdocs-material>=9.4.0
98 | # mkdocstrings[python]>=0.23.0
99 |
100 |
101 | # ============================================================================
102 | # SEMANTIC FEATURES (For ML-based functionality)
103 | # ============================================================================
104 |
105 | # Lightweight embedding model for semantic quality checks
106 | # Note: This is optional but required for semantic quality tests
107 | fastembed>=0.2.0
108 |
109 |
110 | # ============================================================================
111 | # INSTALLATION
112 | # ============================================================================
113 |
114 | # Install everything for development:
115 | # pip install -r requirements-dev.txt
116 | #
117 | # Or install in editable mode:
118 | # pip install -e ".[dev]"
119 | #
120 | # Run security checks:
121 | # bandit -r cascadeflow/
122 | # safety check
123 | # pip-audit
--------------------------------------------------------------------------------
/cascadeflow/guardrails/manager.py:
--------------------------------------------------------------------------------
1 | """
2 | Guardrails manager for coordinating content safety checks.
3 | """
4 |
5 | from dataclasses import dataclass
6 | from typing import TYPE_CHECKING, Optional
7 |
8 | from .content_moderator import ContentModerator, ModerationResult
9 | from .pii_detector import PIIDetector, PIIMatch
10 |
11 | if TYPE_CHECKING:
12 | from cascadeflow.profiles import UserProfile
13 |
14 |
15 | class GuardrailViolation(Exception):
16 | """Exception raised when content violates guardrails"""
17 |
18 | def __init__(self, message: str, violations: list[str]):
19 | super().__init__(message)
20 | self.violations = violations
21 |
22 |
23 | @dataclass
24 | class GuardrailsCheck:
25 | """Result from guardrails check"""
26 |
27 | is_safe: bool
28 | content_moderation: Optional[ModerationResult] = None
29 | pii_detected: Optional[list[PIIMatch]] = None
30 |     violations: Optional[list[str]] = None
31 |
32 | def __post_init__(self):
33 | if self.violations is None:
34 | self.violations = []
35 |
36 |
37 | class GuardrailsManager:
38 | """
39 | Centralized guardrails management.
40 |
41 | Coordinates content moderation and PII detection based on
42 | user profile settings.
43 |
44 | Example:
45 | >>> manager = GuardrailsManager()
46 | >>> result = await manager.check_content(
47 | ... text="user input",
48 | ... profile=profile
49 | ... )
50 | >>> if not result.is_safe:
51 | ... raise GuardrailViolation("Content blocked", result.violations)
52 | """
53 |
54 | def __init__(self):
55 | """Initialize guardrails manager"""
56 | self._content_moderator = ContentModerator()
57 | self._pii_detector = PIIDetector()
58 |
59 | async def check_content(
60 | self,
61 | text: str,
62 | profile: "UserProfile",
63 | ) -> GuardrailsCheck:
64 | """
65 | Check content against enabled guardrails.
66 |
67 | Args:
68 | text: Text to check
69 | profile: User profile with guardrail settings
70 |
71 | Returns:
72 | GuardrailsCheck with results
73 | """
74 | violations = []
75 | moderation_result = None
76 | pii_matches = None
77 |
78 | # Check content moderation if enabled
79 | if profile.enable_content_moderation:
80 | moderation_result = await self._content_moderator.check_async(text)
81 | if not moderation_result.is_safe:
82 | violations.extend(moderation_result.violations)
83 |
84 | # Check PII if enabled
85 | if profile.enable_pii_detection:
86 | pii_matches = await self._pii_detector.detect_async(text)
87 | if pii_matches:
88 | pii_types = {m.pii_type for m in pii_matches}
89 | violations.append(f"PII detected: {', '.join(pii_types)}")
90 |
91 | is_safe = len(violations) == 0
92 |
93 | return GuardrailsCheck(
94 | is_safe=is_safe,
95 | content_moderation=moderation_result,
96 | pii_detected=pii_matches,
97 | violations=violations,
98 | )
99 |
100 | async def redact_pii(
101 | self,
102 | text: str,
103 | profile: "UserProfile",
104 | ) -> tuple[str, list[PIIMatch]]:
105 | """
106 | Redact PII from text if PII detection is enabled.
107 |
108 | Args:
109 | text: Text to redact
110 | profile: User profile
111 |
112 | Returns:
113 | Tuple of (redacted_text, pii_matches)
114 | """
115 | if not profile.enable_pii_detection:
116 | return text, []
117 |
118 | return self._pii_detector.redact(text)
119 |
--------------------------------------------------------------------------------
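The docstring's pattern extends naturally to redaction. A sketch assuming `UserProfile` can be constructed with the two `enable_*` flags that `check_content` reads (its real constructor may differ):

```python
import asyncio

from cascadeflow.guardrails.manager import GuardrailsManager
from cascadeflow.profiles import UserProfile  # assumption: accepts the enable_* flags below

async def main() -> None:
    manager = GuardrailsManager()
    profile = UserProfile(enable_content_moderation=True, enable_pii_detection=True)

    check = await manager.check_content("My SSN is 123-45-6789", profile)
    if not check.is_safe:
        print("Blocked:", check.violations)
    else:
        # Redaction returns the cleaned text plus the matches that were replaced.
        redacted, matches = await manager.redact_pii("Call me at 555-0100", profile)
        print(redacted, [m.pii_type for m in matches])

asyncio.run(main())
```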
/cascadeflow/scripts/format_code.bat:
--------------------------------------------------------------------------------
1 | @echo off
2 | REM cascadeflow Code Formatting Script (Windows)
3 | REM Run this before every commit and definitely before launch!
4 |
5 | echo.
6 | echo 🎨 cascadeflow - Code Formatting Script
7 | echo ========================================
8 | echo.
9 |
10 | REM Check if virtual environment is activated
11 | if not defined VIRTUAL_ENV (
12 | echo ⚠️ Virtual environment not activated!
13 | echo Please run: .venv\Scripts\activate
14 | exit /b 1
15 | )
16 |
17 | REM Check if we're in the right directory
18 | if not exist pyproject.toml (
19 | echo ❌ Error: Not in project root directory
20 | echo Please run this script from the cascadeflow\ directory
21 | exit /b 1
22 | )
23 |
24 | echo ✅ Virtual environment: %VIRTUAL_ENV%
25 | echo ✅ Working directory: %CD%
26 | echo.
27 |
28 | REM Install formatting tools if not present
29 | echo 📦 Checking formatting tools...
30 | pip install --quiet --upgrade black isort ruff mypy 2>nul || (
31 | echo Installing formatting tools...
32 | pip install black isort ruff mypy
33 | )
34 | echo ✅ Formatting tools ready
35 | echo.
36 |
37 | REM Step 1: Black - Code formatting
38 | echo 🎨 Step 1/4: Running Black (code formatter)...
39 | echo -------------------------------------------
40 | black cascadeflow\ tests\ examples\ --line-length 100
41 | if errorlevel 1 (
42 | echo ❌ Black formatting failed
43 | exit /b 1
44 | )
45 | echo ✅ Black formatting complete
46 | echo.
47 |
48 | REM Step 2: isort - Import sorting
49 | echo 📦 Step 2/4: Running isort (import sorter)...
50 | echo --------------------------------------------
51 | isort cascadeflow\ tests\ examples\ --profile black --line-length 100
52 | if errorlevel 1 (
53 | echo ❌ isort failed
54 | exit /b 1
55 | )
56 | echo ✅ Import sorting complete
57 | echo.
58 |
59 | REM Step 3: Ruff - Linting and auto-fix
60 | echo 🔍 Step 3/4: Running Ruff (linter)...
61 | echo ------------------------------------
62 | echo Checking for issues...
63 | ruff check cascadeflow\ tests\ examples\ --fix
64 | if errorlevel 1 (
65 | echo ⚠️ Ruff found some issues
66 | echo Attempting to auto-fix...
67 | ruff check cascadeflow\ tests\ examples\ --fix --unsafe-fixes
68 | if errorlevel 1 (
69 | echo ❌ Some issues need manual fixing
70 | echo Review the output above and fix manually
71 | exit /b 1
72 | )
73 | )
74 | echo ✅ Linting complete
75 | echo.
76 |
77 | REM Step 4: mypy - Type checking (optional, won't fail)
78 | echo 🔤 Step 4/4: Running mypy (type checker)...
79 | echo -----------------------------------------
80 | mypy cascadeflow\ --ignore-missing-imports --no-strict-optional
81 | if errorlevel 1 (
82 | echo ⚠️ Type checking found some issues (non-critical)
83 | echo Consider fixing these before launch, but not required
84 | )
85 | echo ✅ Type checking complete
86 | echo.
87 |
88 | REM Final verification
89 | echo 🧪 Running quick verification...
90 | echo ------------------------------
91 | REM cmd does not expand wildcards for py_compile, so compile each file in a loop
92 | for %%f in (cascadeflow\*.py) do python -m py_compile "%%f" 2>nul || (
93 |     echo ❌ Syntax errors detected in %%f
94 |     exit /b 1
95 | )
96 | echo ✅ Syntax verification passed
97 | echo.
98 |
99 | REM Summary
100 | echo ======================================
101 | echo ✨ Code Formatting Complete!
102 | echo ======================================
103 | echo.
104 | echo Summary:
105 | echo ✅ Black formatting applied
106 | echo ✅ Imports sorted with isort
107 | echo ✅ Linting issues fixed with Ruff
108 | echo ✅ Type checking completed
109 | echo ✅ Syntax verification passed
110 | echo.
111 | echo Next steps:
112 | echo 1. Review changes: git diff
113 | echo 2. Run tests: pytest tests\ -v
114 | echo 3. Commit: git add . ^&^& git commit -m "style: Format code with Black/isort/Ruff"
115 | echo.
116 | echo 🚀 Ready for launch!
117 | pause
--------------------------------------------------------------------------------
/cascadeflow/tools/result.py:
--------------------------------------------------------------------------------
1 | """
2 | Tool result formatting for cascadeflow.
3 |
4 | Handles formatting tool execution results for different providers.
5 | """
6 |
7 | import logging
8 | from dataclasses import dataclass
9 | from typing import Any, Optional
10 |
11 | logger = logging.getLogger(__name__)
12 |
13 |
14 | @dataclass
15 | class ToolResult:
16 | """
17 | Result from executing a tool.
18 |
19 | This is passed back to the model after tool execution.
20 | """
21 |
22 | call_id: str # ID of the tool call
23 | name: str # Tool name
24 | result: Any # Tool output
25 | error: Optional[str] = None # Error message if tool failed
26 | execution_time_ms: Optional[float] = None # How long tool took
27 |
28 | @property
29 | def success(self) -> bool:
30 | """Whether tool execution succeeded."""
31 | return self.error is None
32 |
33 | def to_openai_message(self) -> dict[str, Any]:
34 | """
35 | Format as OpenAI tool result message.
36 |
37 |         Used by: OpenAI, Groq, Together, HuggingFace (Ollama and vLLM reuse this format)
38 |
39 | Format:
40 | {
41 | "tool_call_id": "call_123",
42 | "role": "tool",
43 | "name": "get_weather",
44 | "content": "{'temp': 22, 'condition': 'sunny'}"
45 | }
46 | """
47 | content = str(self.result) if not self.error else f"Error: {self.error}"
48 |
49 | return {"tool_call_id": self.call_id, "role": "tool", "name": self.name, "content": content}
50 |
51 | def to_anthropic_message(self) -> dict[str, Any]:
52 | """
53 | Format as Anthropic tool result message.
54 |
55 | Key difference: Uses content blocks instead of role="tool"
56 |
57 | Format:
58 | {
59 | "role": "user",
60 | "content": [{
61 | "type": "tool_result",
62 | "tool_use_id": "toolu_123",
63 | "content": "{'temp': 22, 'condition': 'sunny'}",
64 | "is_error": false
65 | }]
66 | }
67 | """
68 | content = str(self.result) if not self.error else f"Error: {self.error}"
69 |
70 | return {
71 | "role": "user",
72 | "content": [
73 | {
74 | "type": "tool_result",
75 | "tool_use_id": self.call_id,
76 | "content": content,
77 | "is_error": self.error is not None,
78 | }
79 | ],
80 | }
81 |
82 | def to_ollama_message(self) -> dict[str, Any]:
83 | """Format as Ollama tool result (same as OpenAI)."""
84 | return self.to_openai_message()
85 |
86 | def to_vllm_message(self) -> dict[str, Any]:
87 | """Format as vLLM tool result (same as OpenAI)."""
88 | return self.to_openai_message()
89 |
90 | def to_provider_message(self, provider: str) -> dict[str, Any]:
91 | """
92 | Format as provider-specific message.
93 |
94 | Args:
95 | provider: Provider name
96 |
97 | Returns:
98 | Tool result in provider's expected format
99 | """
100 | provider_lower = provider.lower()
101 |
102 | if provider_lower in ("openai", "groq", "together", "huggingface"):
103 | return self.to_openai_message()
104 | elif provider_lower == "anthropic":
105 | return self.to_anthropic_message()
106 | elif provider_lower == "ollama":
107 | return self.to_ollama_message()
108 | elif provider_lower == "vllm":
109 | return self.to_vllm_message()
110 | else:
111 | # Default to OpenAI format
112 | logger.warning(f"Unknown provider '{provider}', using OpenAI format")
113 | return self.to_openai_message()
114 |
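115 | # Round-trip sketch (illustrative values; message shapes follow the docstrings above):
116 | #
117 | #     result = ToolResult(call_id="call_123", name="get_weather", result={"temp": 22})
118 | #     result.to_provider_message("anthropic")
119 | #     # -> {"role": "user", "content": [{"type": "tool_result", "tool_use_id": "call_123", ...}]}
120 | #     result.to_provider_message("groq")
121 | #     # -> {"tool_call_id": "call_123", "role": "tool", "name": "get_weather", "content": "{'temp': 22}"}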
--------------------------------------------------------------------------------
/examples/langchain_basic_usage.py:
--------------------------------------------------------------------------------
1 | """Basic usage example for CascadeFlow LangChain integration.
2 |
3 | This example demonstrates:
4 | - Basic cascade setup with OpenAI models
5 | - Automatic quality-based routing
6 | - Cost tracking with metadata
7 | - Accessing cascade results
8 |
9 | Run:
10 | OPENAI_API_KEY=your-key python examples/langchain_basic_usage.py
11 | """
12 |
13 | import asyncio
14 | import os
15 |
16 | from langchain_openai import ChatOpenAI
17 |
18 | from cascadeflow.integrations.langchain import CascadeFlow
19 |
20 |
21 | async def main():
22 | # Verify API key is set
23 | if not os.getenv("OPENAI_API_KEY"):
24 | print("Error: OPENAI_API_KEY environment variable not set")
25 | return
26 |
27 | print("=" * 60)
28 | print("CascadeFlow LangChain Integration - Basic Usage")
29 | print("=" * 60)
30 |
31 | # Setup drafter (cheap, fast) and verifier (expensive, accurate)
32 | drafter = ChatOpenAI(model="gpt-4o-mini", temperature=0)
33 | verifier = ChatOpenAI(model="gpt-4o", temperature=0)
34 |
35 | # Create cascade with quality threshold
36 | cascade = CascadeFlow(
37 | drafter=drafter,
38 | verifier=verifier,
39 | quality_threshold=0.7,
40 | enable_cost_tracking=True,
41 | cost_tracking_provider="cascadeflow", # Use built-in pricing
42 | )
43 |
44 | print("\n1. Testing with simple question (should use drafter):")
45 | print("-" * 60)
46 |
47 | response = await cascade.ainvoke("What is 2+2?")
48 | result = cascade.get_last_cascade_result()
49 |
50 | print("\nQuestion: What is 2+2?")
51 | print(f"Response: {response.content}")
52 | print(f"\nModel used: {result['model_used']}")
53 | print(f"Drafter quality: {result.get('drafter_quality', 0):.2f}")
54 | print(f"Accepted: {result['accepted']}")
55 | print(f"Drafter cost: ${result['drafter_cost']:.6f}")
56 | print(f"Verifier cost: ${result['verifier_cost']:.6f}")
57 | print(f"Total cost: ${result['total_cost']:.6f}")
58 | print(f"Savings: {result['savings_percentage']:.1f}%")
59 | print(f"Latency: {result['latency_ms']:.0f}ms")
60 |
61 | print("\n2. Testing with complex question (may use verifier):")
62 | print("-" * 60)
63 |
64 | response = await cascade.ainvoke(
65 | "Explain the difference between synchronous and asynchronous programming "
66 | "in Python, including examples and best practices."
67 | )
68 | result = cascade.get_last_cascade_result()
69 |
70 | print("\nQuestion: Explain sync vs async in Python...")
71 | print(f"Response: {response.content[:200]}...")
72 | print(f"\nModel used: {result['model_used']}")
73 | print(f"Drafter quality: {result.get('drafter_quality', 0):.2f}")
74 | print(f"Accepted: {result['accepted']}")
75 | print(f"Drafter cost: ${result['drafter_cost']:.6f}")
76 | print(f"Verifier cost: ${result['verifier_cost']:.6f}")
77 | print(f"Total cost: ${result['total_cost']:.6f}")
78 | print(f"Savings: {result['savings_percentage']:.1f}%")
79 | print(f"Latency: {result['latency_ms']:.0f}ms")
80 |
81 | print("\n3. Testing bind() method:")
82 | print("-" * 60)
83 |
84 | # Create a bound instance with temperature
85 | bound_cascade = cascade.bind(temperature=1.0)
86 |
87 | response = await bound_cascade.ainvoke("Tell me a creative story in one sentence.")
88 | result = bound_cascade.get_last_cascade_result()
89 |
90 | print("\nQuestion: Tell me a creative story...")
91 | print(f"Response: {response.content}")
92 | print(f"\nModel used: {result['model_used']}")
93 | print(f"Accepted: {result['accepted']}")
94 |
95 | print("\n" + "=" * 60)
96 | print("Basic usage demo complete!")
97 | print("=" * 60)
98 |
99 |
100 | if __name__ == "__main__":
101 | asyncio.run(main())
102 |
--------------------------------------------------------------------------------
/cascadeflow/profiles/profile_manager.py:
--------------------------------------------------------------------------------
1 | """Profile manager for scaling to thousands of users."""
2 |
3 | import asyncio
4 | from collections.abc import Awaitable
5 | from datetime import datetime, timedelta
6 | from typing import Callable, Optional
7 |
8 | from .tier_config import TierLevel
9 | from .user_profile import UserProfile
10 |
11 |
12 | class UserProfileManager:
13 | """
14 | Manage user profiles at scale (thousands of users).
15 |
16 | Features:
17 | - In-memory caching (configurable TTL)
18 | - Database integration (via callback)
19 | - Bulk operations
20 | - Tier upgrades/downgrades
21 | """
22 |
23 | def __init__(
24 | self,
25 | cache_ttl_seconds: int = 300, # 5 minutes
26 | load_callback: Optional[Callable[[str], Awaitable[Optional[UserProfile]]]] = None,
27 | save_callback: Optional[Callable[[UserProfile], Awaitable[None]]] = None,
28 | ):
29 | """
30 | Initialize profile manager.
31 |
32 | Args:
33 | cache_ttl_seconds: How long to cache profiles in memory
34 | load_callback: Async function to load profile from database
35 | save_callback: Async function to save profile to database
36 | """
37 | self._cache: dict[str, tuple[UserProfile, datetime]] = {}
38 | self._cache_ttl = timedelta(seconds=cache_ttl_seconds)
39 | self._load_callback = load_callback
40 | self._save_callback = save_callback
41 | self._lock = asyncio.Lock()
42 |
43 | async def get_profile(self, user_id: str) -> UserProfile:
44 | """
45 | Get user profile (from cache or load).
46 |
47 | Fast path: Cached profile (microseconds)
48 | Slow path: Load from database (milliseconds)
49 | Default path: Create free tier profile (microseconds)
50 | """
51 | # Check cache
52 | if user_id in self._cache:
53 | profile, cached_at = self._cache[user_id]
54 | if datetime.utcnow() - cached_at < self._cache_ttl:
55 | return profile
56 |
57 |         # Load from database (the lock serializes concurrent loads)
58 | async with self._lock:
59 | if self._load_callback:
60 | profile = await self._load_callback(user_id)
61 | if profile:
62 | self._cache[user_id] = (profile, datetime.utcnow())
63 | return profile
64 |
65 | # Default: Create free tier profile
66 | profile = UserProfile.from_tier(TierLevel.FREE, user_id=user_id)
67 | self._cache[user_id] = (profile, datetime.utcnow())
68 | return profile
69 |
70 | async def save_profile(self, profile: UserProfile) -> None:
71 | """Save profile to database and cache"""
72 | self._cache[profile.user_id] = (profile, datetime.utcnow())
73 | if self._save_callback:
74 | await self._save_callback(profile)
75 |
76 | async def update_tier(self, user_id: str, new_tier: TierLevel) -> UserProfile:
77 | """Upgrade/downgrade user tier"""
78 | from .tier_config import TierConfig
79 |
80 | profile = await self.get_profile(user_id)
81 | profile.tier = TierConfig.from_preset(new_tier)
82 | await self.save_profile(profile)
83 | return profile
84 |
85 | def invalidate_cache(self, user_id: str) -> None:
86 | """Invalidate cached profile (e.g., after tier change)"""
87 | if user_id in self._cache:
88 | del self._cache[user_id]
89 |
90 | def create_bulk(self, user_data: list[dict]) -> list[UserProfile]:
91 | """Create multiple profiles efficiently"""
92 | profiles = []
93 | for data in user_data:
94 | tier = TierLevel(data.get("tier", "free"))
95 | profile = UserProfile.from_tier(tier, user_id=data["user_id"])
96 | profiles.append(profile)
97 | self._cache[profile.user_id] = (profile, datetime.utcnow())
98 | return profiles
99 |
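100 | # Wiring sketch (illustrative; `load_from_db` and `save_to_db` are hypothetical async
101 | # functions backed by your own storage layer):
102 | #
103 | #     manager = UserProfileManager(
104 | #         cache_ttl_seconds=300,
105 | #         load_callback=load_from_db,
106 | #         save_callback=save_to_db,
107 | #     )
108 | #     profile = await manager.get_profile("user_123")   # cache -> db -> free-tier default
109 | #     await manager.update_tier("user_123", TierLevel.PRO)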
--------------------------------------------------------------------------------
/cascadeflow/profiles/tier_config.py:
--------------------------------------------------------------------------------
1 | """Tier configuration for user profiles."""
2 |
3 | from dataclasses import dataclass, replace
4 | from enum import Enum
5 | from typing import Optional
6 |
7 |
8 | class TierLevel(str, Enum):
9 | """Predefined tier levels"""
10 |
11 | FREE = "free"
12 | STARTER = "starter"
13 | PRO = "pro"
14 | BUSINESS = "business"
15 | ENTERPRISE = "enterprise"
16 |
17 |
18 | @dataclass
19 | class TierConfig:
20 | """
21 | Tier configuration (one dimension of UserProfile).
22 |
23 | This represents subscription tiers with predefined limits and features.
24 | Can be used as-is or customized per user.
25 | """
26 |
27 | name: str
28 |
29 | # Budget limits
30 | daily_budget: Optional[float] = None
31 | weekly_budget: Optional[float] = None
32 | monthly_budget: Optional[float] = None
33 |
34 | # Rate limits
35 | requests_per_hour: Optional[int] = None
36 | requests_per_day: Optional[int] = None
37 | tokens_per_minute: Optional[int] = None
38 |
39 | # Feature flags
40 | enable_streaming: bool = True
41 | enable_batch: bool = False
42 | enable_embeddings: bool = False
43 |
44 | # Quality settings
45 | min_quality: float = 0.60
46 | target_quality: float = 0.80
47 |
48 | # Model access
49 | allowed_models: Optional[list[str]] = None
50 | blocked_models: Optional[list[str]] = None
51 |
52 | # Support level
53 | support_priority: str = "community" # community, priority, dedicated
54 |
55 | @classmethod
56 | def from_preset(cls, tier: TierLevel) -> "TierConfig":
57 | """Create TierConfig from predefined preset"""
58 | return TIER_PRESETS[tier]
59 |
60 |
61 | # Predefined tier presets
62 | TIER_PRESETS = {
63 | TierLevel.FREE: TierConfig(
64 | name="free",
65 | daily_budget=0.10,
66 | requests_per_hour=10,
67 | requests_per_day=100,
68 | enable_streaming=False,
69 | enable_batch=False,
70 | enable_embeddings=False,
71 | min_quality=0.60,
72 | target_quality=0.70,
73 | support_priority="community",
74 | ),
75 | TierLevel.STARTER: TierConfig(
76 | name="starter",
77 | daily_budget=1.00,
78 | requests_per_hour=100,
79 | requests_per_day=1000,
80 | enable_streaming=True,
81 | enable_batch=False,
82 | enable_embeddings=False,
83 | min_quality=0.70,
84 | target_quality=0.80,
85 | support_priority="community",
86 | ),
87 | TierLevel.PRO: TierConfig(
88 | name="pro",
89 | daily_budget=10.00,
90 | requests_per_hour=1000,
91 | requests_per_day=10000,
92 | tokens_per_minute=100000,
93 | enable_streaming=True,
94 | enable_batch=True,
95 | enable_embeddings=True,
96 | min_quality=0.75,
97 | target_quality=0.85,
98 | allowed_models=None, # All models
99 | support_priority="priority",
100 | ),
101 | TierLevel.BUSINESS: TierConfig(
102 | name="business",
103 | daily_budget=50.00,
104 | requests_per_hour=5000,
105 | requests_per_day=50000,
106 | tokens_per_minute=500000,
107 | enable_streaming=True,
108 | enable_batch=True,
109 | enable_embeddings=True,
110 | min_quality=0.80,
111 | target_quality=0.90,
112 | support_priority="priority",
113 | ),
114 | TierLevel.ENTERPRISE: TierConfig(
115 | name="enterprise",
116 | daily_budget=None, # Unlimited
117 | requests_per_hour=None, # Unlimited
118 | requests_per_day=None, # Unlimited
119 | tokens_per_minute=None, # Unlimited
120 | enable_streaming=True,
121 | enable_batch=True,
122 | enable_embeddings=True,
123 | min_quality=0.85,
124 | target_quality=0.95,
125 | support_priority="dedicated",
126 | ),
127 | }
128 |
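129 | # Usage sketch (illustrative): resolve a preset, then customize the copy per user
130 | #
131 | #     pro = TierConfig.from_preset(TierLevel.PRO)
132 | #     pro.daily_budget = 25.00   # safe: from_preset returns a copy, the preset stays intact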
--------------------------------------------------------------------------------
/cascadeflow/tools/call.py:
--------------------------------------------------------------------------------
1 | """
2 | Tool call parsing for cascadeflow.
3 |
4 | Handles parsing tool calls from different provider formats.
5 | """
6 |
7 | import json
8 | import logging
9 | from dataclasses import dataclass
10 | from typing import Any
11 |
12 | from .formats import ToolCallFormat
13 |
14 | logger = logging.getLogger(__name__)
15 |
16 |
17 | @dataclass
18 | class ToolCall:
19 | """
20 | Represents a tool call request from the model.
21 |
22 | This is returned by the model when it wants to use a tool.
23 | """
24 |
25 | id: str # Unique call ID (for tracking)
26 | name: str # Tool name
27 | arguments: dict[str, Any] # Tool arguments
28 | provider_format: ToolCallFormat # Original format from provider
29 |
30 | @classmethod
31 | def from_openai(cls, tool_call: dict[str, Any]) -> "ToolCall":
32 | """
33 | Parse OpenAI tool call format.
34 |
35 | Format:
36 | {
37 | "id": "call_123",
38 | "type": "function",
39 | "function": {
40 | "name": "get_weather",
41 | "arguments": '{"location": "Paris"}'
42 | }
43 | }
44 | """
45 | try:
46 | arguments = json.loads(tool_call["function"]["arguments"])
47 | except (json.JSONDecodeError, KeyError) as e:
48 | logger.error(f"Failed to parse OpenAI tool call arguments: {e}")
49 | arguments = {}
50 |
51 | return cls(
52 | id=tool_call["id"],
53 | name=tool_call["function"]["name"],
54 | arguments=arguments,
55 | provider_format=ToolCallFormat.OPENAI,
56 | )
57 |
58 | @classmethod
59 | def from_anthropic(cls, tool_use: dict[str, Any]) -> "ToolCall":
60 | """
61 | Parse Anthropic tool use format.
62 |
63 | Format:
64 | {
65 | "type": "tool_use",
66 | "id": "toolu_123",
67 | "name": "get_weather",
68 | "input": {
69 | "location": "Paris"
70 | }
71 | }
72 | """
73 | return cls(
74 | id=tool_use["id"],
75 | name=tool_use["name"],
76 | arguments=tool_use.get("input", {}),
77 | provider_format=ToolCallFormat.ANTHROPIC,
78 | )
79 |
80 | @classmethod
81 | def from_ollama(cls, tool_call: dict[str, Any]) -> "ToolCall":
82 | """Parse Ollama tool call format (same as OpenAI)."""
83 | return cls.from_openai(tool_call)
84 |
85 | @classmethod
86 | def from_vllm(cls, tool_call: dict[str, Any]) -> "ToolCall":
87 | """Parse vLLM tool call format (same as OpenAI)."""
88 | return cls.from_openai(tool_call)
89 |
90 | @classmethod
91 | def from_provider(cls, provider: str, tool_call: dict[str, Any]) -> "ToolCall":
92 | """
93 | Parse tool call from any provider format.
94 |
95 | Args:
96 | provider: Provider name
97 | tool_call: Raw tool call from provider response
98 |
99 | Returns:
100 | Standardized ToolCall object
101 | """
102 | provider_lower = provider.lower()
103 |
104 | if provider_lower in ("openai", "groq", "together", "huggingface"):
105 | return cls.from_openai(tool_call)
106 | elif provider_lower == "anthropic":
107 | return cls.from_anthropic(tool_call)
108 | elif provider_lower == "ollama":
109 | return cls.from_ollama(tool_call)
110 | elif provider_lower == "vllm":
111 | return cls.from_vllm(tool_call)
112 | else:
113 | # Try OpenAI format as default
114 | try:
115 | return cls.from_openai(tool_call)
116 | except Exception as e:
117 | logger.error(f"Failed to parse tool call from {provider}: {e}")
118 | raise ValueError(f"Unsupported tool call format from provider '{provider}'")
119 |
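120 | # Parsing sketch (payload shape follows the OpenAI format documented above):
121 | #
122 | #     raw = {
123 | #         "id": "call_123",
124 | #         "type": "function",
125 | #         "function": {"name": "get_weather", "arguments": '{"location": "Paris"}'},
126 | #     }
127 | #     call = ToolCall.from_provider("openai", raw)
128 | #     assert call.arguments == {"location": "Paris"}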
--------------------------------------------------------------------------------
/cascadeflow/scripts/format_code.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # cascadeflow Code Formatting Script
3 | # Run this before every commit and definitely before launch!
4 |
5 | set -e # Exit on any error
6 |
7 | echo "🎨 cascadeflow - Code Formatting Script"
8 | echo "========================================"
9 | echo ""
10 |
11 | # Check if virtual environment is activated
12 | if [[ -z "$VIRTUAL_ENV" ]]; then
13 | echo "⚠️ Virtual environment not activated!"
14 | echo "Please run: source .venv/bin/activate"
15 | exit 1
16 | fi
17 |
18 | # Check if we're in the right directory
19 | if [[ ! -f "pyproject.toml" ]]; then
20 | echo "❌ Error: Not in project root directory"
21 | echo "Please run this script from the cascadeflow/ directory"
22 | exit 1
23 | fi
24 |
25 | echo "✅ Virtual environment: $VIRTUAL_ENV"
26 | echo "✅ Working directory: $(pwd)"
27 | echo ""
28 |
29 | # Install formatting tools if not present
30 | echo "📦 Checking formatting tools..."
31 | pip install --quiet --upgrade black isort ruff mypy 2>/dev/null || {
32 | echo "Installing formatting tools..."
33 | pip install black isort ruff mypy
34 | }
35 | echo "✅ Formatting tools ready"
36 | echo ""
37 |
38 | # Step 1: Black - Code formatting
39 | echo "🎨 Step 1/4: Running Black (code formatter)..."
40 | echo "-------------------------------------------"
41 | black cascadeflow/ tests/ examples/ --line-length 100 || {
42 | echo "❌ Black formatting failed"
43 | exit 1
44 | }
45 | echo "✅ Black formatting complete"
46 | echo ""
47 |
48 | # Step 2: isort - Import sorting
49 | echo "📦 Step 2/4: Running isort (import sorter)..."
50 | echo "--------------------------------------------"
51 | isort cascadeflow/ tests/ examples/ --profile black --line-length 100 || {
52 | echo "❌ isort failed"
53 | exit 1
54 | }
55 | echo "✅ Import sorting complete"
56 | echo ""
57 |
58 | # Step 3: Ruff - Linting and auto-fix
59 | echo "🔍 Step 3/4: Running Ruff (linter)..."
60 | echo "------------------------------------"
61 | echo "Checking for issues..."
62 | ruff check cascadeflow/ tests/ examples/ --fix || {
63 | echo "⚠️ Ruff found some issues"
64 | echo "Attempting to auto-fix..."
65 | ruff check cascadeflow/ tests/ examples/ --fix --unsafe-fixes || {
66 | echo "❌ Some issues need manual fixing"
67 | echo "Review the output above and fix manually"
68 | exit 1
69 | }
70 | }
71 | echo "✅ Linting complete"
72 | echo ""
73 |
74 | # Step 4: mypy - Type checking (optional, won't fail)
75 | echo "🔤 Step 4/4: Running mypy (type checker)..."
76 | echo "-----------------------------------------"
77 | mypy cascadeflow/ --ignore-missing-imports --no-strict-optional || {
78 | echo "⚠️ Type checking found some issues (non-critical)"
79 | echo "Consider fixing these before launch, but not required"
80 | }
81 | echo "✅ Type checking complete"
82 | echo ""
83 |
84 | # Final verification
85 | echo "🧪 Running quick verification..."
86 | echo "------------------------------"
87 |
88 | # Check if there are any .py files with syntax errors
89 | python -m py_compile cascadeflow/*.py 2>/dev/null || {
90 | echo "❌ Syntax errors detected in cascadeflow/"
91 | exit 1
92 | }
93 |
94 | python -m py_compile tests/*.py 2>/dev/null || {
95 | echo "⚠️ Syntax errors in tests/ (check manually)"
96 | }
97 |
98 | echo "✅ Syntax verification passed"
99 | echo ""
100 |
101 | # Summary
102 | echo "======================================"
103 | echo "✨ Code Formatting Complete!"
104 | echo "======================================"
105 | echo ""
106 | echo "Summary:"
107 | echo " ✅ Black formatting applied"
108 | echo " ✅ Imports sorted with isort"
109 | echo " ✅ Linting issues fixed with Ruff"
110 | echo " ✅ Type checking completed"
111 | echo " ✅ Syntax verification passed"
112 | echo ""
113 | echo "Next steps:"
114 | echo " 1. Review changes: git diff"
115 | echo " 2. Run tests: pytest tests/ -v"
116 | echo " 3. Commit: git add . && git commit -m 'style: Format code with Black/isort/Ruff'"
117 | echo ""
118 | echo "🚀 Ready for launch!"
--------------------------------------------------------------------------------
/.github/CODEOWNERS:
--------------------------------------------------------------------------------
1 | # CascadeFlow Code Owners
2 | # =======================
3 | #
4 | # This file defines who is responsible for reviewing code changes in specific
5 | # parts of the repository. Code owners are automatically requested for review
6 | # when someone opens a pull request that modifies code they own.
7 | #
8 | # Learn more: https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners
9 |
10 | # ============================================================================
11 | # GLOBAL OWNER (All files)
12 | # ============================================================================
13 | # The repository owner must approve ALL pull requests
14 | * @saschabuehrle
15 |
16 | # ============================================================================
17 | # CRITICAL FILES (Extra protection for package distribution)
18 | # ============================================================================
19 |
20 | # Python package configuration
21 | /pyproject.toml @saschabuehrle
22 | /setup.py @saschabuehrle
23 | /setup.cfg @saschabuehrle
24 | /requirements*.txt @saschabuehrle
25 | /MANIFEST.in @saschabuehrle
26 |
27 | # TypeScript/JavaScript package configuration
28 | /packages/*/package.json @saschabuehrle
29 | /package.json @saschabuehrle
30 | /pnpm-workspace.yaml @saschabuehrle
31 | /pnpm-lock.yaml @saschabuehrle
32 | /tsconfig*.json @saschabuehrle
33 |
34 | # GitHub workflows, actions, and settings
35 | /.github/workflows/ @saschabuehrle
36 | /.github/actions/ @saschabuehrle
37 | /.github/CODEOWNERS @saschabuehrle
38 |
39 | # Security
40 | /SECURITY.md @saschabuehrle
41 | /.env.example @saschabuehrle
42 |
43 | # ============================================================================
44 | # CORE LIBRARY CODE (Python)
45 | # ============================================================================
46 |
47 | # Core agent and orchestration
48 | /cascadeflow/agent.py @saschabuehrle
49 | /cascadeflow/config.py @saschabuehrle
50 | /cascadeflow/schema/ @saschabuehrle
51 |
52 | # Provider implementations - critical for integrations
53 | /cascadeflow/providers/ @saschabuehrle
54 |
55 | # Quality validation and routing systems
56 | /cascadeflow/quality/ @saschabuehrle
57 | /cascadeflow/routing/ @saschabuehrle
58 |
59 | # Utilities and helpers
60 | /cascadeflow/utils/ @saschabuehrle
61 |
62 | # All other cascadeflow code
63 | /cascadeflow/ @saschabuehrle
64 |
65 | # ============================================================================
66 | # CORE LIBRARY CODE (TypeScript)
67 | # ============================================================================
68 | /packages/core/src/ @saschabuehrle
69 | /packages/integrations/ @saschabuehrle
70 | /packages/ml/ @saschabuehrle
71 |
72 | # ============================================================================
73 | # TESTS
74 | # ============================================================================
75 | /tests/ @saschabuehrle
76 | /packages/*/tests/ @saschabuehrle
77 | /packages/*/__tests__/ @saschabuehrle
78 |
79 | # ============================================================================
80 | # DOCUMENTATION
81 | # ============================================================================
82 | /docs/ @saschabuehrle
83 | /README.md @saschabuehrle
84 | /CHANGELOG.md @saschabuehrle
85 | /CONTRIBUTING.md @saschabuehrle
86 | /LICENSE @saschabuehrle
87 |
88 | # ============================================================================
89 | # EXAMPLES
90 | # ============================================================================
91 | /examples/ @saschabuehrle
92 | /packages/*/examples/ @saschabuehrle
93 |
94 | # ============================================================================
95 | # NOTES FOR CONTRIBUTORS
96 | # ============================================================================
97 | # - All PRs require approval from @saschabuehrle (enforced by branch protection)
98 | # - Direct commits to main are blocked (enforced by branch protection)
99 | # - All changes must go through feature branches and PRs
100 | # - Even repository admins cannot bypass these rules (enforce_admins: true)
--------------------------------------------------------------------------------
/packages/core/examples/browser/README.md:
--------------------------------------------------------------------------------
1 | # Browser Examples for cascadeflow
2 |
3 | This directory contains examples for using cascadeflow in browser environments.
4 |
5 | ## Security Note
6 |
7 | **NEVER expose API keys in browser code!** All examples use a backend proxy or edge function to securely handle API keys.
8 |
9 | ## Examples
10 |
11 | ### 1. Vercel Edge Function (`vercel-edge/`)
12 |
13 | Deploy cascadeflow as a Vercel Edge Function for global, low-latency inference.
14 |
15 | **Pros:**
16 | - Global edge network (low latency)
17 | - Serverless (no infrastructure)
18 | - Easy deployment
19 |
20 | **Cons:**
21 | - Vendor lock-in (Vercel)
22 | - Cold starts
23 |
24 | ## Quick Start
25 |
26 | ```bash
27 | cd vercel-edge
28 | npm install
29 | vercel dev # Test locally
30 | vercel deploy # Deploy to production
31 | ```
32 |
33 | ## Usage Patterns
34 |
35 | ### Pattern 1: Edge Function (Serverless)
36 |
37 | Best for: Public-facing apps, global users, low latency
38 |
39 | ```typescript
40 | // Edge function handles everything
41 | import { CascadeAgent } from '@cascadeflow/core';
42 |
43 | export default async function handler(req: Request) {
44 | // Recommended: Claude Haiku + GPT-5
45 | const agent = new CascadeAgent({
46 | models: [
47 | { name: 'claude-3-5-haiku-20241022', provider: 'anthropic', cost: 0.0008, apiKey: process.env.ANTHROPIC_API_KEY },
48 | { name: 'gpt-5', provider: 'openai', cost: 0.00125, apiKey: process.env.OPENAI_API_KEY }
49 | ]
50 | });
51 |
52 | const { query } = await req.json();
53 | const result = await agent.run(query);
54 |
55 | return Response.json(result);
56 | }
57 | ```
58 |
59 | ### Pattern 2: Backend API + Frontend
60 |
61 | Best for: Enterprise apps, existing backends, fine-grained control
62 |
63 | ```typescript
64 | // Backend (Express)
65 | app.post('/api/cascade', async (req, res) => {
66 | const agent = new CascadeAgent({ /* config */ });
67 | const result = await agent.run(req.body.query);
68 | res.json(result);
69 | });
70 |
71 | // Frontend (Browser)
72 | const response = await fetch('/api/cascade', {
73 | method: 'POST',
74 | headers: { 'Content-Type': 'application/json' },
75 | body: JSON.stringify({ query: 'What is TypeScript?' })
76 | });
77 | const result = await response.json();
78 | ```
79 |
80 | ### Pattern 3: Direct Browser (Multi-Provider Support)
81 |
82 | Best for: When you already have a proxy endpoint
83 |
84 | All providers automatically work in browser through runtime detection:
85 |
86 | ```typescript
87 | import { CascadeAgent } from '@cascadeflow/core';
88 |
89 | const agent = new CascadeAgent({
90 | models: [
91 | {
92 | name: 'claude-3-5-haiku-20241022',
93 | provider: 'anthropic',
94 | cost: 0.0008,
95 | proxyUrl: '/api/anthropic-proxy' // Your proxy endpoint
96 | },
97 | {
98 | name: 'gpt-5',
99 | provider: 'openai',
100 | cost: 0.00125,
101 | proxyUrl: '/api/openai-proxy' // Your proxy endpoint
102 | }
103 | ]
104 | });
105 |
106 | const result = await agent.run('Hello!');
107 | console.log(`Savings: ${result.savingsPercentage}%`);
108 | ```
109 |
110 | **All 7 providers work in browser:**
111 | OpenAI, Anthropic, Groq, Together AI, Ollama, HuggingFace, vLLM
112 |
113 | ## Environment Variables
114 |
115 | All examples require an OpenAI key; if you use the Claude drafter from the patterns above, set `ANTHROPIC_API_KEY` as well:
116 |
117 | ```bash
118 | OPENAI_API_KEY=sk-...
119 | ```
120 |
121 | For Vercel:
122 | ```bash
123 | vercel env add OPENAI_API_KEY
124 | ```
125 |
126 | For Cloudflare:
127 | ```bash
128 | npx wrangler secret put OPENAI_API_KEY
129 | ```
130 |
131 | For Express:
132 | ```bash
133 | # Create .env file
134 | echo "OPENAI_API_KEY=sk-..." > .env
135 | ```
136 |
137 | ## Cost Tracking in Browser
138 |
139 | All examples return full CascadeResult:
140 |
141 | ```typescript
142 | {
143 | content: "...",
144 | modelUsed: "gpt-4o-mini",
145 | totalCost: 0.000211,
146 | savingsPercentage: 97.8,
147 | cascaded: true,
148 | draftAccepted: true,
149 | // ... more fields
150 | }
151 | ```
152 |
153 | Display savings to users:
154 |
155 | ```javascript
156 | document.getElementById('savings').textContent =
157 | `Saved ${result.savingsPercentage}% vs using ${result.verifierModel || 'best model'}`;
158 | ```
159 |
--------------------------------------------------------------------------------
/cascadeflow/utils/caching.py:
--------------------------------------------------------------------------------
1 | """
2 | Response caching system.
3 |
4 | Provides:
5 | - In-memory LRU cache
6 | - Cache key generation
7 | - TTL support
8 | - Cache statistics
9 | """
10 |
11 | import hashlib
12 | import logging
13 | import time
14 | from collections import OrderedDict
15 | from typing import Any, Optional
16 |
17 | logger = logging.getLogger(__name__)
18 |
19 |
20 | class ResponseCache:
21 | """
22 | Simple in-memory LRU cache for responses.
23 |
24 | Example:
25 | >>> cache = ResponseCache(max_size=1000, default_ttl=3600)
26 | >>>
27 | >>> # Store response
28 | >>> cache.set("What is 2+2?", response_data, ttl=600)
29 | >>>
30 | >>> # Retrieve response
31 | >>> cached = cache.get("What is 2+2?")
32 | >>> if cached:
33 | ... print("Cache hit!")
34 | """
35 |
36 | def __init__(self, max_size: int = 1000, default_ttl: int = 3600):
37 | """
38 | Initialize cache.
39 |
40 | Args:
41 | max_size: Maximum number of cached items
42 | default_ttl: Default TTL in seconds
43 | """
44 | self.max_size = max_size
45 | self.default_ttl = default_ttl
46 | self.cache: OrderedDict = OrderedDict()
47 | self.stats = {"hits": 0, "misses": 0, "sets": 0, "evictions": 0}
48 |
49 | def _generate_key(
50 | self, query: str, model: Optional[str] = None, params: Optional[dict[str, Any]] = None
51 | ) -> str:
52 | """Generate cache key from query and parameters."""
53 | key_data = {"query": query, "model": model, "params": params or {}}
54 | key_str = str(sorted(key_data.items()))
55 | return hashlib.sha256(key_str.encode()).hexdigest()
56 |
57 | def get(
58 | self, query: str, model: Optional[str] = None, params: Optional[dict[str, Any]] = None
59 | ) -> Optional[dict[str, Any]]:
60 | """
61 | Get cached response.
62 |
63 | Returns None if not found or expired.
64 | """
65 | key = self._generate_key(query, model, params)
66 |
67 | if key not in self.cache:
68 | self.stats["misses"] += 1
69 | return None
70 |
71 | # Check TTL
72 | entry = self.cache[key]
73 | if time.time() > entry["expires_at"]:
74 | # Expired
75 | del self.cache[key]
76 | self.stats["misses"] += 1
77 | return None
78 |
79 | # Move to end (LRU)
80 | self.cache.move_to_end(key)
81 | self.stats["hits"] += 1
82 |
83 | logger.debug(f"Cache hit for query: {query[:50]}...")
84 | return entry["response"]
85 |
86 | def set(
87 | self,
88 | query: str,
89 | response: dict[str, Any],
90 | model: Optional[str] = None,
91 | params: Optional[dict[str, Any]] = None,
92 | ttl: Optional[int] = None,
93 | ):
94 | """Set cache entry."""
95 | key = self._generate_key(query, model, params)
96 |
97 |         # Evict the least-recently-used entry only when adding a brand-new key
98 |         if key not in self.cache and len(self.cache) >= self.max_size:
99 | # Remove oldest (first item)
100 | self.cache.popitem(last=False)
101 | self.stats["evictions"] += 1
102 |
103 | # Add entry
104 | self.cache[key] = {
105 | "response": response,
106 | "created_at": time.time(),
107 | "expires_at": time.time() + (ttl or self.default_ttl),
108 | }
109 | self.stats["sets"] += 1
110 |
111 | logger.debug(f"Cached response for query: {query[:50]}...")
112 |
113 | def clear(self):
114 | """Clear all cache."""
115 | self.cache.clear()
116 | logger.info("Cache cleared")
117 |
118 | def get_stats(self) -> dict[str, Any]:
119 | """Get cache statistics."""
120 | hit_rate = (
121 | self.stats["hits"] / (self.stats["hits"] + self.stats["misses"])
122 | if self.stats["hits"] + self.stats["misses"] > 0
123 | else 0
124 | )
125 |
126 | return {
127 | **self.stats,
128 | "size": len(self.cache),
129 | "max_size": self.max_size,
130 | "hit_rate": hit_rate,
131 | }
132 |
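133 | # Statistics sketch (illustrative):
134 | #
135 | #     cache = ResponseCache(max_size=2, default_ttl=60)
136 | #     cache.set("What is 2+2?", {"answer": "4"})
137 | #     cache.get("What is 2+2?")   # hit
138 | #     cache.get("What is 3+3?")   # miss
139 | #     cache.get_stats()           # {'hits': 1, 'misses': 1, ..., 'hit_rate': 0.5}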
--------------------------------------------------------------------------------
/.github/assets/CF_logo_dark.svg:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------