├── .env.example ├── .github └── workflows │ ├── docker-publish.yml │ └── go-ci.yml ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── ROADMAP.md ├── TODO.md ├── bin └── golangci-lint ├── cmd └── hapax │ └── main.go ├── config.example.yaml ├── config ├── config.go ├── config_env_test.go ├── config_test.go ├── config_watcher.go ├── http3_test.go ├── processing.go └── watcher_interface.go ├── docker-compose.yml ├── docs ├── _config.yml ├── api.md ├── assets │ └── css │ │ └── custom.css ├── blog │ └── why-we-built-hapax.md ├── configuration.md ├── getting-started.md ├── getting-started │ ├── 5-minute-setup.md │ └── index.md ├── index.md ├── installation.md ├── operations.md ├── performance.md └── security.md ├── errors ├── errors.go ├── errors_test.go ├── handlers.go ├── handlers_test.go ├── responses.go ├── responses_test.go ├── types.go └── types_test.go ├── examples ├── hapax.yaml └── requests.md ├── go.mod ├── go.sum ├── golangci.yml ├── main.go ├── prometheus.yml ├── server ├── circuitbreaker │ ├── circuitbreaker.go │ └── errors.go ├── handlers │ ├── completion.go │ ├── completion_test.go │ └── integration_test.go ├── http3_0rtt_test.go ├── http3_test.go ├── metrics │ └── metrics.go ├── middleware │ ├── auth.go │ ├── constants.go │ ├── logging.go │ ├── metrics.go │ ├── metrics_test.go │ ├── middleware.go │ ├── middleware_test.go │ ├── queue.go │ ├── queue_test.go │ ├── ratelimit.go │ ├── ratelimit_test.go │ ├── recovery.go │ ├── request_id.go │ └── timeout.go ├── mock_test.go ├── mocks │ ├── config_watcher.go │ └── llm.go ├── processing │ ├── processor.go │ ├── processor_test.go │ └── types.go ├── provider │ ├── errors.go │ ├── execution.go │ ├── health.go │ ├── manager_singleflight_test.go │ ├── metrics.go │ ├── provider.go │ └── provider_test.go ├── routing │ ├── metrics.go │ ├── metrics_test.go │ ├── router.go │ └── router_test.go ├── server.go ├── server_test.go └── validation │ ├── middleware.go │ ├── middleware_test.go │ ├── schema.go │ └── 
schema_test.go └── tests ├── circuitbreaker_test.go └── docker_test.go /.env.example: -------------------------------------------------------------------------------- 1 | # .env.example 2 | # ANTHROPIC_API_KEY= 3 | # OPENAI_API_KEY= -------------------------------------------------------------------------------- /.github/workflows/docker-publish.yml: -------------------------------------------------------------------------------- 1 | name: Docker Build and Publish 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v*' 7 | branches: 8 | - main 9 | pull_request: 10 | branches: 11 | - main 12 | 13 | jobs: 14 | build-and-push: 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - name: Checkout repository 19 | uses: actions/checkout@v4 20 | with: 21 | fetch-depth: 0 # Ensures all tags are fetched for versioning 22 | 23 | - name: Set up Docker Buildx 24 | uses: docker/setup-buildx-action@v3 25 | 26 | - name: Login to Docker Hub 27 | if: github.event_name != 'pull_request' 28 | uses: docker/login-action@v3 29 | with: 30 | username: ${{ secrets.DOCKERHUB_USERNAME }} 31 | password: ${{ secrets.DOCKERHUB_TOKEN }} 32 | ecr: auto 33 | logout: true 34 | 35 | - name: Extract metadata for Docker 36 | id: meta 37 | uses: docker/metadata-action@v5 38 | with: 39 | images: teilomillet/hapax 40 | tags: | 41 | type=semver,pattern={{version}} 42 | type=semver,pattern={{major}}.{{minor}} 43 | type=sha,prefix= 44 | type=ref,event=branch 45 | type=ref,event=pr 46 | type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }} 47 | 48 | - name: Build and push Docker image 49 | uses: docker/build-push-action@v5 50 | with: 51 | context: . 
52 | push: ${{ github.event_name != 'pull_request' }} 53 | tags: ${{ steps.meta.outputs.tags }} 54 | labels: ${{ steps.meta.outputs.labels }} 55 | cache-from: type=gha 56 | cache-to: type=gha,mode=max -------------------------------------------------------------------------------- /.github/workflows/go-ci.yml: -------------------------------------------------------------------------------- 1 | name: Go CI/CD 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | tags: 7 | - 'v[0-9]+.[0-9]+.[0-9]+' # Matches v0.1.0, v1.0.0, etc. 8 | pull_request: 9 | branches: [ main ] 10 | 11 | permissions: 12 | contents: write 13 | pull-requests: write 14 | 15 | jobs: 16 | test: 17 | name: Test & Lint 18 | runs-on: ubuntu-latest 19 | steps: 20 | - uses: actions/checkout@v4 21 | with: 22 | fetch-depth: 0 23 | 24 | - name: Set up Go 25 | uses: actions/setup-go@v4 26 | with: 27 | go-version: '1.21' 28 | cache: true 29 | 30 | - name: Install dependencies 31 | run: | 32 | go mod download 33 | go install golang.org/x/tools/cmd/goimports@latest 34 | 35 | - name: Run tests 36 | run: go test -race -coverprofile=coverage.txt -covermode=atomic ./... 
37 | 38 | - name: Run golangci-lint 39 | uses: golangci/golangci-lint-action@v3 40 | with: 41 | version: latest 42 | args: --timeout=5m --out-format=colored-line-number --issues-exit-code=1 43 | only-new-issues: true 44 | skip-pkg-cache: true 45 | skip-build-cache: false 46 | 47 | - name: Upload coverage 48 | if: success() 49 | uses: codecov/codecov-action@v3 50 | with: 51 | file: ./coverage.txt 52 | fail_ci_if_error: false 53 | verbose: true 54 | 55 | build: 56 | name: Build Binary 57 | needs: test 58 | runs-on: ubuntu-latest 59 | steps: 60 | - uses: actions/checkout@v4 61 | with: 62 | fetch-depth: 0 63 | 64 | - name: Set up Go 65 | uses: actions/setup-go@v4 66 | with: 67 | go-version: '1.21' 68 | cache: true 69 | 70 | - name: Build 71 | run: | 72 | VERSION=$(git describe --tags --always --dirty) 73 | mkdir -p hapax 74 | go build -v -ldflags="-X main.Version=${VERSION}" -o hapax/hapax . 75 | 76 | - name: Upload artifact 77 | uses: actions/upload-artifact@v4 78 | with: 79 | name: hapax-binary 80 | path: ./hapax 81 | retention-days: 5 82 | 83 | release: 84 | name: Create Release 85 | needs: [test, build] 86 | runs-on: ubuntu-latest 87 | if: github.event_name == 'push' && contains(github.ref, 'refs/tags/v') 88 | steps: 89 | - uses: actions/checkout@v4 90 | with: 91 | fetch-depth: 0 92 | 93 | - name: Generate changelog 94 | id: changelog 95 | run: | 96 | echo "CHANGELOG<<EOF" >> $GITHUB_ENV 97 | 98 | # Get tag message for overview 99 | TAG_MSG=$(git tag -l --format='%(contents)' $(git describe --tags --abbrev=0)) 100 | echo "$TAG_MSG" >> $GITHUB_ENV 101 | echo "" >> $GITHUB_ENV 102 | 103 | # For first release, get all commits 104 | if !
git tag --sort=-v:refname | grep -q '^v'; then 105 | RANGE="$(git rev-list --max-parents=0 HEAD)..HEAD" 106 | else 107 | RANGE="$(git describe --tags --abbrev=0 HEAD^)..HEAD" 108 | fi 109 | 110 | echo "## Changes" >> $GITHUB_ENV 111 | echo "" >> $GITHUB_ENV 112 | 113 | # Features 114 | echo "### Features" >> $GITHUB_ENV 115 | # All commits, with attribution only for contributors 116 | git log $RANGE --pretty=format:'* %s%ae' | grep -i '^* feat' | sed 's/feat: /* /' | sed "s/teilomillet@.*$//" | sed 's/\([^@]*\)@\(.*\)/ (@\2)/' >> $GITHUB_ENV || true 117 | echo "" >> $GITHUB_ENV 118 | 119 | # Fixes 120 | echo "### Bug Fixes" >> $GITHUB_ENV 121 | # All commits, with attribution only for contributors 122 | git log $RANGE --pretty=format:'* %s%ae' | grep -i '^* fix' | sed 's/fix: /* /' | sed "s/teilomillet@.*$//" | sed 's/\([^@]*\)@\(.*\)/ (@\2)/' >> $GITHUB_ENV || true 123 | echo "" >> $GITHUB_ENV 124 | 125 | # Documentation 126 | echo "### Documentation" >> $GITHUB_ENV 127 | # All commits, with attribution only for contributors 128 | git log $RANGE --pretty=format:'* %s%ae' | grep -i '^* docs' | sed 's/docs: /* /' | sed "s/teilomillet@.*$//" | sed 's/\([^@]*\)@\(.*\)/ (@\2)/' >> $GITHUB_ENV || true 129 | echo "" >> $GITHUB_ENV 130 | 131 | # Dependencies 132 | echo "## Dependency Updates" >> $GITHUB_ENV 133 | echo "" >> $GITHUB_ENV 134 | if [ -f "go.mod" ]; then 135 | echo '```diff' >> $GITHUB_ENV 136 | if git rev-parse --verify HEAD^ >/dev/null 2>&1; then 137 | git diff HEAD^ HEAD go.mod | grep '^[+-]' | grep -v '^[+-]module' >> $GITHUB_ENV || true 138 | fi 139 | echo '```' >> $GITHUB_ENV 140 | fi 141 | echo "" >> $GITHUB_ENV 142 | 143 | # List contributors (excluding maintainer) 144 | echo "## Contributors" >> $GITHUB_ENV 145 | git log $RANGE --format='%aE' | sort -u | grep -v 'teilomillet' | sed 's/.*@/@/' | while read handle; do 146 | echo "* $handle" >> $GITHUB_ENV 147 | done 148 | 149 | echo "EOF" >> $GITHUB_ENV 150 | 151 | - name: Download binary 152 | uses: 
actions/download-artifact@v4 153 | with: 154 | name: hapax-binary 155 | path: ./ 156 | 157 | - name: Create Release 158 | uses: softprops/action-gh-release@v1 159 | env: 160 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 161 | with: 162 | body: ${{ env.CHANGELOG }} 163 | files: ./hapax/hapax 164 | draft: false 165 | prerelease: false 166 | generate_release_notes: false -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # If you prefer the allow list template instead of the deny list, see community template: 2 | # https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore 3 | # 4 | # Binaries for programs and plugins 5 | *.exe 6 | *.exe~ 7 | *.dll 8 | *.so 9 | *.dylib 10 | 11 | # Test binary, built with `go test -c` 12 | *.test 13 | 14 | # Output of the go coverage tool, specifically when used with LiteIDE 15 | *.out 16 | 17 | # Dependency directories (remove the comment below to include it) 18 | # vendor/ 19 | 20 | # Go workspace file 21 | go.work 22 | go.work.sum 23 | 24 | # env file 25 | .env 26 | 27 | # Config files 28 | config.yaml 29 | 30 | /hapax 31 | coverage.txt 32 | go -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Build stage 2 | FROM golang:1.22-alpine AS builder 3 | 4 | # Install build dependencies 5 | RUN apk add --no-cache git gcc musl-dev 6 | 7 | # Set working directory 8 | WORKDIR /app 9 | 10 | # Copy go mod and sum files 11 | COPY go.mod go.sum ./ 12 | 13 | # Download dependencies 14 | RUN go mod download 15 | 16 | # Copy source code 17 | COPY . . 
18 | 19 | # Build the application 20 | RUN CGO_ENABLED=0 GOOS=linux go build -o hapax ./cmd/hapax 21 | 22 | # Final stage 23 | FROM alpine:3.19 24 | 25 | # Add non-root user 26 | RUN adduser -D -g '' hapax 27 | 28 | # Install runtime dependencies 29 | RUN apk add --no-cache ca-certificates tzdata curl 30 | 31 | # Set working directory 32 | WORKDIR /app 33 | 34 | # Copy binary and configuration files 35 | COPY --from=builder /app/hapax . 36 | 37 | COPY config.example.yaml ./config.yaml 38 | COPY docker-compose.yml ./docker-compose.yml 39 | COPY prometheus.yml ./prometheus.yml 40 | 41 | # Use non-root user 42 | USER hapax 43 | 44 | # Expose ports 45 | EXPOSE 8081 46 | 47 | # Set healthcheck 48 | HEALTHCHECK --interval=10s --timeout=5s --start-period=10s --retries=3 \ 49 | CMD curl -f http://localhost:8081/health || exit 1 50 | 51 | # Run the application 52 | ENTRYPOINT ["./hapax"] 53 | 54 | CMD ["--config", "config.yaml"] -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Hapax: AI Infrastructure 2 | 3 | Hapax is a production-ready AI infrastructure layer that ensures uninterrupted AI operations through intelligent provider management and automatic failover. Named after the Greek word ἅπαξ (meaning "once"), it embodies our core promise: configure once, then let it seamlessly manage your AI infrastructure. 4 | 5 | ## Common AI Infrastructure Challenges 6 | 7 | Organizations face several critical challenges in managing their AI infrastructure. Service disruptions from AI provider outages create direct revenue impacts, while engineering teams dedicate significant resources to managing multiple AI providers. Teams struggle with limited visibility into AI usage across departments, compounded by complex integration requirements spanning different AI providers. 
8 | 9 | ## Core Capabilities 10 | 11 | Hapax delivers a robust infrastructure layer through three core capabilities: 12 | 13 | ### Intelligent Provider Management 14 | The system ensures continuous service through real-time health monitoring with configurable timeouts and check intervals. Automatic failover between providers maintains zero downtime, while a sophisticated three-state circuit breaker (closed, half-open, open) with configurable thresholds prevents cascade failures. Request deduplication using the singleflight pattern optimizes resource utilization. 15 | 16 | ### Production-Ready Architecture 17 | The architecture prioritizes reliability through high-performance request routing and load balancing. Comprehensive error handling and request validation ensure data integrity, while structured logging with request tracing enables detailed debugging. Configurable timeout and rate limiting mechanisms protect system resources. 18 | 19 | ### Security & Monitoring 20 | Security is foundational, implemented through API key-based authentication and comprehensive request validation and sanitization. The monitoring system provides granular usage tracking per endpoint and detailed request logging for operational visibility. 21 | 22 | ## Usage Tracking & Monitoring 23 | 24 | Hapax provides built-in monitoring capabilities through Prometheus integration, offering comprehensive visibility into your AI infrastructure: 25 | 26 | ### Request Tracking 27 | Monitor API usage through versioned endpoints: 28 | ```bash 29 | # Standard endpoint structure 30 | /v1/completions 31 | /health # Global system health status 32 | /v1/health # Versioned API health status 33 | /metrics 34 | ``` 35 | 36 | ### Prometheus Integration 37 | The monitoring system tracks essential metrics including request counts and status by endpoint, request latencies, active request volume, error rates by provider, and circuit breaker states. 
Health check performance metrics and request deduplication statistics provide deep insights into system efficiency. 38 | 39 | Each metric is designed for operational visibility: 40 | - `hapax_http_requests_total` tracks request volume by endpoint and status 41 | - `hapax_http_request_duration_seconds` measures request latency 42 | - `hapax_http_active_requests` shows current load by endpoint 43 | - `hapax_errors_total` monitors error rates by type 44 | - `circuit_breaker_state` indicates provider health status 45 | - `hapax_health_check_duration_seconds` validates provider responsiveness 46 | - `hapax_deduplicated_requests_total` confirms request efficiency 47 | - `hapax_rate_limit_hits_total` tracks rate limiting by client 48 | 49 | ### Access Management 50 | Security is enforced through API key-based authentication, with per-endpoint rate limiting and comprehensive request validation and sanitization. 51 | 52 | ## Technical Implementation 53 | 54 | ```json 55 | // Example: Completion Request 56 | { 57 | "messages": [ 58 | {"role": "system", "content": "You are a customer service assistant."}, 59 | {"role": "user", "content": "I need help with my order #12345"} 60 | ] 61 | } 62 | ``` 63 | 64 | When your primary provider experiences issues, Hapax: 65 | 1. Detects the failure through continuous health checks (1-minute intervals) 66 | 2. Activates the circuit breaker after 3 consecutive failures 67 | 3. Routes traffic to healthy backup providers in preference order 68 | 4. 
Maintains detailed metrics for operational visibility 69 | 70 | ## Deployment Options 71 | 72 | Deploy Hapax in minutes with our production-ready container: 73 | 74 | ```bash 75 | docker run -p 8080:8080 \ 76 | -e OPENAI_API_KEY=your_key \ 77 | -e ANTHROPIC_API_KEY=your_key \ 78 | -e CONFIG_PATH=/app/config.yaml \ 79 | teilomillet/hapax:latest 80 | ``` 81 | 82 | Default configuration is provided but can be customized via `config.yaml`: 83 | ```yaml 84 | server: 85 | port: 8080 86 | read_timeout: 30s 87 | write_timeout: 45s 88 | max_header_bytes: 2097152 # 2MB 89 | shutdown_timeout: 30s 90 | http3: # Optional HTTP/3 support 91 | enabled: true 92 | port: 443 # Default HTTPS/QUIC port 93 | tls_cert_file: "/path/to/cert.pem" 94 | tls_key_file: "/path/to/key.pem" 95 | idle_timeout: 30s 96 | max_bi_streams_concurrent: 100 97 | max_uni_streams_concurrent: 100 98 | max_stream_receive_window: 6291456 # 6MB 99 | max_connection_receive_window: 15728640 # 15MB 100 | 101 | circuitBreaker: 102 | maxRequests: 100 103 | interval: 30s 104 | timeout: 10s 105 | failureThreshold: 5 106 | 107 | providerPreference: 108 | - ollama 109 | - anthropic 110 | - openai 111 | ``` 112 | 113 | ## Integration Architecture 114 | 115 | Hapax provides comprehensive integration capabilities through multiple components: 116 | 117 | ### REST API with Versioned Endpoints 118 | The API architecture provides dedicated endpoints for core functionalities: 119 | - `/v1/completions` handles AI completions, 120 | - `/v1/health` provides versioned API health monitoring, 121 | - `/health` offers global system health status. 122 | - `/metrics` exposes Prometheus metrics for comprehensive monitoring. 123 | 124 | ### Comprehensive Monitoring 125 | The monitoring infrastructure integrates Prometheus metrics across all critical components, enabling detailed tracking of request latencies, circuit breaker states, provider health status, and request deduplication. 
This comprehensive approach ensures complete operational visibility. 126 | 127 | ### Health Checks 128 | The health monitoring system operates with enterprise-grade configurability. Check intervals default to one minute with adjustable timeouts, while failure thresholds are tuned to prevent false positives. Health monitoring extends from individual providers to Docker container status, with granular per-provider health tracking. 129 | 130 | ### Production Safeguards 131 | System integrity is maintained through multiple safeguards: request deduplication prevents redundant processing, automatic failover ensures continuous operation, circuit breaker patterns protect against cascade failures, and structured JSON logging with correlation IDs enables thorough debugging. 132 | 133 | ### Protocol Support 134 | The server supports both HTTP/1.1 and HTTP/3 (QUIC) protocols: 135 | - HTTP/1.1 for universal compatibility 136 | - HTTP/3 for improved performance: 137 | - Reduced latency through 0-RTT connections 138 | - Better multiplexing with independent streams 139 | - Improved congestion control 140 | - Automatic connection migration 141 | - Built-in TLS 1.3 encryption 142 | 143 | ## Technical Requirements 144 | 145 | Running Hapax requires: 146 | - Docker-compatible environment with network access to AI providers 147 | - 1GB RAM minimum (4GB recommended for production) 148 | - TLS certificates for HTTP/3 support (if enabled) 149 | - Access credentials (API keys) for supported providers: OpenAI, Anthropic, etc. 150 | 151 | ## Documentation 152 | 153 | Comprehensive documentation is available through multiple resources. The [Quick Start Guide](https://github.com/teilomillet/hapax/wiki) provides initial setup instructions, while detailed information about the API and security measures can be found in the [API Documentation](docs/api.md) and [Security Overview](docs/security.md). For operational insights, consult the [Monitoring Guide](docs/monitoring.md). 
154 | 155 | ## License 156 | 157 | Licensed under Apache 2.0. See [LICENSE](LICENSE) for details. 158 | 159 | --- 160 | 161 | For detailed technical specifications, visit our [Technical Documentation](docs/technical.md). -------------------------------------------------------------------------------- /ROADMAP.md: -------------------------------------------------------------------------------- 1 | # Hapax Development Roadmap 2 | 3 | ## Vision 4 | Hapax is the reliability layer between your code and LLM providers. We're building an open-source infrastructure layer that makes LLM operations robust and predictable. Our goal is to provide the tools and visibility you need to run AI applications with confidence, whether you're a solo developer or running large-scale deployments. 5 | 6 | ### Core Principles 7 | - **Reliability**: Smart provider management for uninterrupted operations 8 | - **Visibility**: Clear insights into your LLM infrastructure 9 | - **Flexibility**: Adaptable to your security and scaling needs 10 | - **Simplicity**: Complex infrastructure made approachable 11 | 12 | ## v0.1.0: Foundation (Current) 13 | Focus: Core functionality and initial production readiness. 
14 | 15 | ### Core Features 16 | - [x] Request queueing and deduplication 17 | - [x] HTTP/3 (QUIC) implementation 18 | - High-performance transport layer 19 | - 0-RTT connection establishment 20 | - Connection migration 21 | - Multiplexing optimization 22 | - TLS 1.3 integration 23 | 24 | ### Documentation 25 | - [x] Installation and Configuration 26 | - Deployment guide 27 | - Configuration reference 28 | - Security setup 29 | - Performance tuning 30 | - [x] API Documentation 31 | - Endpoint specifications 32 | - Request/response formats 33 | - Error handling 34 | - Authentication 35 | - [x] Operations Guide 36 | - Monitoring setup 37 | - Metrics reference 38 | - Logging guide 39 | - Troubleshooting 40 | 41 | ## v0.2.0: Enterprise Observability 42 | Focus: Deep visibility and operational intelligence. 43 | 44 | ### Advanced Monitoring 45 | - [ ] Enhanced metrics collection 46 | - Detailed latency tracking 47 | - Resource utilization metrics 48 | - Provider-specific metrics 49 | - Custom metric pipelines 50 | - [ ] Advanced audit logging 51 | - Structured audit events 52 | - Compliance-ready logging 53 | - Log aggregation support 54 | - Log retention policies 55 | - [ ] Operational dashboards 56 | - Real-time system visibility 57 | - Performance analytics 58 | - Health monitoring 59 | - Alert management 60 | 61 | ### Security Enhancements 62 | - [ ] Role-based access control 63 | - Fine-grained permissions 64 | - Resource-level access 65 | - Audit trails 66 | - Identity provider integration 67 | - [ ] Enhanced security features 68 | - Request validation 69 | - Rate limiting 70 | - Token management 71 | - Security event monitoring 72 | 73 | ## v0.3.0: Enterprise Scale 74 | Focus: Horizontal scaling and high availability. 
75 | 76 | ### Distributed Architecture 77 | - [ ] Cluster mode 78 | - Leader election 79 | - State synchronization 80 | - Node auto-discovery 81 | - Cross-node routing 82 | - [ ] Advanced request management 83 | - Dynamic rate limiting 84 | - Request quotas 85 | - Load balancing 86 | - Circuit breaking 87 | - [ ] Distributed caching 88 | - Cache strategies 89 | - Invalidation rules 90 | - Memory management 91 | - Cache analytics 92 | 93 | ### Enterprise Integration 94 | - [ ] Advanced routing 95 | - Content-based routing 96 | - Traffic splitting 97 | - Request transformation 98 | - Custom middleware 99 | - [ ] Provider management 100 | - Multi-provider failover 101 | - Provider health tracking 102 | - Cost optimization 103 | - Usage analytics 104 | 105 | ## v1.0.0: Production Scale 106 | Focus: Mission-critical deployment capabilities. 107 | 108 | ### Performance & Reliability 109 | - [ ] Advanced performance features 110 | - Connection pooling 111 | - Request batching 112 | - Memory optimization 113 | - CPU optimization 114 | - [ ] Reliability enhancements 115 | - Automated failover 116 | - Self-healing 117 | - Predictive scaling 118 | - Performance prediction 119 | 120 | ### Enterprise Operations 121 | - [ ] Cost management 122 | - Usage tracking 123 | - Budget controls 124 | - Cost allocation 125 | - Usage forecasting 126 | - [ ] SLA management 127 | - SLA definition 128 | - Performance tracking 129 | - Availability monitoring 130 | - Compliance reporting 131 | 132 | ### Advanced Features 133 | - [ ] Multi-region support 134 | - Geographic routing 135 | - Regional failover 136 | - Data sovereignty 137 | - Cross-region analytics 138 | - [ ] Advanced security 139 | - Zero-trust architecture 140 | - Advanced threat detection 141 | - Security analytics 142 | - Compliance automation 143 | 144 | ## Success Metrics 145 | - Sub-minute deployment time 146 | - Zero-touch configuration 147 | - 99.99% availability 148 | - < 50ms added latency 149 | - Zero security 
vulnerabilities 150 | - Automatic failure recovery 151 | - Complete operational visibility 152 | 153 | ## Future Considerations 154 | - Edge computing integration 155 | - Custom model hosting 156 | - Model performance analytics 157 | - Fine-tuning infrastructure 158 | - Hybrid deployment models 159 | - Advanced protocol support 160 | 161 | ## Notes 162 | - Security and reliability are continuous priorities 163 | - Each feature includes comprehensive testing and documentation 164 | - Regular security audits are mandatory 165 | - Features may be reprioritized based on enterprise requirements 166 | -------------------------------------------------------------------------------- /TODO.md: -------------------------------------------------------------------------------- 1 | Place where I write notes about what need to be done 2 | -------------------------------------------------------------------------------- /bin/golangci-lint: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teilomillet/hapax/7c08e64129a1c0adfea8a65dd7a608fd2b345a37/bin/golangci-lint -------------------------------------------------------------------------------- /cmd/hapax/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "flag" 6 | "fmt" 7 | "log" 8 | "os" 9 | "os/signal" 10 | "syscall" 11 | 12 | "github.com/teilomillet/hapax/config" 13 | "github.com/teilomillet/hapax/server" 14 | "go.uber.org/zap" 15 | ) 16 | 17 | var ( 18 | configFile = flag.String("config", "hapax.yaml", "Path to configuration file") 19 | validate = flag.Bool("validate", false, "Validate configuration and exit") 20 | version = flag.Bool("version", false, "Print version and exit") 21 | ) 22 | 23 | // Version represents the current version of Hapax 24 | const Version = "v0.1.0" 25 | 26 | func main() { 27 | flag.Parse() 28 | 29 | if *version { 30 | fmt.Printf("hapax %s\n", 
Version) 31 | os.Exit(0) 32 | } 33 | 34 | // Create logger 35 | logger, err := zap.NewProduction() 36 | if err != nil { 37 | log.Fatalf("Failed to create logger: %v", err) 38 | } 39 | defer func() { 40 | if err := logger.Sync(); err != nil { 41 | // Log sync failure, but use fmt.Fprintf to stderr since the zap logger might be unavailable 42 | fmt.Fprintf(os.Stderr, "Failed to sync logger: %v\n", err) 43 | } 44 | }() 45 | 46 | // Load configuration 47 | cfg, err := config.LoadFile(*configFile) 48 | if err != nil { 49 | logger.Fatal("Failed to load config", 50 | zap.Error(err), 51 | zap.String("config_file", *configFile), 52 | ) 53 | } 54 | 55 | // Just validate and exit if requested 56 | if *validate { 57 | fmt.Println("Configuration is valid") 58 | os.Exit(0) 59 | } 60 | 61 | // Create server with config path and logger 62 | srv, err := server.NewServer(*configFile, logger) 63 | if err != nil { 64 | logger.Fatal("Failed to create server", 65 | zap.Error(err), 66 | ) 67 | } 68 | 69 | // Setup signal handling for graceful shutdown 70 | ctx, cancel := context.WithCancel(context.Background()) 71 | defer cancel() 72 | 73 | go func() { 74 | sigCh := make(chan os.Signal, 1) 75 | signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM) 76 | <-sigCh 77 | logger.Info("Received shutdown signal") 78 | cancel() 79 | }() 80 | 81 | // Start server 82 | logger.Info("Starting hapax", 83 | zap.String("version", Version), 84 | zap.Int("port", cfg.Server.Port), 85 | ) 86 | 87 | if err := srv.Start(ctx); err != nil { 88 | logger.Fatal("Server error", 89 | zap.Error(err), 90 | ) 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /config.example.yaml: -------------------------------------------------------------------------------- 1 | server: 2 | port: 8080 3 | read_timeout: 30s 4 | write_timeout: 30s 5 | max_header_bytes: 1048576 6 | shutdown_timeout: 5s 7 | 8 | llm: 9 | provider: anthropic 10 | model: claude-3.5-haiku-latest 11 | api_key: 
${ANTHROPIC_API_KEY} 12 | max_context_tokens: 100000 13 | retry: 14 | max_retries: 3 15 | initial_delay: 100ms 16 | max_delay: 2s 17 | multiplier: 2.0 18 | retryable_errors: ["rate_limit", "timeout", "server_error"] 19 | 20 | providers: 21 | anthropic: 22 | type: anthropic 23 | model: claude-3.5-haiku-latest 24 | api_key: ${ANTHROPIC_API_KEY} 25 | ollama: 26 | type: ollama 27 | model: llama3 28 | api_key: "" 29 | 30 | provider_preference: 31 | - anthropic 32 | - ollama 33 | 34 | logging: 35 | level: info 36 | format: json 37 | 38 | metrics: 39 | enabled: true 40 | prometheus: 41 | enabled: true 42 | 43 | routes: 44 | - path: /v1/completions 45 | handler: completion 46 | version: v1 47 | methods: [POST] 48 | - path: /health 49 | handler: health 50 | version: v1 51 | methods: [GET] 52 | 53 | processing: 54 | request_templates: 55 | default: "{{.Input}}" 56 | chat: "{{range .Messages}}{{.Role}}: {{.Content}}\n{{end}}" 57 | response_formatting: 58 | clean_json: true 59 | trim_whitespace: true 60 | max_length: 1048576 -------------------------------------------------------------------------------- /config/config_test.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | "time" 7 | ) 8 | 9 | func TestLoadValidConfig(t *testing.T) { 10 | yamlConfig := ` 11 | server: 12 | port: 9090 13 | read_timeout: 45s 14 | write_timeout: 45s 15 | max_header_bytes: 2097152 16 | shutdown_timeout: 45s 17 | 18 | llm: 19 | provider: openai 20 | model: gpt-4 21 | endpoint: https://api.openai.com/v1 22 | system_prompt: "You are a helpful assistant." 
23 | options: 24 | temperature: 0.8 25 | max_tokens: 4000 26 | 27 | logging: 28 | level: debug 29 | format: json 30 | 31 | routes: 32 | - path: /v1/completions 33 | handler: completion 34 | version: v1 35 | - path: /health 36 | handler: health 37 | version: v1 38 | ` 39 | 40 | config, err := Load(strings.NewReader(yamlConfig)) 41 | if err != nil { 42 | t.Fatalf("Failed to load valid config: %v", err) 43 | } 44 | 45 | // Check server config 46 | if config.Server.Port != 9090 { 47 | t.Errorf("unexpected port: got %d, want %d", config.Server.Port, 9090) 48 | } 49 | if config.Server.ReadTimeout != 45*time.Second { 50 | t.Errorf("unexpected read timeout: got %v, want %v", config.Server.ReadTimeout, 45*time.Second) 51 | } 52 | 53 | // Check LLM config 54 | if config.LLM.Provider != "openai" { 55 | t.Errorf("unexpected provider: got %s, want %s", config.LLM.Provider, "openai") 56 | } 57 | if config.LLM.Model != "gpt-4" { 58 | t.Errorf("unexpected model: got %s, want %s", config.LLM.Model, "gpt-4") 59 | } 60 | 61 | // Check logging config 62 | if config.Logging.Level != "debug" { 63 | t.Errorf("unexpected log level: got %s, want %s", config.Logging.Level, "debug") 64 | } 65 | if config.Logging.Format != "json" { 66 | t.Errorf("unexpected log format: got %s, want %s", config.Logging.Format, "json") 67 | } 68 | 69 | // Check routes 70 | if len(config.Routes) != 2 { 71 | t.Errorf("unexpected number of routes: got %d, want %d", len(config.Routes), 2) 72 | } 73 | } 74 | 75 | func TestLoadInvalidConfig(t *testing.T) { 76 | tests := []struct { 77 | name string 78 | config string 79 | want string 80 | }{ 81 | { 82 | name: "invalid port", 83 | config: ` 84 | server: 85 | port: -1 86 | `, 87 | want: "invalid port", 88 | }, 89 | { 90 | name: "invalid log level", 91 | config: ` 92 | logging: 93 | level: invalid 94 | `, 95 | want: "invalid log level", 96 | }, 97 | { 98 | name: "empty provider", 99 | config: ` 100 | llm: 101 | provider: "" 102 | `, 103 | want: "empty LLM provider", 104 
| }, 105 | { 106 | name: "empty route path", 107 | config: ` 108 | routes: 109 | - path: "" 110 | handler: test 111 | `, 112 | want: "empty path", 113 | }, 114 | } 115 | 116 | for _, tt := range tests { 117 | t.Run(tt.name, func(t *testing.T) { 118 | _, err := Load(strings.NewReader(tt.config)) 119 | if err == nil { 120 | t.Error("expected error, got nil") 121 | } else if !strings.Contains(err.Error(), tt.want) { 122 | t.Errorf("unexpected error: got %v, want %v", err, tt.want) 123 | } 124 | }) 125 | } 126 | } 127 | 128 | func TestDefaultConfig(t *testing.T) { 129 | config := DefaultConfig() 130 | 131 | // Check server defaults 132 | if config.Server.Port != 8080 { 133 | t.Errorf("unexpected default port: got %d, want %d", config.Server.Port, 8080) 134 | } 135 | if config.Server.ReadTimeout != 30*time.Second { 136 | t.Errorf("unexpected default read timeout: got %v, want %v", config.Server.ReadTimeout, 30*time.Second) 137 | } 138 | 139 | // Check LLM defaults 140 | if config.LLM.Provider != "ollama" { 141 | t.Errorf("unexpected default provider: got %s, want %s", config.LLM.Provider, "ollama") 142 | } 143 | if config.LLM.Model != "llama2" { 144 | t.Errorf("unexpected default model: got %s, want %s", config.LLM.Model, "llama2") 145 | } 146 | 147 | // Check logging defaults 148 | if config.Logging.Level != "info" { 149 | t.Errorf("unexpected default log level: got %s, want %s", config.Logging.Level, "info") 150 | } 151 | if config.Logging.Format != "json" { 152 | t.Errorf("unexpected default log format: got %s, want %s", config.Logging.Format, "json") 153 | } 154 | 155 | // Check default routes 156 | if len(config.Routes) != 3 { 157 | t.Errorf("unexpected number of default routes: got %d, want %d", 158 | len(config.Routes), 3) 159 | } 160 | } 161 | -------------------------------------------------------------------------------- /config/config_watcher.go: -------------------------------------------------------------------------------- 1 | // config_watcher.go 2 | 
package config

import (
	"fmt"
	"sync"
	"sync/atomic"

	"github.com/fsnotify/fsnotify"
	"go.uber.org/zap"
)

// Verify at compile time that ConfigWatcher implements Watcher.
var _ Watcher = (*ConfigWatcher)(nil)

// ConfigWatcher manages configuration hot reloading. It watches a single
// config file, reloads and validates the file on write events, and fans the
// new *Config out to subscribers.
type ConfigWatcher struct {
	// currentConfig always holds a *Config; atomic.Value gives lock-free,
	// thread-safe reads via GetCurrentConfig.
	currentConfig atomic.Value
	configPath    string
	watcher       *fsnotify.Watcher
	logger        *zap.Logger

	// mu guards subscribers: Subscribe appends from caller goroutines while
	// the watch goroutine reads the slice in handleConfigChange, so access
	// must be synchronized to avoid a data race.
	mu          sync.Mutex
	subscribers []chan<- *Config
}

// NewConfigWatcher creates a watcher for configPath, loads and stores the
// initial configuration, and starts a background goroutine that reloads the
// file whenever it is written. The returned watcher must be released with
// Close.
func NewConfigWatcher(configPath string, logger *zap.Logger) (*ConfigWatcher, error) {
	watcher, err := fsnotify.NewWatcher()
	if err != nil {
		return nil, fmt.Errorf("failed to create watcher: %w", err)
	}

	cw := &ConfigWatcher{
		configPath: configPath,
		watcher:    watcher,
		logger:     logger,
	}

	// Load the initial configuration up front so GetCurrentConfig never
	// observes an empty atomic.Value.
	initialConfig, err := LoadFile(configPath)
	if err != nil {
		// Release the fsnotify handle (an OS file descriptor) on the
		// failure path; the load error takes precedence over any close error.
		_ = watcher.Close()
		return nil, fmt.Errorf("failed to load initial config: %w", err)
	}
	cw.currentConfig.Store(initialConfig)

	// Start watching the config file.
	if err := watcher.Add(configPath); err != nil {
		_ = watcher.Close() // best-effort cleanup; see above
		return nil, fmt.Errorf("failed to watch config file: %w", err)
	}

	go cw.watchConfig()
	return cw, nil
}

// Subscribe returns a buffered channel that receives each successfully
// reloaded configuration. Delivery is non-blocking, so a subscriber that is
// not ready may miss intermediate updates; the latest configuration is always
// available via GetCurrentConfig.
func (cw *ConfigWatcher) Subscribe() <-chan *Config {
	ch := make(chan *Config, 1)
	cw.mu.Lock()
	cw.subscribers = append(cw.subscribers, ch)
	cw.mu.Unlock()
	return ch
}

// GetCurrentConfig returns the current configuration thread-safely.
func (cw *ConfigWatcher) GetCurrentConfig() *Config {
	return cw.currentConfig.Load().(*Config)
}

// watchConfig is the watch loop, run on its own goroutine. It exits when the
// underlying fsnotify watcher is closed (both channels are then closed).
// NOTE(review): only Write events trigger a reload; editors that replace the
// file via rename+create would not be detected — confirm whether that matters
// for the supported deployment styles before extending the check.
func (cw *ConfigWatcher) watchConfig() {
	for {
		select {
		case event, ok := <-cw.watcher.Events:
			if !ok {
				return
			}
			if event.Op&fsnotify.Write == fsnotify.Write {
				cw.handleConfigChange()
			}
		case err, ok := <-cw.watcher.Errors:
			if !ok {
				return
			}
			cw.logger.Error("Config watcher error", zap.Error(err))
		}
	}
}

// handleConfigChange reloads the config file, validates it, stores it as the
// current configuration, and notifies subscribers. Invalid or unreadable
// configs are logged and discarded, leaving the previous config in place.
func (cw *ConfigWatcher) handleConfigChange() {
	cw.logger.Info("Detected config file change, reloading...")

	newConfig, err := LoadFile(cw.configPath)
	if err != nil {
		cw.logger.Error("Failed to load new config", zap.Error(err))
		return
	}

	// Validate before publishing so subscribers never see a broken config.
	if err := newConfig.Validate(); err != nil {
		cw.logger.Error("Invalid new configuration", zap.Error(err))
		return
	}

	// Store the new configuration for GetCurrentConfig readers.
	cw.currentConfig.Store(newConfig)

	// Snapshot the subscriber list under the lock so Subscribe can run
	// concurrently with notification without racing on the slice.
	cw.mu.Lock()
	subs := make([]chan<- *Config, len(cw.subscribers))
	copy(subs, cw.subscribers)
	cw.mu.Unlock()

	// Notify all subscribers without blocking the watch loop.
	for _, sub := range subs {
		select {
		case sub <- newConfig:
		default:
			// Subscriber's buffer is full; skip it. It can still fetch the
			// latest config via GetCurrentConfig.
		}
	}

	cw.logger.Info("Configuration reloaded successfully")
}

// Close stops the watcher and releases its OS resources; the watch goroutine
// exits once the event channels are closed.
func (cw *ConfigWatcher) Close() error {
	return cw.watcher.Close()
}
--------------------------------------------------------------------------------
/config/processing.go:
--------------------------------------------------------------------------------
package config

// ProcessingConfig defines the configuration for request/response processing
type ProcessingConfig struct {
	// RequestTemplates maps template names to their content
	RequestTemplates map[string]string `yaml:"request_templates"`

	// ResponseFormatting configures how responses should be formatted
	ResponseFormatting ResponseFormattingConfig `yaml:"response_formatting"`
}

// ResponseFormattingConfig defines response formatting options
type ResponseFormattingConfig struct {
	// CleanJSON enables JSON response cleaning using
gollm 15 | CleanJSON bool `yaml:"clean_json"` 16 | 17 | // TrimWhitespace removes extra whitespace from responses 18 | TrimWhitespace bool `yaml:"trim_whitespace"` 19 | 20 | // MaxLength limits the response length 21 | MaxLength int `yaml:"max_length"` 22 | } 23 | -------------------------------------------------------------------------------- /config/watcher_interface.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | // Watcher defines the behavior we expect from any configuration watcher 4 | type Watcher interface { 5 | GetCurrentConfig() *Config 6 | Subscribe() <-chan *Config 7 | Close() error 8 | } 9 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | hapax: 3 | build: . 4 | container_name: hapax 5 | ports: 6 | - "8081:8081" 7 | volumes: 8 | - ./config.yaml:/app/config.yaml 9 | environment: 10 | - TZ=UTC 11 | restart: unless-stopped 12 | healthcheck: 13 | test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:8081/health"] 14 | interval: 30s 15 | timeout: 3s 16 | retries: 3 17 | networks: 18 | - hapax-net 19 | 20 | prometheus: 21 | image: prom/prometheus:latest 22 | ports: 23 | - "9090:9090" 24 | volumes: 25 | - ./prometheus.yml:/etc/prometheus/prometheus.yml 26 | depends_on: 27 | - hapax 28 | networks: 29 | - hapax-net 30 | 31 | networks: 32 | hapax-net: 33 | driver: bridge 34 | -------------------------------------------------------------------------------- /docs/_config.yml: -------------------------------------------------------------------------------- 1 | title: "Hapax" 2 | description: "The reliability layer between your code and LLM providers" 3 | remote_theme: "just-the-docs/just-the-docs" 4 | 5 | # Theme and Navigation 6 | color_scheme: "dark" 7 | nav_spacing: "normal" 8 | heading_anchors: true 9 | back_to_top: true 
10 | back_to_top_text: "Back to top" 11 | 12 | # Theme Overrides 13 | color_scheme: "dark" 14 | custom_css: true 15 | custom_css_path: "/assets/css/custom.css" 16 | 17 | # Typography and Spacing 18 | spacing_unit: 2 19 | font_size: "16px" 20 | content_width: "900px" 21 | typographic_spacing: true 22 | 23 | # Heading Styles 24 | heading_spacing: 2.5 25 | h1_size: "3rem" 26 | h2_size: "2.5rem" 27 | h3_size: "1.75rem" 28 | h4_size: "1.25rem" 29 | 30 | # Text Styles 31 | body_line_height: 1.8 32 | body_font_family: "system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif" 33 | mono_font_family: "'SFMono-Regular', Consolas, 'Liberation Mono', Menlo, Courier, monospace" 34 | 35 | # Search Configuration 36 | search_enabled: true 37 | search: 38 | heading_level: 2 39 | previews: 3 40 | preview_words_before: 5 41 | preview_words_after: 5 42 | tokenizer_separator: "/[\\s/]+/" 43 | rel_url: true 44 | button: false 45 | 46 | # Code and Content 47 | enable_copy_code_button: true 48 | code_block_padding: "1.5rem" 49 | code_font_size: "0.95em" 50 | 51 | # Callouts for Important Information 52 | callouts: 53 | note: 54 | title: "Note" 55 | color: "blue" 56 | important: 57 | title: "Important" 58 | color: "purple" 59 | warning: 60 | title: "Warning" 61 | color: "red" 62 | tip: 63 | title: "Tip" 64 | color: "green" 65 | security: 66 | title: "Security" 67 | color: "yellow" 68 | 69 | # External Links 70 | aux_links: 71 | "GitHub": ["//github.com/teilomillet/hapax"] 72 | aux_links_new_tab: true 73 | 74 | # License 75 | footer_content: "Licensed under Apache License, Version 2.0." 
76 | 77 | # Additional Styles 78 | callouts_opacity: 1 79 | callouts_padding: "1.5rem" 80 | table_font_size: "0.95em" 81 | nav_font_size: "1rem" 82 | sidebar_width: "280px" -------------------------------------------------------------------------------- /docs/assets/css/custom.css: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/blog/why-we-built-hapax.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: page 3 | title: Hapax - The Missing Layer in Enterprise AI Infrastructure 4 | nav_order: 1 5 | --- 6 | 7 | # Hapax: The Missing Layer in Enterprise AI Infrastructure 8 | 9 | Every conversation with companies implementing AI follows a strikingly similar pattern. As a consultant, I'd walk into their offices and find teams of engineers wrestling with the same fundamental challenges. They weren't struggling with the exciting parts of AI - the innovative features or creative applications. Instead, they were bogged down by infrastructure concerns that seemed to repeat across every organization. 10 | 11 | The story usually begins with experimentation. A company starts playing with different AI models, testing capabilities across providers like OpenAI, Anthropic, and Ollama. They're model hoppers, constantly switching between providers as they discover new capabilities or run into limitations. This experimentation is valuable, but it creates a hidden cost: each switch requires engineering time to adapt their infrastructure. 12 | 13 | What struck me most was watching companies build the same solutions over and over. One week, I'd watch a startup implement retry logic for their AI calls. The next week, I'd find an enterprise team building nearly identical failover systems. 
These weren't small companies making rookie mistakes - these were sophisticated teams spending valuable time solving infrastructure problems instead of building their core products. 14 | 15 | The real wake-up call came when discussing monitoring and usage tracking. Companies could tell me their total API costs, but they struggled to answer basic questions about their AI operations. Which endpoints were most active? What was their actual uptime? How were different teams using these services? The data existed, but the infrastructure to make sense of it didn't. 16 | 17 | The pattern became clear: the missing piece wasn't AI capability - it was the infrastructure layer that makes AI reliable, observable, and manageable in production. Small companies were hitting a wall, forced to choose between hiring specialized talent or limiting their AI ambitions. Large corporations were forming entire teams just to manage these basic infrastructure needs. 18 | 19 | When I looked at how companies were handling these challenges, I saw a concerning pattern. Each organization was building their own infrastructure from scratch, writing thousands of lines of code to handle basic needs like retries and failover. A typical homegrown solution might look something like this: 20 | 21 | ```python 22 | async def make_ai_request(prompt, retries=3): 23 | for attempt in range(retries): 24 | try: 25 | response = await primary_provider.create_completion(prompt) 26 | return response 27 | except ProviderError: 28 | if attempt == retries - 1: 29 | try: 30 | # Attempt with backup provider 31 | return await backup_provider.create_completion(prompt) 32 | except: 33 | raise 34 | time.sleep(2 ** attempt) # Basic exponential backoff 35 | ``` 36 | 37 | This code might work for simple cases, but it lacks proper error handling, doesn't consider provider health, offers no visibility into performance, and requires significant maintenance as providers evolve. 
Now multiply this across different teams and departments, each building their own version, each maintaining their own infrastructure. 38 | 39 | Hapax transforms this complexity into a simple configuration: 40 | 41 | ```yaml 42 | providers: 43 | anthropic: 44 | type: anthropic 45 | model: claude-3-haiku 46 | api_key: ${ANTHROPIC_API_KEY} 47 | openai: 48 | type: openai 49 | model: gpt-4 50 | api_key: ${OPENAI_API_KEY} 51 | 52 | provider_preference: 53 | - anthropic 54 | - openai 55 | ``` 56 | 57 | With this configuration, you get enterprise-grade infrastructure that includes intelligent failover between providers with health monitoring, comprehensive metrics through Prometheus integration, sophisticated request queuing and deduplication, real-time performance monitoring, structured logging for debugging, and HTTP/3 support for mobile users. 58 | 59 | Consider how this changes your operations. Instead of each team implementing their own retry logic and monitoring, they can focus on building features. When a provider has issues, Hapax automatically detects the problem through its health checks and routes traffic to healthy providers. Your applications continue running without interruption. 60 | 61 | The monitoring system gives you immediate visibility into your AI operations. Want to understand how different departments use AI? Create department-specific endpoints: 62 | 63 | ```yaml 64 | routes: 65 | - path: "/v1/marketing/completions" 66 | handler: completion 67 | version: v1 68 | metrics_labels: 69 | department: marketing 70 | ``` 71 | 72 | Now you can track usage, performance, and costs per department through your existing monitoring tools like Grafana, Power BI or Tableau. No custom integration required - Hapax provides these metrics through standard Prometheus endpoints. 73 | 74 | For mobile applications, Hapax's HTTP/3 support ensures reliable service even as users move between networks. 
The connection migration capabilities mean that if a user switches from WiFi to cellular, their AI interactions continue seamlessly. This isn't just a technical feature - it's about providing consistent service to your users regardless of their connection. 75 | 76 | Think about what this means for your organization. Rather than every team reinventing infrastructure, you have a standardized, production-ready solution that deploys in minutes with a single Docker command, integrates with your existing monitoring stack, handles provider failures automatically, gives you complete visibility into AI operations, and scales with your needs. 77 | 78 | The real power of Hapax becomes clear when you look toward the future. As AI continues to transform how we build software, the need for reliable, observable infrastructure only grows. Consider how your organization's AI journey might unfold: 79 | 80 | Today, you might start with a simple customer service enhancement using LLMs. With Hapax, this means adding a few lines to your configuration file, and suddenly you have production-ready infrastructure that rivals what large tech companies have built internally. Your engineers don't need to worry about provider outages or performance monitoring - they can focus entirely on crafting the perfect customer experience. 81 | 82 | As your AI usage grows, Hapax grows with you. When your marketing team wants to experiment with different AI providers for content generation, you won't need to build new infrastructure or hire specialists. They can simply use their dedicated endpoint, while Hapax handles the complexity of provider management and gives them real-time visibility into their usage and costs. 83 | 84 | The transformation continues as AI becomes central to your operations. Your data science team might want to A/B test different models, your product team might need geographic routing for global customers, and your finance team might require detailed cost allocation. 
With Hapax, these aren't infrastructure challenges - they're just configuration changes. 85 | 86 | This standardization brings another powerful benefit: knowledge sharing across your organization. Instead of each team developing their own best practices for AI deployment, they build on a common foundation. A solution discovered by your customer service team can be immediately applied to your sales team's AI implementations. Your organization learns and improves as a unified whole. 87 | 88 | We're building Hapax in the open because we believe reliable AI infrastructure shouldn't be limited to companies with massive engineering resources. Whether you're a startup launching your first AI feature or an enterprise scaling to millions of requests, you deserve infrastructure that just works. 89 | 90 | Ready to transform how your organization builds with AI? Deploy Hapax in minutes with our Docker container, or dive into our documentation to learn more. Join us in building the foundation for the next generation of AI applications - where infrastructure is an enabler, not a barrier, to innovation. 91 | 92 | [Get Started with Hapax](/docs/getting-started) 93 | [Join Our Community](https://github.com/teilomillet/hapax) 94 | [Read the Documentation](/docs/) -------------------------------------------------------------------------------- /docs/getting-started.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teilomillet/hapax/7c08e64129a1c0adfea8a65dd7a608fd2b345a37/docs/getting-started.md -------------------------------------------------------------------------------- /docs/getting-started/5-minute-setup.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: page 3 | title: 5-Minute Setup 4 | parent: Getting Started 5 | nav_order: 1 6 | --- 7 | 8 | # 5-Minute Setup 9 | 10 | This guide will get you running with Hapax in under 5 minutes using Docker. 
11 | 12 | {: .note } 13 | > **Prerequisites** 14 | > - Docker installed 15 | > - API key from any supported provider (OpenAI, Anthropic, etc.) 16 | 17 | ## 1. Run Hapax 18 | 19 | Copy and run this command, replacing `your_key` with your API key: 20 | 21 | ```bash 22 | docker run -p 8080:8080 \ 23 | -e OPENAI_API_KEY=your_key \ 24 | teilomillet/hapax:latest 25 | ``` 26 | 27 | ## 2. Verify Installation 28 | 29 | Test that Hapax is running: 30 | 31 | ```bash 32 | curl http://localhost:8080/health 33 | # Expected: {"status":"ok"} 34 | ``` 35 | 36 | ## 3. Make Your First Request 37 | 38 | Send a test completion request: 39 | 40 | ```bash 41 | curl -X POST http://localhost:8080/v1/completions \ 42 | -H "Content-Type: application/json" \ 43 | -d '{"messages":[{"role":"user","content":"Hello"}]}' 44 | ``` -------------------------------------------------------------------------------- /docs/getting-started/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: page 3 | title: Getting Started 4 | nav_order: 2 5 | has_children: true 6 | --- 7 | 8 | # Getting Started with Hapax 9 | 10 | Choose your path to get started with Hapax: 11 | 12 | {: .note } 13 | > **New to Hapax?** 14 | > Start with our [5-Minute Setup](5-minute-setup) guide to get running quickly. 15 | 16 | ## Installation Options 17 | 18 | 1. **[5-Minute Setup](5-minute-setup)** 19 | - Fastest path to running Hapax 20 | - Uses Docker 21 | - Minimal configuration needed 22 | 23 | 2. **[Manual Installation](manual-installation)** 24 | - Full control over installation 25 | - Suitable for development 26 | - Access to all features 27 | 28 | 3. 
**[Production Setup](../production)** 29 | - Secure deployment configuration 30 | - Monitoring setup 31 | - Performance optimization -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: home 3 | title: Home 4 | nav_order: 1 5 | --- 6 | 7 | # Hapax Documentation 8 | 9 | {: .fs-9 } 10 | The reliability layer between your code and LLM providers 11 | 12 | {: .fs-6 .fw-300 } 13 | A lightweight, production-ready infrastructure layer that ensures continuous operation through intelligent provider management and automatic failover. 14 | 15 | [Quick Start](getting-started/5-minute-setup){: .btn .btn-primary .fs-5 .mb-4 .mb-md-0 .mr-2 } 16 | [View Source](https://github.com/teilomillet/hapax){: .btn .fs-5 .mb-4 .mb-md-0 } 17 | 18 | --- 19 | 20 | ## Why Hapax? 21 | 22 | {: .important } 23 | > Hapax addresses the fundamental challenges of working with LLM providers: service reliability, provider management, and operational visibility. 24 | 25 | ### Key Benefits 26 | 27 | {: .note } 28 | > **Continuous Operation** 29 | > Automatic failover between providers maintains service availability during outages or degraded performance. 30 | 31 | {: .note } 32 | > **Minimal Configuration** 33 | > Single configuration file handles all provider settings, health checks, and failover logic. 34 | 35 | {: .note } 36 | > **Operational Insight** 37 | > Built-in metrics expose detailed provider performance, request patterns, and system health. 
38 | 39 | ## Core Features 40 | 41 | ### Intelligent Provider Management 42 | - Health monitoring with configurable thresholds 43 | - Automatic provider failover 44 | - Circuit breaker implementation 45 | - Request deduplication 46 | 47 | ### System Architecture 48 | - Request routing and load distribution 49 | - Comprehensive error handling 50 | - Structured logging with request tracing 51 | - HTTP/3 support 52 | 53 | ### Security and Monitoring 54 | - API key-based authentication 55 | - Request validation 56 | - Usage metrics per endpoint 57 | - Prometheus integration 58 | 59 | ## Documentation 60 | 61 | - [Quick Start](getting-started/5-minute-setup) 62 | - [Core Features](core-features) 63 | - [Production Setup](production) 64 | - [API Reference](api) 65 | 66 | ## Development 67 | 68 | Find issues or want to contribute? 69 | - [Source Code](https://github.com/teilomillet/hapax) 70 | - [Issue Tracker](https://github.com/teilomillet/hapax/issues) 71 | - [Security Guide](production/security) 72 | - [Configuration Reference](getting-started/configuration) -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: page 3 | title: Installation 4 | nav_order: 2 5 | --- 6 | 7 | # Installation Guide 8 | 9 | This guide helps you choose and implement the best installation method for your needs. Each method has been thoroughly tested and validated in production environments. 10 | 11 | ## Choosing Your Installation Method 12 | 13 | Hapax offers multiple installation methods to accommodate different use cases. Whether you're evaluating the service, developing new features, or deploying to production, there's a path designed for your needs. 14 | 15 | ### Quick Decision Guide 16 | 1. 
**Docker Installation** (Recommended for most users) 17 | - Best for: Quick testing, production deployments 18 | - Advantages: Isolated environment, easy updates, verified base image (~17MB) 19 | - Trade-offs: Less customization flexibility 20 | 21 | 2. **Manual Installation** 22 | - Best for: Development, customization 23 | - Advantages: Full control, easier debugging, standard Go toolchain 24 | - Trade-offs: More setup steps, environment management 25 | 26 | 3. **Production Setup** 27 | - Best for: Enterprise deployments 28 | - Advantages: Scalability (tested to 100+ concurrent users), built-in monitoring 29 | - Trade-offs: More complex configuration, resource intensive 30 | 31 | Take a moment to consider your primary goal. This will help you choose the most appropriate installation method: 32 | - "I want to try Hapax quickly" → Docker Quick Start (5-minute setup) 33 | - "I need to modify the code" → Manual Installation (standard Go project) 34 | - "I'm deploying to production" → Production Setup (enterprise-ready) 35 | 36 | ## System Requirements 37 | 38 | Before you begin installation, ensure your environment meets the necessary requirements. We've separated these into mandatory and optional components to help you plan your deployment effectively. 39 | 40 | ### Mandatory Requirements (Why?) 41 | - **LLM Provider Access**: Core functionality depends on LLM API 42 | - **API Keys**: Secure provider authentication 43 | - **512MB RAM**: Verified base memory footprint 44 | - **100MB Disk**: Tested minimum storage requirement 45 | - **Go 1.22+**: Latest stable release support 46 | 47 | ### Optional Requirements (Why?) 
48 | - **2+ CPU Cores**: Verified for concurrent request handling 49 | - **2GB+ RAM**: Tested for caching and queue management 50 | - **1GB+ Disk**: Validated for logging and metrics 51 | - **TLS Certificates**: Production security (HTTP/3 support) 52 | - **Docker**: Industry-standard containerization 53 | 54 | ## Installation Methods 55 | 56 | Now that you've chosen your installation method and verified your system requirements, let's proceed with the installation. Each method includes verification steps to ensure everything is working correctly. 57 | 58 | ### 1. Docker Quick Start (5 minutes) 59 | The Docker installation method provides the fastest path to a running system. It's preconfigured with sensible defaults and includes all necessary dependencies. 60 | 61 | ```bash 62 | docker run -p 8080:8080 \ 63 | -e OPENAI_API_KEY=your_key \ 64 | -e CONFIG_PATH=/app/config.yaml \ 65 | teilomillet/hapax:latest 66 | ``` 67 | 68 | After running this command, take a moment to verify the installation: 69 | ```bash 70 | # Should return HTTP 200 71 | curl http://localhost:8080/health 72 | ``` 73 | 74 | ### 2. Manual Installation (15 minutes) 75 | The manual installation gives you full control over the build process and is ideal for development work. Follow these steps carefully: 76 | 77 | 1. Clone and build: 78 | ```bash 79 | git clone https://github.com/teilomillet/hapax.git 80 | cd hapax 81 | go build -o hapax cmd/hapax/main.go 82 | ``` 83 | 84 | 2. Configure: 85 | ```bash 86 | cp config.example.yaml config.yaml 87 | # Required: Provider configuration 88 | export OPENAI_API_KEY="your_key" 89 | # Optional: Logging setup 90 | export LOG_LEVEL="info" 91 | ``` 92 | 93 | 3. Run: 94 | ```bash 95 | ./hapax --config config.yaml 96 | ``` 97 | 98 | ### 3. 
Production Setup (30 minutes) 99 | For production environments, we recommend this more robust setup that includes logging, monitoring, and automatic restarts: 100 | 101 | ```bash 102 | docker run -d \ 103 | --name hapax \ 104 | --restart unless-stopped \ 105 | -p 8080:8080 \ 106 | -v $(pwd)/config.yaml:/app/config.yaml \ 107 | -v $(pwd)/logs:/app/logs \ 108 | -e OPENAI_API_KEY=your_key \ 109 | --log-driver=json-file \ 110 | --log-opt max-size=10m \ 111 | teilomillet/hapax:latest 112 | ``` 113 | 114 | ## Verification Guide 115 | 116 | After installation, it's crucial to verify that everything is working correctly. We provide a series of checks that progress from basic connectivity to full functionality testing. 117 | 118 | ### How to Know It's Working 119 | 120 | 1. **Health Check** (Basic Verification) 121 | ```bash 122 | curl http://localhost:8080/health 123 | # Expected: {"status":"ok"} 124 | ``` 125 | 126 | 2. **Functionality Test** (Core Feature Check) 127 | ```bash 128 | curl -X POST http://localhost:8080/v1/completions \ 129 | -H "Content-Type: application/json" \ 130 | -d '{"messages":[{"role":"user","content":"Hello"}]}' 131 | # Expected: Response with generated content 132 | ``` 133 | 134 | 3. **Performance Check** (Optional) 135 | ```bash 136 | curl http://localhost:8080/metrics 137 | # Expected: Prometheus metrics data 138 | ``` 139 | 140 | ### Common Issues and Solutions 141 | 142 | If you encounter any issues during installation or verification, here are some common problems and their solutions: 143 | 144 | 1. **API Key Issues** 145 | - Symptom: 401 Unauthorized 146 | - Solution: Check environment variables 147 | ```bash 148 | echo $OPENAI_API_KEY # Should show your key 149 | ``` 150 | 151 | 2. **Port Conflicts** 152 | - Symptom: Address already in use 153 | - Solution: Change port in config or check running processes 154 | ```bash 155 | lsof -i :8080 # Check port usage 156 | ``` 157 | 158 | 3. 
**Configuration Errors** 159 | - Symptom: Server won't start 160 | - Solution: Validate configuration 161 | ```bash 162 | ./hapax --validate --config config.yaml 163 | ``` 164 | 165 | ## When Can You Use It? 166 | 167 | You'll know your Hapax installation is ready for use when you've completed these key checkpoints: 168 | 1. Health check returns `{"status":"ok"}` 169 | 2. Test completion request succeeds 170 | 3. No errors in logs (`docker logs hapax` or local logs) 171 | 172 | ### Next Steps After Installation 173 | Once your installation is verified, consider these steps to enhance your deployment: 174 | - Configure additional providers for redundancy 175 | - Enable optional features based on your needs 176 | - Set up monitoring for production visibility 177 | - Implement security measures for your environment 178 | 179 | Need help? Our documentation and community resources are here to assist: 180 | - [Configuration Guide](configuration.md) 181 | - [GitHub Issues](https://github.com/teilomillet/hapax/issues) 182 | - [Full Documentation](https://teilomillet.github.io/hapax) -------------------------------------------------------------------------------- /docs/performance.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: page 3 | title: Performance 4 | nav_order: 4 5 | --- 6 | 7 | # Performance Guide 8 | 9 | This guide covers performance optimization strategies for Hapax, including HTTP/3, caching, queuing, and load management. 
10 | 11 | ## Performance Features 12 | 13 | ### HTTP/3 Support 14 | 15 | Hapax supports HTTP/3 (QUIC) for improved performance: 16 | 17 | ```yaml 18 | server: 19 | http3: 20 | enabled: true 21 | port: 443 22 | tls_cert_file: "/etc/certs/server.crt" 23 | tls_key_file: "/etc/certs/server.key" 24 | idle_timeout: 30s 25 | max_bi_streams_concurrent: 100 # Concurrent bidirectional streams 26 | max_uni_streams_concurrent: 100 # Concurrent unidirectional streams 27 | max_stream_receive_window: 6291456 # 6MB stream window 28 | max_connection_receive_window: 15728640 # 15MB connection window 29 | enable_0rtt: true # Enable 0-RTT for faster connections 30 | max_0rtt_size: 16384 # 16KB max 0-RTT size 31 | allow_0rtt_replay: false # Disable replay protection 32 | udp_receive_buffer_size: 8388608 # 8MB UDP buffer 33 | ``` 34 | 35 | Benefits of HTTP/3: 36 | - Improved connection establishment 37 | - Better multiplexing 38 | - Reduced head-of-line blocking 39 | - Enhanced mobile performance 40 | - Faster connection recovery 41 | 42 | ### Response Caching 43 | 44 | Three caching strategies available: 45 | 46 | ```yaml 47 | llm: 48 | cache: 49 | enable: true 50 | type: "redis" # Options: memory, redis, file 51 | ttl: 24h # Cache entry lifetime 52 | max_size: 1000 # Maximum entries/size 53 | redis: # Redis-specific settings 54 | address: "localhost:6379" 55 | password: ${REDIS_PASSWORD} 56 | db: 0 57 | ``` 58 | 59 | Cache types: 60 | - Memory: Fast, non-persistent, cleared on restart 61 | - Redis: Persistent, distributed, good for clusters 62 | - File: Persistent, good for single instances 63 | 64 | ### Request Queuing 65 | 66 | Queue system for high-load scenarios: 67 | 68 | ```yaml 69 | queue: 70 | enabled: true 71 | initial_size: 1000 # Starting queue capacity 72 | state_path: "/var/lib/hapax/queue.state" # Persistence path 73 | save_interval: 30s # State save frequency 74 | ``` 75 | 76 | Benefits: 77 | - Handles traffic spikes 78 | - Prevents system overload 79 | - Optional state 
persistence 80 | - Configurable queue size 81 | 82 | ### Circuit Breaker 83 | 84 | Protects system from cascading failures: 85 | 86 | ```yaml 87 | circuit_breaker: 88 | max_requests: 100 # Requests in half-open state 89 | interval: 30s # Monitoring interval 90 | timeout: 10s # Time in open state 91 | failure_threshold: 5 # Failures before opening 92 | ``` 93 | 94 | States: 95 | - Closed: Normal operation 96 | - Open: Stop requests after failures 97 | - Half-Open: Testing recovery 98 | 99 | ### Provider Failover 100 | 101 | Automatic provider switching for reliability: 102 | 103 | ```yaml 104 | providers: 105 | anthropic: 106 | type: anthropic 107 | model: claude-3-haiku 108 | api_key: ${ANTHROPIC_API_KEY} 109 | openai: 110 | type: openai 111 | model: gpt-4 112 | api_key: ${OPENAI_API_KEY} 113 | 114 | provider_preference: 115 | - anthropic 116 | - openai 117 | ``` 118 | 119 | Features: 120 | - Automatic failover 121 | - Health monitoring 122 | - Configurable preference order 123 | - Seamless switching 124 | 125 | ## Performance Tuning 126 | 127 | ### Memory Optimization 128 | 129 | Adjust these settings based on available memory: 130 | - `max_header_bytes`: HTTP header size limit 131 | - `max_stream_receive_window`: Per-stream buffer 132 | - `max_connection_receive_window`: Per-connection buffer 133 | - Cache size limits 134 | 135 | ### Concurrency Settings 136 | 137 | Tune these for your workload: 138 | - `max_bi_streams_concurrent`: Bidirectional streams 139 | - `max_uni_streams_concurrent`: Unidirectional streams 140 | - Queue size and persistence 141 | - Circuit breaker thresholds 142 | 143 | ### Network Optimization 144 | 145 | Network performance settings: 146 | - HTTP/3 buffer sizes 147 | - UDP receive buffer size 148 | - Idle timeouts 149 | - 0-RTT configuration 150 | 151 | ### Monitoring Performance 152 | 153 | Use built-in metrics: 154 | ```yaml 155 | routes: 156 | - path: "/metrics" 157 | handler: "metrics" 158 | version: "v1" 159 | methods: ["GET"] 160 | 
middleware: ["auth"] 161 | ``` 162 | 163 | Available metrics: 164 | - Request latencies 165 | - Queue lengths 166 | - Cache hit rates 167 | - Circuit breaker states 168 | - Provider health status 169 | 170 | ## Best Practices 171 | 172 | ### Development Environment 173 | ```yaml 174 | server: 175 | port: 8080 176 | http3: 177 | enabled: false 178 | llm: 179 | cache: 180 | type: "memory" 181 | max_size: 1000 182 | queue: 183 | enabled: false 184 | ``` 185 | 186 | ### Production Environment 187 | ```yaml 188 | server: 189 | port: 443 190 | http3: 191 | enabled: true 192 | max_bi_streams_concurrent: 200 193 | max_stream_receive_window: 8388608 # 8MB 194 | llm: 195 | cache: 196 | type: "redis" 197 | ttl: 24h 198 | queue: 199 | enabled: true 200 | initial_size: 5000 201 | state_path: "/var/lib/hapax/queue.state" 202 | circuit_breaker: 203 | max_requests: 200 204 | failure_threshold: 10 205 | ``` 206 | 207 | ### High-Load Environment 208 | ```yaml 209 | server: 210 | http3: 211 | max_bi_streams_concurrent: 500 212 | max_stream_receive_window: 16777216 # 16MB 213 | max_connection_receive_window: 33554432 # 32MB 214 | udp_receive_buffer_size: 16777216 # 16MB 215 | llm: 216 | cache: 217 | type: "redis" 218 | max_size: 10000 219 | queue: 220 | enabled: true 221 | initial_size: 10000 222 | circuit_breaker: 223 | max_requests: 500 224 | interval: 60s 225 | ``` 226 | 227 | ## Troubleshooting 228 | 229 | Common performance issues and solutions: 230 | 231 | ### High Latency 232 | - Enable HTTP/3 233 | - Increase stream windows 234 | - Adjust UDP buffer size 235 | - Check provider health 236 | 237 | ### Memory Usage 238 | - Reduce cache size 239 | - Lower stream limits 240 | - Adjust queue size 241 | - Monitor metrics 242 | 243 | ### Request Failures 244 | - Check circuit breaker logs 245 | - Verify provider health 246 | - Adjust retry settings 247 | - Enable failover 248 | 249 | ### Queue Overflow 250 | - Increase queue size 251 | - Enable persistence 252 | - Adjust circuit 
breaker 253 | - Scale horizontally -------------------------------------------------------------------------------- /errors/errors.go: -------------------------------------------------------------------------------- 1 | // Package errors provides a comprehensive error handling system for the Hapax LLM gateway. 2 | // It includes structured error types, JSON response formatting, request ID tracking, 3 | // and integrated logging with Uber's zap logger. 4 | // 5 | // The package is designed to be used throughout the Hapax codebase to provide 6 | // consistent error handling and reporting. It offers several key features: 7 | // 8 | // - Structured JSON error responses with type information 9 | // - Request ID tracking for error correlation 10 | // - Integrated logging with zap 11 | // - Custom error types for different scenarios 12 | // - Middleware integration for panic recovery 13 | // 14 | // Basic usage: 15 | // 16 | // // Simple error response 17 | // errors.Error(w, "Something went wrong", http.StatusBadRequest) 18 | // 19 | // // Type-specific error with context 20 | // errors.ErrorWithType(w, "Invalid input", errors.ValidationError, http.StatusBadRequest) 21 | // 22 | // For more complex scenarios, you can use the error constructors in types.go: 23 | // 24 | // err := errors.NewValidationError(requestID, "Invalid input", map[string]interface{}{ 25 | // "field": "username", 26 | // "error": "required", 27 | // }) 28 | package errors 29 | 30 | import ( 31 | "encoding/json" 32 | "fmt" 33 | "net/http" 34 | 35 | "go.uber.org/zap" 36 | ) 37 | 38 | // DefaultLogger is the default zap logger instance used throughout the package. 39 | // It is initialized to a production configuration but can be overridden using SetLogger. 
40 | var DefaultLogger *zap.Logger 41 | 42 | func init() { 43 | var err error 44 | DefaultLogger, err = zap.NewProduction() 45 | if err != nil { 46 | DefaultLogger = zap.NewNop() 47 | } 48 | } 49 | 50 | // SetLogger allows setting a custom zap logger instance. 51 | // If nil is provided, the function will do nothing to prevent 52 | // accidentally disabling logging. 53 | func SetLogger(logger *zap.Logger) { 54 | if logger != nil { 55 | DefaultLogger = logger 56 | } 57 | } 58 | 59 | // ErrorType represents different categories of errors that can occur 60 | // in the Hapax system. Each type corresponds to a specific kind of 61 | // error scenario and carries appropriate HTTP status codes and handling logic. 62 | type ErrorType string 63 | 64 | const ( 65 | // AuthError represents authentication and authorization failures 66 | AuthError ErrorType = "authentication_error" 67 | 68 | // ValidationError represents input validation failures 69 | ValidationError ErrorType = "validation_error" 70 | 71 | // InternalError represents unexpected internal server errors 72 | InternalError ErrorType = "internal_error" 73 | 74 | // ConfigError represents configuration-related errors 75 | ConfigError ErrorType = "config_error" 76 | 77 | // ProviderError represents errors from LLM providers 78 | ProviderError ErrorType = "provider_error" 79 | 80 | // RateLimitError represents rate limiting errors 81 | RateLimitError ErrorType = "rate_limit_error" 82 | 83 | // AuthenticationError represents API key authentication failures 84 | AuthenticationError ErrorType = "api_key_error" 85 | 86 | // BadRequestError represents invalid request format or parameters 87 | BadRequestError ErrorType = "bad_request" 88 | 89 | // NotFoundError represents resource not found errors 90 | NotFoundError ErrorType = "not_found" 91 | 92 | // UnauthorizedError represents unauthorized access attempts 93 | UnauthorizedError ErrorType = "unauthorized" 94 | 95 | // TimeoutError represents timeout errors 96 | 
TimeoutError ErrorType = "timeout_error" 97 | ) 98 | 99 | // HapaxError is our custom error type that implements the error interface 100 | // and provides additional context about the error. It is designed to be 101 | // serialized to JSON for API responses while maintaining internal error 102 | // context for logging and debugging. 103 | type HapaxError struct { 104 | // Type categorizes the error for client handling 105 | Type ErrorType `json:"type"` 106 | 107 | // Message is a human-readable error description 108 | Message string `json:"message"` 109 | 110 | // Code is the HTTP status code (not exposed in JSON) 111 | Code int `json:"-"` 112 | 113 | // RequestID links the error to a specific request 114 | RequestID string `json:"RequestIDKey"` 115 | 116 | // Details contains additional error context 117 | Details map[string]interface{} `json:"details,omitempty"` 118 | 119 | // err is the underlying error (not exposed in JSON) 120 | err error 121 | } 122 | 123 | // Error implements the error interface. It returns a string that 124 | // combines the error type, message, and underlying error (if any). 125 | func (e *HapaxError) Error() string { 126 | if e.err != nil { 127 | return fmt.Sprintf("%s: %s: %v", e.Type, e.Message, e.err) 128 | } 129 | return fmt.Sprintf("%s: %s", e.Type, e.Message) 130 | } 131 | 132 | // Unwrap returns the underlying error, implementing the unwrap 133 | // interface for error chains. 134 | func (e *HapaxError) Unwrap() error { 135 | return e.err 136 | } 137 | 138 | // Is implements error matching for errors.Is, allowing type-based 139 | // error matching while ignoring other fields. 140 | func (e *HapaxError) Is(target error) bool { 141 | t, ok := target.(*HapaxError) 142 | if !ok { 143 | return false 144 | } 145 | return e.Type == t.Type 146 | } 147 | 148 | // WriteError formats and writes a HapaxError to an http.ResponseWriter. 
149 | // It sets the appropriate content type and status code, then writes 150 | // the error as a JSON response. 151 | func WriteError(w http.ResponseWriter, err *HapaxError) { 152 | w.Header().Set("Content-Type", "application/json") 153 | w.WriteHeader(err.Code) 154 | 155 | // Check the error return from Encode 156 | if encodeErr := json.NewEncoder(w).Encode(&ErrorResponse{ 157 | Type: err.Type, 158 | Message: err.Message, 159 | RequestID: err.RequestID, 160 | Details: err.Details, 161 | }); encodeErr != nil { 162 | // Log the encoding error 163 | zap.L().Error("Failed to encode error response", zap.Error(encodeErr)) 164 | 165 | // Attempt to send a fallback error response using the existing error handling mechanism 166 | ErrorWithType(w, "Failed to encode error response", ProviderError, http.StatusInternalServerError) 167 | } 168 | } 169 | 170 | // Error is a drop-in replacement for http.Error that creates and writes 171 | // a HapaxError with the InternalError type. It automatically includes 172 | // the request ID from the response headers if available. 173 | func Error(w http.ResponseWriter, message string, code int) { 174 | requestID := w.Header().Get("X-Request-ID") 175 | err := &HapaxError{ 176 | Type: InternalError, 177 | Message: message, 178 | Code: code, 179 | RequestID: requestID, 180 | } 181 | WriteError(w, err) 182 | } 183 | 184 | // ErrorWithType is like Error but allows specifying the error type. 185 | // This is useful when you want to indicate specific error categories 186 | // to the client while maintaining the simple interface of http.Error. 
187 | func ErrorWithType(w http.ResponseWriter, message string, errType ErrorType, code int) { 188 | requestID := w.Header().Get("X-Request-ID") 189 | err := &HapaxError{ 190 | Type: errType, 191 | Message: message, 192 | Code: code, 193 | RequestID: requestID, 194 | } 195 | WriteError(w, err) 196 | } 197 | -------------------------------------------------------------------------------- /errors/errors_test.go: -------------------------------------------------------------------------------- 1 | package errors 2 | 3 | import ( 4 | "errors" 5 | "testing" 6 | ) 7 | 8 | func TestHapaxError_Error(t *testing.T) { 9 | tests := []struct { 10 | name string 11 | err *HapaxError 12 | want string 13 | wantErr bool 14 | }{ 15 | { 16 | name: "basic error without wrapped error", 17 | err: &HapaxError{ 18 | Type: ValidationError, 19 | Message: "invalid input", 20 | }, 21 | want: "validation_error: invalid input", 22 | }, 23 | { 24 | name: "error with wrapped error", 25 | err: &HapaxError{ 26 | Type: InternalError, 27 | Message: "processing failed", 28 | err: errors.New("database connection failed"), 29 | }, 30 | want: "internal_error: processing failed: database connection failed", 31 | }, 32 | } 33 | 34 | for _, tt := range tests { 35 | t.Run(tt.name, func(t *testing.T) { 36 | got := tt.err.Error() 37 | if got != tt.want { 38 | t.Errorf("HapaxError.Error() = %v, want %v", got, tt.want) 39 | } 40 | }) 41 | } 42 | } 43 | 44 | func TestHapaxError_Is(t *testing.T) { 45 | err1 := &HapaxError{Type: AuthError, Message: "test1"} 46 | err2 := &HapaxError{Type: AuthError, Message: "test2"} 47 | err3 := &HapaxError{Type: ValidationError, Message: "test3"} 48 | 49 | if !err1.Is(err2) { 50 | t.Error("Expected err1.Is(err2) to be true for same error type") 51 | } 52 | 53 | if err1.Is(err3) { 54 | t.Error("Expected err1.Is(err3) to be false for different error types") 55 | } 56 | } 57 | 58 | func TestHapaxError_Unwrap(t *testing.T) { 59 | innerErr := errors.New("inner error") 60 | err := 
&HapaxError{ 61 | Type: InternalError, 62 | Message: "outer error", 63 | err: innerErr, 64 | } 65 | 66 | if unwrapped := err.Unwrap(); unwrapped != innerErr { 67 | t.Errorf("Unwrap() = %v, want %v", unwrapped, innerErr) 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /errors/handlers.go: -------------------------------------------------------------------------------- 1 | // Package errors provides error handling middleware and utilities. 2 | package errors 3 | 4 | import ( 5 | "net/http" 6 | "runtime/debug" 7 | 8 | "go.uber.org/zap" 9 | ) 10 | 11 | // ErrorHandler wraps an http.Handler and provides error handling 12 | // If a panic occurs during request processing, it: 13 | // 1. Logs the panic and stack trace 14 | // 2. Returns a 500 Internal Server Error to the client 15 | // 3. Includes the request ID in both the log and response 16 | // 17 | // The panic recovery ensures that the server continues running even if 18 | // individual requests panic. All panics are logged with their stack traces 19 | // for debugging purposes. 
20 | // 21 | // Example usage: 22 | // 23 | // router.Use(errors.ErrorHandler(logger)) 24 | func ErrorHandler(logger *zap.Logger) func(http.Handler) http.Handler { 25 | return func(next http.Handler) http.Handler { 26 | return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 27 | defer func() { 28 | if err := recover(); err != nil { 29 | stack := debug.Stack() 30 | logger.Error("panic recovered", 31 | zap.Any("error", err), 32 | zap.ByteString("stacktrace", stack), 33 | zap.String(string(RequestIDKey), r.Header.Get("X-Request-ID")), 34 | ) 35 | 36 | hapaxErr := NewInternalError(r.Header.Get("X-Request-ID"), nil) 37 | WriteError(w, hapaxErr) 38 | } 39 | }() 40 | 41 | next.ServeHTTP(w, r) 42 | }) 43 | } 44 | } 45 | 46 | // LogError logs an error with its context 47 | // It ensures that all errors are properly logged with their context, including: 48 | // - Error type and message 49 | // - Request ID 50 | // - HTTP method and URL 51 | // - Status code 52 | // 53 | // Example usage: 54 | // 55 | // errors.LogError(logger, err, requestID) 56 | func LogError(logger *zap.Logger, err error, requestID string) { 57 | if hapaxErr, ok := err.(*HapaxError); ok { 58 | logger.Error("request error", 59 | zap.String("error_type", string(hapaxErr.Type)), 60 | zap.String("message", hapaxErr.Message), 61 | zap.Int("code", hapaxErr.Code), 62 | zap.String(string(RequestIDKey), requestID), 63 | zap.Any("details", hapaxErr.Details), 64 | ) 65 | } else { 66 | logger.Error("unexpected error", 67 | zap.Error(err), 68 | zap.String(string(RequestIDKey), requestID), 69 | ) 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /errors/handlers_test.go: -------------------------------------------------------------------------------- 1 | package errors 2 | 3 | import ( 4 | "net/http" 5 | "net/http/httptest" 6 | "testing" 7 | 8 | "go.uber.org/zap" 9 | ) 10 | 11 | func TestErrorHandler(t *testing.T) { 12 | logger := zap.NewNop() 13 | 14 | 
tests := []struct { 15 | name string 16 | handler http.Handler 17 | expectedCode int 18 | expectPanic bool 19 | }{ 20 | { 21 | name: "normal handler", 22 | handler: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 23 | w.WriteHeader(http.StatusOK) 24 | }), 25 | expectedCode: http.StatusOK, 26 | expectPanic: false, 27 | }, 28 | { 29 | name: "panicking handler", 30 | handler: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 31 | panic("test panic") 32 | }), 33 | expectedCode: http.StatusInternalServerError, 34 | expectPanic: true, 35 | }, 36 | } 37 | 38 | for _, tt := range tests { 39 | t.Run(tt.name, func(t *testing.T) { 40 | // Create a test request 41 | req := httptest.NewRequest("GET", "/test", nil) 42 | req.Header.Set("X-Request-ID", "test-request-id") 43 | 44 | // Create a response recorder 45 | rr := httptest.NewRecorder() 46 | 47 | // Wrap the handler with our error handler 48 | handler := ErrorHandler(logger)(tt.handler) 49 | 50 | // Execute the handler 51 | handler.ServeHTTP(rr, req) 52 | 53 | // Check the status code 54 | if rr.Code != tt.expectedCode { 55 | t.Errorf("handler returned wrong status code: got %v want %v", 56 | rr.Code, tt.expectedCode) 57 | } 58 | }) 59 | } 60 | } 61 | 62 | func TestLogError(t *testing.T) { 63 | logger := zap.NewNop() 64 | requestID := "test-request-id" 65 | 66 | // Test logging a HapaxError 67 | hapaxErr := NewValidationError(requestID, "test error", nil) 68 | LogError(logger, hapaxErr, requestID) 69 | 70 | // Test logging a standard error 71 | standardErr := NewInternalError(requestID, nil) 72 | LogError(logger, standardErr, requestID) 73 | 74 | // Note: Since we're using a NOP logger, we can't verify the output 75 | // In a real application, you might want to use zap/zaptest for more detailed assertions 76 | } 77 | -------------------------------------------------------------------------------- /errors/responses.go: 
-------------------------------------------------------------------------------- 1 | // Package errors provides error response utilities. 2 | package errors 3 | 4 | import ( 5 | "errors" 6 | ) 7 | 8 | const RequestIDKey = "request_id" 9 | 10 | // ErrorResponse represents a standardized error response format 11 | // that is returned to clients when an error occurs. It includes: 12 | // - Error type for categorization 13 | // - Human-readable message 14 | // - Request ID for correlation 15 | // - Optional details for additional context 16 | type ErrorResponse struct { 17 | Type ErrorType `json:"type"` 18 | Message string `json:"message"` 19 | RequestID string `json:"request_id"` 20 | Details map[string]interface{} `json:"details,omitempty"` 21 | } 22 | 23 | // As is a wrapper around errors.As for better error type assertion 24 | func As(err error, target interface{}) bool { 25 | return errors.As(err, target) 26 | } 27 | -------------------------------------------------------------------------------- /errors/responses_test.go: -------------------------------------------------------------------------------- 1 | package errors 2 | 3 | import ( 4 | "encoding/json" 5 | "net/http" 6 | "net/http/httptest" 7 | "testing" 8 | ) 9 | 10 | func TestWriteError(t *testing.T) { 11 | tests := []struct { 12 | name string 13 | err *HapaxError 14 | expectedCode int 15 | expectedType ErrorType 16 | expectedFields []string 17 | }{ 18 | { 19 | name: "hapax error", 20 | err: &HapaxError{ 21 | Type: AuthError, 22 | Message: "unauthorized", 23 | Code: http.StatusUnauthorized, 24 | RequestID: "test-id", 25 | }, 26 | expectedCode: http.StatusUnauthorized, 27 | expectedType: AuthError, 28 | expectedFields: []string{"type", "message", string(RequestIDKey)}, 29 | }, 30 | { 31 | name: "error with details", 32 | err: &HapaxError{ 33 | Type: ValidationError, 34 | Message: "validation failed", 35 | Code: http.StatusBadRequest, 36 | RequestID: "test-id", 37 | Details: map[string]interface{}{ 38 | 
"field": "username", 39 | "error": "required", 40 | }, 41 | }, 42 | expectedCode: http.StatusBadRequest, 43 | expectedType: ValidationError, 44 | expectedFields: []string{"type", "message", string(RequestIDKey), "details"}, 45 | }, 46 | } 47 | 48 | for _, tt := range tests { 49 | t.Run(tt.name, func(t *testing.T) { 50 | rr := httptest.NewRecorder() 51 | 52 | WriteError(rr, tt.err) 53 | 54 | if rr.Code != tt.expectedCode { 55 | t.Errorf("WriteError() status = %v, want %v", rr.Code, tt.expectedCode) 56 | } 57 | 58 | contentType := rr.Header().Get("Content-Type") 59 | if contentType != "application/json" { 60 | t.Errorf("WriteError() content-type = %v, want application/json", contentType) 61 | } 62 | 63 | var response map[string]interface{} 64 | if err := json.NewDecoder(rr.Body).Decode(&response); err != nil { 65 | t.Fatalf("Failed to decode response body: %v", err) 66 | } 67 | 68 | if errorType, ok := response["type"].(string); !ok || ErrorType(errorType) != tt.expectedType { 69 | t.Errorf("WriteError() error type = %v, want %v", errorType, tt.expectedType) 70 | } 71 | 72 | for _, field := range tt.expectedFields { 73 | if _, exists := response[field]; !exists { 74 | t.Errorf("WriteError() missing expected field: %s", field) 75 | } 76 | } 77 | }) 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /errors/types.go: -------------------------------------------------------------------------------- 1 | package errors 2 | 3 | import ( 4 | "net/http" 5 | ) 6 | 7 | // NewError creates a new HapaxError with the given parameters. 8 | // It is a general-purpose constructor that allows full control over 9 | // the error's fields. For most cases, you should use one of the 10 | // specialized constructors below. 
11 | // 12 | // Example: 13 | // 14 | // err := NewError(InternalError, "database connection failed", 500, "req_123", nil, dbErr) 15 | func NewError(errType ErrorType, message string, code int, requestID string, details map[string]interface{}, err error) *HapaxError { 16 | return &HapaxError{ 17 | Type: errType, 18 | Message: message, 19 | Code: code, 20 | RequestID: requestID, 21 | Details: details, 22 | err: err, 23 | } 24 | } 25 | 26 | // NewAuthError creates an authentication error with appropriate defaults. 27 | // Use this for any authentication or authorization failures, such as: 28 | // - Invalid API keys 29 | // - Missing credentials 30 | // - Insufficient permissions 31 | // 32 | // Example: 33 | // 34 | // err := NewAuthError("req_123", "Invalid API key", nil) 35 | func NewAuthError(requestID, message string, err error) *HapaxError { 36 | return &HapaxError{ 37 | Type: AuthError, 38 | Message: message, 39 | Code: http.StatusUnauthorized, 40 | RequestID: requestID, 41 | err: err, 42 | Details: map[string]interface{}{ 43 | "suggestion": "Please check your authentication credentials", 44 | }, 45 | } 46 | } 47 | 48 | // NewValidationError creates a validation error with appropriate defaults. 
49 | // Use this for any request validation failures, such as: 50 | // - Invalid input formats 51 | // - Missing required fields 52 | // - Value constraint violations 53 | // - Invalid request methods 54 | // 55 | // Example: 56 | // 57 | // err := NewValidationError("req_123", "Invalid prompt", map[string]interface{}{ 58 | // "field": "prompt", 59 | // "error": "must not be empty", 60 | // }) 61 | func NewValidationError(requestID, message string, validationDetails map[string]interface{}) *HapaxError { 62 | code := http.StatusBadRequest 63 | if message == "Method not allowed" { 64 | code = http.StatusMethodNotAllowed 65 | } 66 | return &HapaxError{ 67 | Type: ValidationError, 68 | Message: message, 69 | Code: code, 70 | RequestID: requestID, 71 | Details: validationDetails, 72 | } 73 | } 74 | 75 | // NewRateLimitError creates a rate limit error with appropriate defaults. 76 | // Use this when a client has exceeded their quota or rate limits, such as: 77 | // - Too many requests per second 78 | // - Monthly API quota exceeded 79 | // - Concurrent request limit reached 80 | // 81 | // Example: 82 | // 83 | // err := NewRateLimitError("req_123", 30) 84 | func NewRateLimitError(requestID string, retryAfter int) *HapaxError { 85 | return &HapaxError{ 86 | Type: RateLimitError, 87 | Message: "Rate limit exceeded", 88 | Code: http.StatusTooManyRequests, 89 | RequestID: requestID, 90 | Details: map[string]interface{}{ 91 | "retry_after": retryAfter, 92 | }, 93 | } 94 | } 95 | 96 | // NewProviderError creates a provider error with appropriate defaults. 
97 | // Use this when the underlying LLM provider encounters an error, such as: 98 | // - Provider API errors 99 | // - Model unavailability 100 | // - Invalid provider configuration 101 | // 102 | // Example: 103 | // 104 | // err := NewProviderError("req_123", "Model unavailable", providerErr) 105 | func NewProviderError(requestID string, message string, err error) *HapaxError { 106 | return &HapaxError{ 107 | Type: ProviderError, 108 | Message: message, 109 | Code: http.StatusBadGateway, 110 | RequestID: requestID, 111 | err: err, 112 | } 113 | } 114 | 115 | // NewInternalError creates an internal server error with appropriate defaults. 116 | // Use this for unexpected errors that are not covered by other error types: 117 | // - Panics 118 | // - Database errors 119 | // - Unexpected system failures 120 | // 121 | // Example: 122 | // 123 | // err := NewInternalError("req_123", dbErr) 124 | func NewInternalError(requestID string, err error) *HapaxError { 125 | return &HapaxError{ 126 | Type: InternalError, 127 | Message: "An internal error occurred", 128 | Code: http.StatusInternalServerError, 129 | RequestID: requestID, 130 | err: err, 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /errors/types_test.go: -------------------------------------------------------------------------------- 1 | package errors 2 | 3 | import ( 4 | "errors" 5 | "net/http" 6 | "testing" 7 | ) 8 | 9 | func TestNewAuthError(t *testing.T) { 10 | requestID := "test-123" 11 | message := "invalid credentials" 12 | innerErr := errors.New("token expired") 13 | 14 | err := NewAuthError(requestID, message, innerErr) 15 | 16 | if err.Type != AuthError { 17 | t.Errorf("Expected error type %v, got %v", AuthError, err.Type) 18 | } 19 | if err.Message != message { 20 | t.Errorf("Expected message %v, got %v", message, err.Message) 21 | } 22 | if err.Code != http.StatusUnauthorized { 23 | t.Errorf("Expected code %v, got %v", http.StatusUnauthorized, 
err.Code) 24 | } 25 | if err.RequestID != requestID { 26 | t.Errorf("Expected requestID %v, got %v", requestID, err.RequestID) 27 | } 28 | if err.Unwrap() != innerErr { 29 | t.Errorf("Expected inner error %v, got %v", innerErr, err.Unwrap()) 30 | } 31 | } 32 | 33 | func TestNewValidationError(t *testing.T) { 34 | requestID := "test-456" 35 | message := "invalid input" 36 | details := map[string]interface{}{ 37 | "field": "email", 38 | "error": "invalid format", 39 | } 40 | 41 | err := NewValidationError(requestID, message, details) 42 | 43 | if err.Type != ValidationError { 44 | t.Errorf("Expected error type %v, got %v", ValidationError, err.Type) 45 | } 46 | if err.Message != message { 47 | t.Errorf("Expected message %v, got %v", message, err.Message) 48 | } 49 | if err.Code != http.StatusBadRequest { 50 | t.Errorf("Expected code %v, got %v", http.StatusBadRequest, err.Code) 51 | } 52 | if err.RequestID != requestID { 53 | t.Errorf("Expected requestID %v, got %v", requestID, err.RequestID) 54 | } 55 | if err.Details["field"] != details["field"] { 56 | t.Errorf("Expected details field %v, got %v", details["field"], err.Details["field"]) 57 | } 58 | } 59 | 60 | func TestNewRateLimitError(t *testing.T) { 61 | requestID := "test-789" 62 | retryAfter := 60 63 | 64 | err := NewRateLimitError(requestID, retryAfter) 65 | 66 | if err.Type != RateLimitError { 67 | t.Errorf("Expected error type %v, got %v", RateLimitError, err.Type) 68 | } 69 | if err.Code != http.StatusTooManyRequests { 70 | t.Errorf("Expected code %v, got %v", http.StatusTooManyRequests, err.Code) 71 | } 72 | if err.Details["retry_after"] != retryAfter { 73 | t.Errorf("Expected retry_after %v, got %v", retryAfter, err.Details["retry_after"]) 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /examples/hapax.yaml: -------------------------------------------------------------------------------- 1 | # Hapax Server Configuration Example 2 | # This example 
demonstrates all available configuration options, 3 | # including optional features like caching and retries. 4 | 5 | server: 6 | port: 8081 7 | read_timeout: 30s 8 | write_timeout: 30s 9 | max_header_bytes: 1048576 # 1MB 10 | shutdown_timeout: 30s 11 | 12 | llm: 13 | # Provider Configuration 14 | provider: anthropic # openai, anthropic, ollama 15 | model: claude-3-haiku-20240307 16 | api_key: "${ANTHROPIC_API_KEY}" # Will be replaced with environment variable 17 | endpoint: "https://api.anthropic.com/v1" 18 | system_prompt: "You are Claude, a helpful AI assistant." 19 | 20 | # Token Management 21 | max_context_tokens: 200000 # Claude-3-Haiku context window 22 | 23 | # Generation Parameters 24 | options: 25 | temperature: 0.7 # Controls randomness (0.0-1.0) 26 | max_tokens: 4096 # Maximum tokens to generate 27 | top_p: 1 # Nucleus sampling threshold 28 | frequency_penalty: 0 # Reduces word repetition 29 | presence_penalty: 0 # Encourages topic diversity 30 | # Additional model-specific options can be added here 31 | 32 | # Caching Configuration (Optional) 33 | cache: 34 | enable: true 35 | type: memory # memory, redis, or file 36 | ttl: 24h # Cache entry lifetime 37 | max_size: 1000 # Maximum entries for memory cache 38 | # Redis Configuration (if type: redis) 39 | redis: 40 | address: localhost:6379 41 | password: "" # Optional Redis password 42 | db: 0 # Redis database number 43 | # File Cache Configuration (if type: file) 44 | dir: ./cache # Cache directory path 45 | 46 | # Retry Configuration (Optional) 47 | retry: 48 | max_retries: 3 49 | initial_delay: 1s 50 | max_delay: 30s 51 | multiplier: 2 52 | retryable_errors: 53 | - rate_limit 54 | - timeout 55 | - server_error 56 | 57 | # Request Processing Configuration 58 | processing: 59 | # Templates for different request types 60 | request_templates: 61 | # Simple completion template 62 | default: "{{.Input}}" 63 | 64 | # Chat completion template 65 | chat: | 66 | {{range .Messages}} 67 | {{.Role}}: {{.Content}} 
68 | {{end}} 69 | 70 | # Function calling template (future) 71 | function: | 72 | Function Description: 73 | {{.FunctionDescription}} 74 | 75 | Input: 76 | {{.Input}} 77 | 78 | # Response Formatting Options 79 | response_formatting: 80 | clean_json: true # Remove markdown and format JSON 81 | trim_whitespace: true # Remove extra whitespace 82 | max_length: 8192 # Maximum response length 83 | 84 | logging: 85 | level: info # debug, info, warn, error 86 | format: json # json or text 87 | 88 | routes: 89 | - path: "/completions" 90 | handler: "completion" 91 | version: "v1" 92 | methods: ["POST"] 93 | middleware: ["auth", "ratelimit"] 94 | headers: 95 | Content-Type: "application/json" 96 | health_check: 97 | enabled: true 98 | interval: 30s 99 | timeout: 5s 100 | threshold: 3 101 | checks: 102 | api: "http" 103 | 104 | - path: "/health" 105 | handler: "health" 106 | version: "v1" 107 | methods: ["GET"] 108 | health_check: 109 | enabled: true 110 | interval: 15s 111 | timeout: 2s 112 | threshold: 2 113 | checks: 114 | system: "tcp" 115 | -------------------------------------------------------------------------------- /examples/requests.md: -------------------------------------------------------------------------------- 1 | # Hapax Request Examples 2 | 3 | This document shows how to use different types of requests with Hapax. 4 | 5 | ## Simple Completion (Default) 6 | 7 | The simplest type of request. Just provide an input text and get a completion. 8 | 9 | ```bash 10 | # Using curl 11 | curl -X POST http://localhost:8081/v1/completions \ 12 | -H "Content-Type: application/json" \ 13 | -d '{ 14 | "prompt": "What is the capital of France?" 15 | }' 16 | ``` 17 | 18 | ```json 19 | // Response 20 | { 21 | "content": "The capital of France is Paris." 22 | } 23 | ``` 24 | 25 | ## Chat Completion 26 | 27 | For chat-style interactions with message history. 
28 | 29 | ```bash 30 | # Using curl 31 | curl -X POST "http://localhost:8081/v1/completions?type=chat" \ 32 | -H "Content-Type: application/json" \ 33 | -d '{ 34 | "messages": [ 35 | {"role": "system", "content": "You are a helpful assistant."}, 36 | {"role": "user", "content": "Hi, how are you?"}, 37 | {"role": "assistant", "content": "I am doing well, thank you! How can I help you today?"}, 38 | {"role": "user", "content": "What is the weather like?"} 39 | ] 40 | }' 41 | ``` 42 | 43 | ```json 44 | // Response 45 | { 46 | "content": "I apologize, but I don't have access to real-time weather information. To get accurate weather information, I recommend checking a weather service or website for your specific location." 47 | } 48 | ``` 49 | 50 | ## Function Calling (Future) 51 | 52 | For structured function-like interactions. 53 | 54 | ```bash 55 | # Using curl 56 | curl -X POST "http://localhost:8081/v1/completions?type=function" \ 57 | -H "Content-Type: application/json" \ 58 | -d '{ 59 | "function_description": "Get the weather for a specific location", 60 | "input": "What is the weather like in Paris?" 61 | }' 62 | ``` 63 | 64 | ```json 65 | // Response 66 | { 67 | "content": "{\"function\": \"get_weather\", \"location\": \"Paris\", \"unit\": \"celsius\"}" 68 | } 69 | ``` 70 | 71 | ## Request Type Selection 72 | 73 | 1. **Query Parameter**: Add `?type=chat` or `?type=function` to the URL 74 | 2. **Default Behavior**: If no type is specified, the request is treated as a simple completion 75 | 3. 
**Request Format**: 76 | - Simple completion: Just needs `input` 77 | - Chat: Requires `messages` array with `role` and `content` 78 | - Function: Needs both `input` and `function_description` 79 | 80 | ## Response Formatting 81 | 82 | All responses are formatted according to the configuration: 83 | - JSON responses are cleaned and properly formatted 84 | - Whitespace is trimmed 85 | - Responses are truncated to the configured maximum length 86 | 87 | ## Error Handling 88 | 89 | ```json 90 | // Example error response 91 | { 92 | "error": "Invalid chat request: messages array cannot be empty", 93 | "status": 400 94 | } 95 | ``` 96 | 97 | Common error cases: 98 | 1. Missing required fields 99 | 2. Invalid JSON format 100 | 3. Empty messages array in chat requests 101 | 4. Request processing failures 102 | 5. LLM errors 103 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/teilomillet/hapax 2 | 3 | go 1.22.5 4 | 5 | toolchain go1.22.10 6 | 7 | require ( 8 | github.com/eapache/queue/v2 v2.0.0-20230407133247-75960ed334e4 9 | github.com/fsnotify/fsnotify v1.8.0 10 | github.com/go-chi/chi/v5 v5.2.0 11 | github.com/go-playground/validator/v10 v10.22.0 12 | github.com/google/uuid v1.3.0 13 | github.com/pkoukk/tiktoken-go v0.1.7 14 | github.com/prometheus/client_golang v1.20.5 15 | github.com/sony/gobreaker v1.0.0 16 | github.com/stretchr/testify v1.9.0 17 | github.com/teilomillet/gollm v0.1.1 18 | go.uber.org/zap v1.27.0 19 | golang.org/x/sync v0.10.0 20 | golang.org/x/time v0.8.0 21 | gopkg.in/yaml.v3 v3.0.1 22 | ) 23 | 24 | require ( 25 | github.com/bahlo/generic-list-go v0.2.0 // indirect 26 | github.com/beorn7/perks v1.0.1 // indirect 27 | github.com/buger/jsonparser v1.1.1 // indirect 28 | github.com/caarlos0/env/v11 v11.1.0 // indirect 29 | github.com/cespare/xxhash/v2 v2.3.0 // indirect 30 | github.com/davecgh/go-spew v1.1.1 
// indirect 31 | github.com/dlclark/regexp2 v1.10.0 // indirect 32 | github.com/gabriel-vasile/mimetype v1.4.3 // indirect 33 | github.com/go-playground/locales v0.14.1 // indirect 34 | github.com/go-playground/universal-translator v0.18.1 // indirect 35 | github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect 36 | github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect 37 | github.com/invopop/jsonschema v0.12.0 // indirect 38 | github.com/klauspost/compress v1.17.9 // indirect 39 | github.com/kylelemons/godebug v1.1.0 // indirect 40 | github.com/leodido/go-urn v1.4.0 // indirect 41 | github.com/mailru/easyjson v0.7.7 // indirect 42 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect 43 | github.com/onsi/ginkgo/v2 v2.9.5 // indirect 44 | github.com/pmezard/go-difflib v1.0.0 // indirect 45 | github.com/prometheus/client_model v0.6.1 // indirect 46 | github.com/prometheus/common v0.55.0 // indirect 47 | github.com/prometheus/procfs v0.15.1 // indirect 48 | github.com/quic-go/qpack v0.5.1 // indirect 49 | github.com/quic-go/quic-go v0.48.2 // indirect 50 | github.com/stretchr/objx v0.5.2 // indirect 51 | github.com/wk8/go-ordered-map/v2 v2.1.8 // indirect 52 | go.uber.org/mock v0.4.0 // indirect 53 | go.uber.org/multierr v1.11.0 // indirect 54 | golang.org/x/crypto v0.31.0 // indirect 55 | golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 // indirect 56 | golang.org/x/mod v0.17.0 // indirect 57 | golang.org/x/net v0.33.0 // indirect 58 | golang.org/x/sys v0.28.0 // indirect 59 | golang.org/x/text v0.21.0 // indirect 60 | golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect 61 | google.golang.org/protobuf v1.34.2 // indirect 62 | ) 63 | -------------------------------------------------------------------------------- /golangci.yml: -------------------------------------------------------------------------------- 1 | run: 2 | timeout: 5m 3 | tests: true 4 | # Include test files for analysis 5 
| build-tags: 6 | - integration 7 | 8 | linters: 9 | disable-all: true 10 | enable: 11 | - gofmt 12 | - govet 13 | - revive # Modern replacement for golint 14 | - gosimple 15 | - staticcheck 16 | - errcheck 17 | - ineffassign 18 | - unconvert 19 | - misspell 20 | - gosec 21 | - bodyclose # Checks whether HTTP response bodies are closed 22 | - gocyclo # Check function complexity 23 | - goimports # Checks imports ordering 24 | - unused # Checks for unused constants, variables, functions and types 25 | 26 | linters-settings: 27 | gocyclo: 28 | # Minimal complexity of function to report 29 | min-complexity: 15 30 | revive: 31 | rules: 32 | - name: exported 33 | arguments: 34 | - "checkPrivateReceivers" 35 | - "sayRepetitiveInsteadOf" 36 | 37 | issues: 38 | exclude-use-default: false 39 | max-issues-per-linter: 0 40 | max-same-issues: 0 41 | exclude-rules: 42 | - path: _test\.go 43 | linters: 44 | - gosec 45 | - errcheck 46 | 47 | output: 48 | format: colored-line-number 49 | print-issued-lines: true 50 | print-linter-name: true -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "os" 7 | "os/signal" 8 | "syscall" 9 | 10 | "github.com/teilomillet/hapax/errors" 11 | "github.com/teilomillet/hapax/server" 12 | "go.uber.org/zap" 13 | ) 14 | 15 | func main() { 16 | // Create logger with explicit error handling 17 | logger, err := zap.NewProduction() 18 | if err != nil { 19 | // Fail fast if logger creation fails 20 | fmt.Printf("Critical error: Failed to create logger: %v\n", err) 21 | os.Exit(1) 22 | } 23 | 24 | // Ensure logger is synced, with robust error handling 25 | defer func() { 26 | if syncErr := logger.Sync(); syncErr != nil { 27 | // Log sync failure, but don't mask the original error 28 | fmt.Printf("Warning: Failed to sync logger: %v\n", syncErr) 29 | } 30 | }() 31 | 32 | // Set 
global logger 33 | errors.SetLogger(logger) 34 | 35 | // Configuration and server setup with comprehensive error handling 36 | configPath := "config.yaml" 37 | server, err := server.NewServer(configPath, logger) 38 | if err != nil { 39 | logger.Fatal("Server initialization failed", 40 | zap.Error(err), 41 | zap.String("config_path", configPath), 42 | ) 43 | } 44 | 45 | // Graceful shutdown infrastructure 46 | ctx, cancel := context.WithCancel(context.Background()) 47 | defer cancel() 48 | 49 | // Signal handling with detailed logging 50 | sigChan := make(chan os.Signal, 1) 51 | signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM) 52 | 53 | go func() { 54 | sig := <-sigChan 55 | logger.Info("Shutdown signal received", 56 | zap.String("signal", sig.String()), 57 | zap.String("action", "initiating graceful shutdown"), 58 | ) 59 | cancel() 60 | }() 61 | 62 | // Server start with comprehensive error tracking 63 | if err := server.Start(ctx); err != nil { 64 | logger.Fatal("Server startup or runtime error", 65 | zap.Error(err), 66 | zap.String("action", "server_start_failed"), 67 | ) 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /prometheus.yml: -------------------------------------------------------------------------------- 1 | global: 2 | scrape_interval: 15s 3 | evaluation_interval: 15s 4 | 5 | scrape_configs: 6 | - job_name: 'hapax' 7 | static_configs: 8 | - targets: ['hapax:8080'] 9 | metrics_path: '/metrics' 10 | -------------------------------------------------------------------------------- /server/circuitbreaker/circuitbreaker.go: -------------------------------------------------------------------------------- 1 | // Package circuitbreaker provides an implementation of a circuit breaker pattern 2 | // to manage service calls and handle failures gracefully. 
3 | 4 | package circuitbreaker 5 | 6 | import ( 7 | "fmt" 8 | "time" 9 | 10 | "github.com/prometheus/client_golang/prometheus" 11 | "github.com/sony/gobreaker" 12 | "go.uber.org/zap" 13 | ) 14 | 15 | // Config represents the configuration settings for a CircuitBreaker instance. 16 | type Config struct { 17 | // Name is the unique identifier for the circuit breaker. 18 | Name string 19 | // MaxRequests is the maximum number of requests allowed to pass through while the circuit breaker is half-open (maps to gobreaker.Settings.MaxRequests). 20 | MaxRequests uint32 21 | // Interval is the cyclic period of the closed state after which the internal failure counts are cleared (maps to gobreaker.Settings.Interval). 22 | Interval time.Duration 23 | // Timeout is the period of the open state after which the circuit breaker transitions to half-open (maps to gobreaker.Settings.Timeout). 24 | Timeout time.Duration 25 | // FailureThreshold is the number of consecutive failures required to trip the circuit breaker. 26 | FailureThreshold uint32 27 | // TestMode indicates whether the circuit breaker is running in test mode. 28 | TestMode bool 29 | } 30 | 31 | // CircuitBreaker represents a circuit breaker instance with its configuration and state. 32 | type CircuitBreaker struct { 33 | // name is the unique identifier for the circuit breaker. 34 | name string 35 | // logger is the logger instance for logging events. 36 | logger *zap.Logger 37 | // metrics holds Prometheus metrics for the circuit breaker. 38 | metrics *metrics 39 | // breaker is the underlying gobreaker instance. 40 | breaker *gobreaker.CircuitBreaker 41 | } 42 | 43 | // metrics holds Prometheus metrics for the circuit breaker. 44 | type metrics struct { 45 | // stateGauge tracks the current state of the circuit breaker. 46 | stateGauge prometheus.Gauge 47 | // failureCount tracks the total number of failures. 48 | failureCount prometheus.Counter 49 | // tripsTotal tracks the total number of times the circuit breaker has tripped. 50 | tripsTotal prometheus.Counter 51 | } 52 | 53 | // initCircuitBreaker initializes a new CircuitBreaker instance and sets up metrics.
54 | // It returns the initialized CircuitBreaker and any error encountered during initialization. 55 | func initCircuitBreaker(config Config, logger *zap.Logger, registry *prometheus.Registry) (*CircuitBreaker, error) { 56 | // Check if the circuit breaker name is empty. 57 | if config.Name == "" { 58 | return nil, fmt.Errorf("circuit breaker name cannot be empty") 59 | } 60 | 61 | // Create a new CircuitBreaker instance. 62 | cb := &CircuitBreaker{ 63 | name: config.Name, 64 | logger: logger, 65 | } 66 | 67 | // Initialize metrics if not in test mode. 68 | if registry != nil && !config.TestMode { 69 | // Create a new metrics instance. 70 | cb.metrics = &metrics{ 71 | stateGauge: prometheus.NewGauge(prometheus.GaugeOpts{ 72 | Name: "circuit_breaker_state", 73 | Help: "Current state of the circuit breaker (0=closed, 1=half-open, 2=open)", 74 | ConstLabels: prometheus.Labels{ 75 | "name": config.Name, 76 | }, 77 | }), 78 | failureCount: prometheus.NewCounter(prometheus.CounterOpts{ 79 | Name: "circuit_breaker_failures_total", 80 | Help: "Total number of failures", 81 | ConstLabels: prometheus.Labels{ 82 | "name": config.Name, 83 | }, 84 | }), 85 | tripsTotal: prometheus.NewCounter(prometheus.CounterOpts{ 86 | Name: "circuit_breaker_trips_total", 87 | Help: "Total number of times the circuit breaker has tripped", 88 | ConstLabels: prometheus.Labels{ 89 | "name": config.Name, 90 | }, 91 | }), 92 | } 93 | 94 | // Register metrics with the Prometheus registry. 95 | registry.MustRegister(cb.metrics.stateGauge) 96 | registry.MustRegister(cb.metrics.failureCount) 97 | registry.MustRegister(cb.metrics.tripsTotal) 98 | } 99 | 100 | return cb, nil 101 | } 102 | 103 | // configureCircuitBreaker sets the configuration settings for the CircuitBreaker instance. 104 | // It configures the gobreaker settings, including the trip conditions and state change handlers. 
105 | func configureCircuitBreaker(cb *CircuitBreaker, config Config, logger *zap.Logger) { 106 | // Create a new gobreaker settings instance. 107 | settings := gobreaker.Settings{ 108 | Name: config.Name, 109 | MaxRequests: config.MaxRequests, 110 | Interval: config.Interval, 111 | Timeout: config.Timeout, 112 | 113 | // ReadyToTrip determines if the circuit breaker should trip based on consecutive failures. 114 | ReadyToTrip: func(counts gobreaker.Counts) bool { 115 | // Check if the number of consecutive failures exceeds the threshold. 116 | shouldTrip := counts.ConsecutiveFailures >= config.FailureThreshold 117 | if shouldTrip { 118 | // Log a message when the circuit breaker trips. 119 | logger.Info("Circuit breaker tripping", 120 | zap.String("name", config.Name), 121 | zap.Uint32("consecutive_failures", counts.ConsecutiveFailures), 122 | zap.Uint32("threshold", config.FailureThreshold)) 123 | } 124 | return shouldTrip 125 | }, 126 | 127 | // OnStateChange handles actions to take when the circuit breaker state changes. 128 | OnStateChange: func(name string, from, to gobreaker.State) { 129 | // Log a message when the circuit breaker state changes. 130 | logger.Info("Circuit breaker state changed", 131 | zap.String("name", name), 132 | zap.String("from", from.String()), 133 | zap.String("to", to.String())) 134 | 135 | // Update metrics based on the new state. 136 | if cb.metrics != nil { 137 | switch to { 138 | case gobreaker.StateOpen: 139 | cb.metrics.stateGauge.Set(2) 140 | cb.metrics.tripsTotal.Inc() 141 | case gobreaker.StateHalfOpen: 142 | cb.metrics.stateGauge.Set(1) 143 | case gobreaker.StateClosed: 144 | cb.metrics.stateGauge.Set(0) 145 | } 146 | } 147 | }, 148 | } 149 | 150 | // Create a new gobreaker instance with the configured settings. 151 | cb.breaker = gobreaker.NewCircuitBreaker(settings) 152 | } 153 | 154 | // NewCircuitBreaker creates a new CircuitBreaker instance and configures it with the provided settings. 
155 | // It returns the configured CircuitBreaker instance and any error that occurred during initialization. 156 | func NewCircuitBreaker(config Config, logger *zap.Logger, registry *prometheus.Registry) (*CircuitBreaker, error) { 157 | // Initialize the CircuitBreaker instance. 158 | cb, err := initCircuitBreaker(config, logger, registry) 159 | if err != nil { 160 | return nil, err 161 | } 162 | 163 | // Configure the CircuitBreaker instance with the provided settings. 164 | configureCircuitBreaker(cb, config, logger) 165 | 166 | return cb, nil 167 | } 168 | 169 | // Execute executes a function within the circuit breaker. 170 | // It returns any error that occurred during execution. 171 | func (cb *CircuitBreaker) Execute(operation func() error) error { 172 | // Execute the function within the circuit breaker. 173 | result, err := cb.breaker.Execute(func() (interface{}, error) { 174 | // Call the operation function. 175 | if err := operation(); err != nil { 176 | // Increment the failure count if the operation fails. 177 | if cb.metrics != nil { 178 | cb.metrics.failureCount.Inc() 179 | } 180 | // Log a message when the operation fails. 181 | cb.logger.Debug("Operation failed", 182 | zap.String("name", cb.name), 183 | zap.Error(err)) 184 | return nil, err 185 | } 186 | return nil, nil 187 | }) 188 | 189 | // Check if the circuit breaker is open. 190 | if err != nil { 191 | if err == gobreaker.ErrOpenState { 192 | // Log a message when the circuit breaker is open. 193 | cb.logger.Debug("Circuit breaker is open", 194 | zap.String("name", cb.name)) 195 | } 196 | return err 197 | } 198 | 199 | // Ignore the result since we don't use it. 200 | _ = result 201 | return nil 202 | } 203 | 204 | // State returns the current state of the circuit breaker. 205 | func (cb *CircuitBreaker) State() gobreaker.State { 206 | return cb.breaker.State() 207 | } 208 | 209 | // Counts returns the current counts of the circuit breaker. 
210 | func (cb *CircuitBreaker) Counts() gobreaker.Counts { 211 | return cb.breaker.Counts() 212 | } 213 | -------------------------------------------------------------------------------- /server/circuitbreaker/errors.go: -------------------------------------------------------------------------------- 1 | package circuitbreaker 2 | 3 | import "errors" 4 | 5 | var ( 6 | // ErrCircuitOpen is returned when the circuit breaker is open 7 | ErrCircuitOpen = errors.New("circuit breaker is open") 8 | ) 9 | -------------------------------------------------------------------------------- /server/handlers/integration_test.go: -------------------------------------------------------------------------------- 1 | package handlers 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "encoding/json" 7 | "net/http" 8 | "net/http/httptest" 9 | "testing" 10 | "time" 11 | 12 | "github.com/stretchr/testify/assert" 13 | "github.com/stretchr/testify/require" 14 | "github.com/teilomillet/hapax/config" 15 | "github.com/teilomillet/hapax/errors" 16 | "github.com/teilomillet/hapax/server/metrics" 17 | "github.com/teilomillet/hapax/server/middleware" 18 | "github.com/teilomillet/hapax/server/processing" 19 | "github.com/teilomillet/gollm" 20 | "github.com/teilomillet/hapax/server/mocks" 21 | "go.uber.org/zap" 22 | ) 23 | 24 | // TestCompletionHandlerIntegration tests the CompletionHandler integrated with: 25 | // - Router for request routing 26 | // - Middleware for request ID and rate limiting 27 | // - Error handling middleware 28 | // - Logging middleware 29 | func TestCompletionHandlerIntegration(t *testing.T) { 30 | // Create metrics 31 | m := metrics.NewMetrics() 32 | 33 | // Create mock LLM 34 | mockLLM := mocks.NewMockLLM(func(ctx context.Context, prompt *gollm.Prompt) (string, error) { 35 | // If context has timeout header, simulate timeout 36 | if ctx.Value(middleware.XTestTimeoutKey) != nil { 37 | // Sleep longer than the timeout 38 | time.Sleep(5 * time.Second) 39 | } 40 | return "Mock 
response", nil 41 | }) 42 | 43 | // Create logger 44 | logger := zap.NewNop() 45 | 46 | // Create processor 47 | cfg := &config.ProcessingConfig{ 48 | RequestTemplates: map[string]string{ 49 | "default": "{{.Input}}", 50 | "chat": "{{range .Messages}}{{.Role}}: {{.Content}}\n{{end}}", 51 | "function": "Function: {{.FunctionDescription}}\nInput: {{.Input}}", 52 | }, 53 | } 54 | processor, err := processing.NewProcessor(cfg, mockLLM) 55 | require.NoError(t, err) 56 | 57 | // Create handler 58 | handler := NewCompletionHandler(processor, logger) 59 | 60 | // Create middleware chain 61 | chain := middleware.RequestID( 62 | middleware.PrometheusMetrics(m)( 63 | middleware.RateLimit(m)( 64 | middleware.Timeout(5*time.Second)(handler), 65 | ), 66 | ), 67 | ) 68 | 69 | // Create test server 70 | ts := httptest.NewServer(chain) 71 | defer ts.Close() 72 | 73 | tests := []struct { 74 | name string 75 | method string 76 | path string 77 | requestBody interface{} 78 | headers map[string]string 79 | expectedCode int 80 | expectedError *errors.ErrorResponse 81 | setup func(t *testing.T, ts *httptest.Server) 82 | }{ 83 | { 84 | name: "method not allowed", 85 | method: http.MethodGet, 86 | path: "/v1/completions", 87 | expectedCode: http.StatusMethodNotAllowed, 88 | expectedError: &errors.ErrorResponse{ 89 | Type: errors.ValidationError, 90 | Message: "Method not allowed", 91 | Details: map[string]interface{}{ 92 | "method": http.MethodGet, 93 | "allowed_methods": []string{http.MethodPost}, 94 | }, 95 | }, 96 | }, 97 | { 98 | name: "missing content type", 99 | method: http.MethodPost, 100 | path: "/v1/completions", 101 | requestBody: CompletionRequest{Input: "test"}, 102 | expectedCode: http.StatusBadRequest, 103 | expectedError: &errors.ErrorResponse{ 104 | Type: errors.ValidationError, 105 | Message: "Content-Type header required", 106 | Details: map[string]interface{}{ 107 | "required_content_type": "application/json", 108 | }, 109 | }, 110 | }, 111 | { 112 | name: "rate limit 
exceeded", 113 | method: http.MethodPost, 114 | path: "/v1/completions", 115 | headers: map[string]string{"Content-Type": "application/json"}, 116 | requestBody: CompletionRequest{Input: "test"}, 117 | expectedCode: http.StatusTooManyRequests, 118 | expectedError: &errors.ErrorResponse{ 119 | Type: errors.RateLimitError, 120 | Message: "Rate limit exceeded", 121 | Details: map[string]interface{}{ 122 | "limit": 10, 123 | "window": "1m0s", 124 | }, 125 | }, 126 | setup: func(t *testing.T, ts *httptest.Server) { 127 | // Reset rate limiters before starting 128 | middleware.ResetRateLimiters() 129 | 130 | // Make 10 successful requests first 131 | for i := 0; i < 10; i++ { 132 | body, err := json.Marshal(CompletionRequest{Input: "test"}) 133 | require.NoError(t, err) 134 | req, err := http.NewRequest(http.MethodPost, ts.URL+"/v1/completions", bytes.NewReader(body)) 135 | require.NoError(t, err) 136 | req.Header.Set("Content-Type", "application/json") 137 | resp, err := http.DefaultClient.Do(req) 138 | require.NoError(t, err) 139 | require.Equal(t, http.StatusOK, resp.StatusCode) 140 | resp.Body.Close() 141 | } 142 | 143 | // The next request should fail 144 | body, err := json.Marshal(CompletionRequest{Input: "test"}) 145 | require.NoError(t, err) 146 | req, err := http.NewRequest(http.MethodPost, ts.URL+"/v1/completions", bytes.NewReader(body)) 147 | require.NoError(t, err) 148 | req.Header.Set("Content-Type", "application/json") 149 | resp, err := http.DefaultClient.Do(req) 150 | require.NoError(t, err) 151 | require.Equal(t, http.StatusTooManyRequests, resp.StatusCode) 152 | resp.Body.Close() 153 | }, 154 | }, 155 | { 156 | name: "malformed json", 157 | method: http.MethodPost, 158 | path: "/v1/completions", 159 | headers: map[string]string{"Content-Type": "application/json"}, 160 | requestBody: "{invalid json}", 161 | expectedCode: http.StatusBadRequest, 162 | expectedError: &errors.ErrorResponse{ 163 | Type: errors.ValidationError, 164 | Message: "Invalid 
completion request format", 165 | Details: map[string]interface{}{ 166 | "type": "default", 167 | }, 168 | }, 169 | }, 170 | { 171 | name: "context timeout", 172 | method: http.MethodPost, 173 | path: "/v1/completions", 174 | headers: map[string]string{ 175 | "Content-Type": "application/json", 176 | "X-Test-Timeout": "true", 177 | }, 178 | requestBody: CompletionRequest{Input: "test"}, 179 | expectedCode: http.StatusGatewayTimeout, 180 | expectedError: &errors.ErrorResponse{ 181 | Type: errors.InternalError, 182 | Message: "Request timeout", 183 | Details: map[string]interface{}{ 184 | "timeout": "5s", 185 | }, 186 | }, 187 | }, 188 | } 189 | 190 | for _, tt := range tests { 191 | t.Run(tt.name, func(t *testing.T) { 192 | // Reset rate limiters before each test 193 | middleware.ResetRateLimiters() 194 | 195 | // Run setup first if it exists 196 | if tt.setup != nil { 197 | tt.setup(t, ts) 198 | } 199 | 200 | // Create request 201 | var body []byte 202 | if str, ok := tt.requestBody.(string); ok { 203 | body = []byte(str) 204 | } else { 205 | var err error 206 | body, err = json.Marshal(tt.requestBody) 207 | require.NoError(t, err) 208 | } 209 | 210 | // Create request with context 211 | req, err := http.NewRequest(tt.method, ts.URL+tt.path, bytes.NewReader(body)) 212 | require.NoError(t, err) 213 | 214 | // Add headers 215 | for k, v := range tt.headers { 216 | req.Header.Set(k, v) 217 | } 218 | 219 | // Send request 220 | resp, err := http.DefaultClient.Do(req) 221 | require.NoError(t, err) 222 | defer resp.Body.Close() 223 | 224 | // Verify status code 225 | assert.Equal(t, tt.expectedCode, resp.StatusCode) 226 | 227 | if tt.expectedError != nil { 228 | var gotError errors.ErrorResponse 229 | err := json.NewDecoder(resp.Body).Decode(&gotError) 230 | require.NoError(t, err) 231 | 232 | assert.Equal(t, tt.expectedError.Type, gotError.Type) 233 | assert.Equal(t, tt.expectedError.Message, gotError.Message) 234 | assert.NotEmpty(t, gotError.RequestID) 235 | 236 | // 
Compare details, handling slice type differences 237 | if tt.expectedError.Details != nil { 238 | assert.Equal(t, len(tt.expectedError.Details), len(gotError.Details)) 239 | for k, v := range tt.expectedError.Details { 240 | gotV, ok := gotError.Details[k] 241 | assert.True(t, ok, "missing key %s in error details", k) 242 | 243 | // Special handling for slices 244 | if expSlice, ok := v.([]string); ok { 245 | if gotSlice, ok := gotV.([]interface{}); ok { 246 | assert.Equal(t, len(expSlice), len(gotSlice), "slice length mismatch for key %s", k) 247 | for i := range expSlice { 248 | assert.Equal(t, expSlice[i], gotSlice[i].(string)) 249 | } 250 | continue 251 | } 252 | } 253 | 254 | // Special handling for numbers from JSON 255 | if expInt, ok := v.(int); ok { 256 | if gotFloat, ok := gotV.(float64); ok { 257 | assert.Equal(t, float64(expInt), gotFloat) 258 | continue 259 | } 260 | } 261 | 262 | // Regular comparison for other values 263 | assert.Equal(t, v, gotV) 264 | } 265 | } 266 | } 267 | }) 268 | } 269 | } 270 | -------------------------------------------------------------------------------- /server/http3_0rtt_test.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "crypto/rand" 7 | "crypto/rsa" 8 | "crypto/tls" 9 | "crypto/x509" 10 | "crypto/x509/pkix" 11 | "encoding/json" 12 | "encoding/pem" 13 | "math/big" 14 | "net/http" 15 | "os" 16 | "testing" 17 | "time" 18 | 19 | "github.com/quic-go/quic-go" 20 | "github.com/quic-go/quic-go/http3" 21 | "github.com/stretchr/testify/assert" 22 | "github.com/stretchr/testify/require" 23 | "github.com/teilomillet/gollm" 24 | "github.com/teilomillet/hapax/config" 25 | "github.com/teilomillet/hapax/server/mocks" 26 | "go.uber.org/zap/zaptest" 27 | ) 28 | 29 | func generateTestCertificates(t *testing.T) (string, string) { 30 | certFile, err := os.CreateTemp("", "cert*.pem") 31 | require.NoError(t, err) 32 | keyFile, err := 
os.CreateTemp("", "key*.pem") 33 | require.NoError(t, err) 34 | 35 | // Generate self-signed certificate 36 | priv, err := rsa.GenerateKey(rand.Reader, 2048) 37 | require.NoError(t, err) 38 | 39 | template := x509.Certificate{ 40 | SerialNumber: big.NewInt(1), 41 | Subject: pkix.Name{ 42 | Organization: []string{"Test Co"}, 43 | }, 44 | NotBefore: time.Now(), 45 | NotAfter: time.Now().Add(time.Hour * 24 * 180), 46 | 47 | KeyUsage: x509.KeyUsageKeyEncipherment | x509.KeyUsageDigitalSignature, 48 | ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth}, 49 | BasicConstraintsValid: true, 50 | DNSNames: []string{"localhost"}, 51 | } 52 | 53 | derBytes, err := x509.CreateCertificate(rand.Reader, &template, &template, &priv.PublicKey, priv) 54 | require.NoError(t, err) 55 | 56 | // Write certificate 57 | err = pem.Encode(certFile, &pem.Block{Type: "CERTIFICATE", Bytes: derBytes}) 58 | require.NoError(t, err) 59 | 60 | // Write private key 61 | privBytes := x509.MarshalPKCS1PrivateKey(priv) 62 | err = pem.Encode(keyFile, &pem.Block{Type: "RSA PRIVATE KEY", Bytes: privBytes}) 63 | require.NoError(t, err) 64 | 65 | certFile.Close() 66 | keyFile.Close() 67 | 68 | return certFile.Name(), keyFile.Name() 69 | } 70 | 71 | func TestHTTP3_0RTT(t *testing.T) { 72 | // HTTP/3 (QUIC) requires specific UDP buffer sizes to function properly. 73 | // The quic-go library needs at least 7MB (7168 KB) for optimal performance. 74 | // Most CI environments have restricted UDP buffer sizes (typically 2MB max), 75 | // making it impossible to properly test HTTP/3 0-RTT functionality. 
76 | // 77 | // See: https://github.com/quic-go/quic-go/wiki/UDP-Buffer-Sizes 78 | if os.Getenv("CI") == "true" { 79 | t.Skip("Skipping HTTP/3 0-RTT test in CI environment due to UDP buffer size limitations (needs 7MB, CI typically allows only 2MB)") 80 | } 81 | 82 | // Create test certificates 83 | certFile, keyFile := generateTestCertificates(t) 84 | defer os.Remove(certFile) 85 | defer os.Remove(keyFile) 86 | 87 | // Create configuration with 0-RTT enabled 88 | cfg := &config.Config{ 89 | Server: config.ServerConfig{ 90 | Port: 8080, 91 | ReadTimeout: 30 * time.Second, 92 | WriteTimeout: 30 * time.Second, 93 | MaxHeaderBytes: 1 << 20, 94 | ShutdownTimeout: 30 * time.Second, 95 | HTTP3: &config.HTTP3Config{ 96 | Enabled: true, 97 | Port: 8443, 98 | TLSCertFile: certFile, 99 | TLSKeyFile: keyFile, 100 | IdleTimeout: 5 * time.Minute, 101 | MaxBiStreamsConcurrent: 1000, 102 | MaxUniStreamsConcurrent: 1000, 103 | MaxStreamReceiveWindow: 10 * 1024 * 1024, 104 | MaxConnectionReceiveWindow: 25 * 1024 * 1024, 105 | Enable0RTT: true, 106 | Max0RTTSize: 16 * 1024, 107 | Allow0RTTReplay: false, 108 | // Set UDP buffer size to 7MB as required by quic-go for proper operation 109 | // This value comes from quic-go's internal requirements: 110 | // https://github.com/quic-go/quic-go/wiki/UDP-Buffer-Sizes#non-bsd 111 | UDPReceiveBufferSize: 7168 * 1024, // 7MB (7168 KB) - minimum required by quic-go 112 | }, 113 | }, 114 | LLM: config.LLMConfig{ 115 | Provider: "mock", 116 | Model: "mock-model", 117 | SystemPrompt: "You are a test assistant", 118 | }, 119 | } 120 | 121 | // Create test logger 122 | logger := zaptest.NewLogger(t) 123 | 124 | // Create mock LLM 125 | mockLLM := mocks.NewMockLLM(func(ctx context.Context, prompt *gollm.Prompt) (string, error) { 126 | return "test response", nil 127 | }) 128 | 129 | // Create server with better error handling 130 | server, err := NewServerWithConfig(mocks.NewMockConfigWatcher(cfg), mockLLM, logger) 131 | require.NoError(t, err, 
"Failed to create server") 132 | require.NotNil(t, server, "Server instance should not be nil") 133 | 134 | // Start server 135 | ctx, cancel := context.WithCancel(context.Background()) 136 | defer cancel() 137 | 138 | errCh := make(chan error, 1) 139 | go func() { 140 | errCh <- server.Start(ctx) 141 | }() 142 | 143 | // Configure HTTP/3 client with longer timeouts 144 | transport := &http3.Transport{ 145 | TLSClientConfig: &tls.Config{ 146 | InsecureSkipVerify: true, 147 | }, 148 | QUICConfig: &quic.Config{ 149 | MaxIdleTimeout: 30 * time.Second, 150 | HandshakeIdleTimeout: 10 * time.Second, 151 | MaxStreamReceiveWindow: 10 * 1024 * 1024, 152 | MaxConnectionReceiveWindow: 25 * 1024 * 1024, 153 | KeepAlivePeriod: 5 * time.Second, 154 | Allow0RTT: true, 155 | }, 156 | } 157 | defer transport.Close() 158 | 159 | client := &http.Client{ 160 | Transport: transport, 161 | Timeout: 30 * time.Second, 162 | } 163 | 164 | // Wait for server to be ready 165 | require.Eventually(t, func() bool { 166 | resp, err := client.Get("https://localhost:8443/health") 167 | if err != nil { 168 | t.Logf("Server not ready: %v", err) 169 | return false 170 | } 171 | defer resp.Body.Close() 172 | return resp.StatusCode == http.StatusOK 173 | }, 10*time.Second, 100*time.Millisecond, "Server failed to start") 174 | 175 | t.Run("0-RTT Basic Functionality", func(t *testing.T) { 176 | // First request establishes connection 177 | resp, err := client.Get("https://localhost:8443/health") 178 | require.NoError(t, err) 179 | defer resp.Body.Close() 180 | assert.Equal(t, http.StatusOK, resp.StatusCode) 181 | 182 | // Second request should use 0-RTT 183 | resp, err = client.Get("https://localhost:8443/health") 184 | require.NoError(t, err) 185 | defer resp.Body.Close() 186 | assert.Equal(t, http.StatusOK, resp.StatusCode) 187 | }) 188 | 189 | t.Run("0-RTT Replay Protection with Real Data", func(t *testing.T) { 190 | // Create completion request 191 | reqBody := map[string]string{"input": "test"} 192 
| jsonData, err := json.Marshal(reqBody) 193 | require.NoError(t, err) 194 | 195 | // First request 196 | req1, err := http.NewRequest(http.MethodPost, "https://localhost:8443/v1/completions", bytes.NewBuffer(jsonData)) 197 | require.NoError(t, err) 198 | req1.Header.Set("Content-Type", "application/json") 199 | 200 | resp, err := client.Do(req1) 201 | require.NoError(t, err) 202 | defer resp.Body.Close() 203 | assert.Equal(t, http.StatusOK, resp.StatusCode) 204 | 205 | // Create a new request with the same data for replay 206 | req2, err := http.NewRequest(http.MethodPost, "https://localhost:8443/v1/completions", bytes.NewBuffer(jsonData)) 207 | require.NoError(t, err) 208 | req2.Header.Set("Content-Type", "application/json") 209 | 210 | // Immediate replay should be rejected 211 | resp, err = client.Do(req2) 212 | require.NoError(t, err) 213 | defer resp.Body.Close() 214 | assert.Equal(t, http.StatusTooEarly, resp.StatusCode) 215 | }) 216 | 217 | // Cleanup 218 | cancel() 219 | select { 220 | case err := <-errCh: 221 | assert.NoError(t, err) 222 | case <-time.After(5 * time.Second): 223 | t.Error("Server did not shut down within timeout") 224 | } 225 | } 226 | -------------------------------------------------------------------------------- /server/metrics/metrics.go: -------------------------------------------------------------------------------- 1 | package metrics 2 | 3 | import ( 4 | "net/http" 5 | 6 | "github.com/prometheus/client_golang/prometheus" 7 | "github.com/prometheus/client_golang/prometheus/collectors" 8 | "github.com/prometheus/client_golang/prometheus/promauto" 9 | "github.com/prometheus/client_golang/prometheus/promhttp" 10 | ) 11 | 12 | // Metrics encapsulates Prometheus metrics for the server. 
// Metrics encapsulates the Prometheus collectors used by the server.
// All collectors are registered against a private registry so tests can
// create isolated instances without colliding with the global default
// registry.
type Metrics struct {
	registry        *prometheus.Registry
	RequestsTotal   *prometheus.CounterVec   // requests by endpoint and status code
	RequestDuration *prometheus.HistogramVec // request latency by endpoint
	ActiveRequests  *prometheus.GaugeVec     // in-flight requests by endpoint
	ErrorsTotal     *prometheus.CounterVec   // errors by error type
	RateLimitHits   *prometheus.CounterVec   // rate-limit rejections by client
}

// NewMetrics creates a new Metrics instance with a custom registry.
//
// Besides the application collectors it registers the standard Go runtime
// and process collectors, and pre-touches a few label combinations so the
// corresponding series exist (at zero) from the first scrape.
func NewMetrics() *Metrics {
	registry := prometheus.NewRegistry()
	factory := promauto.With(registry)

	m := &Metrics{
		registry: registry,
		RequestsTotal: factory.NewCounterVec(
			prometheus.CounterOpts{
				Name: "hapax_http_requests_total",
				Help: "Total number of HTTP requests by endpoint and status",
			},
			[]string{"endpoint", "status"},
		),
		RequestDuration: factory.NewHistogramVec(
			prometheus.HistogramOpts{
				Name:    "hapax_http_request_duration_seconds",
				Help:    "Duration of HTTP requests in seconds",
				Buckets: prometheus.DefBuckets,
			},
			[]string{"endpoint"},
		),
		ActiveRequests: factory.NewGaugeVec(
			prometheus.GaugeOpts{
				Name: "hapax_http_active_requests",
				Help: "Number of currently active HTTP requests",
			},
			[]string{"endpoint"},
		),
		ErrorsTotal: factory.NewCounterVec(
			prometheus.CounterOpts{
				Name: "hapax_errors_total",
				Help: "Total number of errors by type",
			},
			[]string{"type"},
		),
		RateLimitHits: factory.NewCounterVec(
			prometheus.CounterOpts{
				Name: "hapax_rate_limit_hits_total",
				Help: "Total number of rate limit hits by client",
			},
			[]string{"client"},
		),
	}

	// Register default Go runtime and process metrics.
	registry.MustRegister(collectors.NewGoCollector())
	registry.MustRegister(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}))

	// Initialize some default metrics so dashboards show zero-valued series
	// immediately instead of "no data".
	m.RequestsTotal.WithLabelValues("/health", "200").Add(0)
	m.RequestsTotal.WithLabelValues("/metrics", "200").Add(0)
	// NOTE(review): Observe(0) records a real zero-latency sample (the
	// histogram count becomes 1), it does not merely create the series —
	// confirm that is intended.
	m.RequestDuration.WithLabelValues("/health").Observe(0)
	m.RequestDuration.WithLabelValues("/metrics").Observe(0)
	// NOTE(review): these gauge labels ("queued"/"processing") differ from
	// the URL-path labels the PrometheusMetrics middleware uses on the same
	// gauge — confirm the intended label scheme for ActiveRequests.
	m.ActiveRequests.WithLabelValues("queued").Add(0)
	m.ActiveRequests.WithLabelValues("processing").Add(0)

	return m
}

// Handler returns an http.Handler that serves this instance's private
// registry in the Prometheus exposition format.
func (m *Metrics) Handler() http.Handler {
	return promhttp.HandlerFor(m.registry, promhttp.HandlerOpts{
		EnableOpenMetrics: false, // Disable OpenMetrics format to avoid escaping=values
	})
}

// Authentication middleware validates API keys and manages authentication.
//
// Accepted credentials, checked in order:
//  1. a non-empty X-API-Key header, or
//  2. a non-empty Bearer token in the Authorization header.
//
// Requests with neither receive a structured 401 response.
//
// SECURITY NOTE(review): both paths currently accept ANY non-empty value
// (see TODOs below); this is a placeholder, not real authentication.
func Authentication(next http.Handler) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Check for API key
		apiKey := r.Header.Get("X-API-Key")
		if apiKey != "" {
			// TODO: Validate API key against configuration or database
			// For now, we'll accept any non-empty key
			next.ServeHTTP(w, r)
			return
		}

		// Check for Bearer token
		authHeader := r.Header.Get("Authorization")
		if authHeader != "" && strings.HasPrefix(authHeader, "Bearer ") {
			token := strings.TrimPrefix(authHeader, "Bearer ")
			if token != "" {
				// TODO: Validate token against configuration or database
				// For now, we'll accept any non-empty token
				next.ServeHTTP(w, r)
				return
			}
		}

		errors.ErrorWithType(w, "Missing or invalid authentication", errors.AuthenticationError, http.StatusUnauthorized)
	})
}
// contextKey is a private type for context keys defined in this package,
// preventing collisions with keys set by other packages.
type contextKey string

const (
	// RequestIDKey carries the per-request ID set by the RequestID middleware.
	RequestIDKey contextKey = "request_id"
	// XTestTimeoutKey is used by tests to inject timeout behavior.
	XTestTimeoutKey contextKey = "X-Test-Timeout"
)

// ResponseWriter wraps http.ResponseWriter to capture status code and size
// for request logging.
//
// NOTE(review): this package defines two near-identical wrappers
// (ResponseWriter here, responseWriter in the metrics middleware) —
// consider consolidating them.
type ResponseWriter struct {
	http.ResponseWriter
	status int   // last status code passed to WriteHeader; 0 if never called
	size   int64 // cumulative bytes written to the body
}

// NewResponseWriter creates a new ResponseWriter wrapping w.
func NewResponseWriter(w http.ResponseWriter) *ResponseWriter {
	return &ResponseWriter{ResponseWriter: w}
}

// WriteHeader records the status code and forwards it to the underlying
// writer. There is no guard against double WriteHeader calls; net/http
// logs a warning in that case.
func (w *ResponseWriter) WriteHeader(status int) {
	w.status = status
	w.ResponseWriter.WriteHeader(status)
}

// Write forwards the body bytes and accumulates the written size.
func (w *ResponseWriter) Write(b []byte) (int, error) {
	size, err := w.ResponseWriter.Write(b)
	w.size += int64(size)
	return size, err
}

// Status returns the recorded status code, defaulting to 200 when the
// handler wrote a body without calling WriteHeader explicitly.
func (w *ResponseWriter) Status() int {
	if w.status == 0 {
		return http.StatusOK
	}
	return w.status
}

// Size returns the number of body bytes written so far.
func (w *ResponseWriter) Size() int64 {
	return w.size
}

// Logging middleware logs request and response details using the supplied
// structured logger: one "Request started" entry before the handler runs
// and one "Request completed" entry after it returns.
//
// NOTE(review): the completion entry carries only duration/status/size, not
// the method, path, or request ID — correlating the two entries relies on
// log ordering. Consider adding shared fields; confirm before changing.
func Logging(logger *zap.Logger) func(http.Handler) http.Handler {
	return func(next http.Handler) http.Handler {
		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			start := time.Now()
			rw := NewResponseWriter(w)

			// Log request details
			logger.Info("Request started",
				zap.String("method", r.Method),
				zap.String("path", r.URL.Path),
				zap.String("remote_addr", r.RemoteAddr),
				zap.String("user_agent", r.UserAgent()),
			)

			next.ServeHTTP(rw, r)

			// Log response details
			logger.Info("Request completed",
				zap.Duration("duration", time.Since(start)),
				zap.Int("status", rw.Status()),
				zap.Int64("size", rw.Size()),
			)
		})
	}
}

// PrometheusMetrics middleware records HTTP metrics using Prometheus.
// It wraps the handler to measure request count, duration, active
// requests, and error counts, all recorded against the supplied Metrics.
//
// NOTE(review): metrics are labeled with the raw r.URL.Path, so endpoints
// with path parameters (e.g. /v1/users/123) create one time series per
// distinct path — unbounded label cardinality. Consider labeling with the
// matched route pattern instead; confirm against the router in use.
func PrometheusMetrics(m *metrics.Metrics) func(next http.Handler) http.Handler {
	return func(next http.Handler) http.Handler {
		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			// Record the start time of the request.
			start := time.Now()

			// Track in-flight requests for this path; the deferred Dec
			// guarantees the gauge returns to its prior value even if the
			// handler panics.
			m.ActiveRequests.WithLabelValues(r.URL.Path).Inc()
			defer m.ActiveRequests.WithLabelValues(r.URL.Path).Dec()

			// Wrap the writer so the status code chosen by the handler can
			// be observed after it returns. Default to 200 for handlers
			// that write a body without calling WriteHeader.
			rw := &responseWriter{ResponseWriter: w, statusCode: http.StatusOK}

			// Call the next handler in the chain.
			next.ServeHTTP(rw, r)

			// Record request count and latency.
			duration := time.Since(start).Seconds()
			status := strconv.Itoa(rw.statusCode)

			m.RequestsTotal.WithLabelValues(r.URL.Path, status).Inc()
			m.RequestDuration.WithLabelValues(r.URL.Path).Observe(duration)

			// Classify failures: 5xx as server errors, 4xx as client errors.
			if rw.statusCode >= 500 {
				m.ErrorsTotal.WithLabelValues("server_error").Inc()
			} else if rw.statusCode >= 400 {
				m.ErrorsTotal.WithLabelValues("client_error").Inc()
			}
		})
	}
}

// responseWriter wraps http.ResponseWriter to capture the status code for
// the metrics middleware. wroteHeader tracks whether WriteHeader has been
// called so Write can supply the implicit 200 exactly once.
type responseWriter struct {
	http.ResponseWriter
	statusCode  int
	wroteHeader bool
}

// WriteHeader records the status code, marks the header as written, and
// forwards the call to the wrapped writer.
func (rw *responseWriter) WriteHeader(code int) {
	rw.statusCode = code
	rw.wroteHeader = true
	rw.ResponseWriter.WriteHeader(code)
}

// Write forwards body bytes, first emitting an implicit 200 header if the
// handler never called WriteHeader (mirroring net/http's own behavior so
// the recorded status matches what the client saw).
func (rw *responseWriter) Write(b []byte) (int, error) {
	if !rw.wroteHeader {
		rw.WriteHeader(http.StatusOK)
	}
	return rw.ResponseWriter.Write(b)
}
// TestPrometheusMetrics verifies that the metrics middleware records
// request counts, restores the active-request gauge to zero, and
// classifies 5xx responses as server errors.
func TestPrometheusMetrics(t *testing.T) {
	// Create new metrics instance for testing
	m := metrics.NewMetrics()

	tests := []struct {
		name           string
		handler        http.HandlerFunc
		expectedCode   int
		expectedPath   string
		expectedStatus string
	}{
		{
			name: "success request",
			handler: func(w http.ResponseWriter, r *http.Request) {
				w.WriteHeader(http.StatusOK)
			},
			expectedCode:   http.StatusOK,
			expectedPath:   "/",
			expectedStatus: "200",
		},
		{
			name: "error request",
			handler: func(w http.ResponseWriter, r *http.Request) {
				w.WriteHeader(http.StatusInternalServerError)
			},
			expectedCode:   http.StatusInternalServerError,
			expectedPath:   "/",
			expectedStatus: "500",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Create test server wrapping the case's handler with the middleware.
			handler := middleware.PrometheusMetrics(m)(tt.handler)
			server := httptest.NewServer(handler)
			defer server.Close()

			// Make request
			resp, err := http.Get(server.URL)
			if err != nil {
				t.Fatal(err)
			}
			defer resp.Body.Close()

			// Check response code
			assert.Equal(t, tt.expectedCode, resp.StatusCode)

			// Check request metrics. NOTE(review): both subtests share one
			// Metrics instance; counts stay at 1 only because each case uses
			// a distinct status label.
			requestCount := testutil.ToFloat64(m.RequestsTotal.WithLabelValues(tt.expectedPath, tt.expectedStatus))
			assert.Equal(t, float64(1), requestCount)

			// Active requests should return to 0 after the request completes.
			activeRequests := testutil.ToFloat64(m.ActiveRequests.WithLabelValues(tt.expectedPath))
			assert.Equal(t, float64(0), activeRequests)

			// Check error metrics for 5xx responses
			if tt.expectedCode >= 500 {
				errorCount := testutil.ToFloat64(m.ErrorsTotal.WithLabelValues("server_error"))
				assert.Equal(t, float64(1), errorCount)
			}
		})
	}
}

// TestMetricsObservability systematically validates metrics tracking mechanisms.
//
// NOTE(review): this test increments requestsTotal/errorsTotal itself in
// the test body rather than asserting on counters maintained by the
// provider manager — it validates the test's own bookkeeping plus the
// manager's error propagation, not manager-side instrumentation. Confirm
// whether that is the intent.
func TestMetricsObservability(t *testing.T) {
	// Comprehensive test scenarios: one success path, one provider failure.
	testCases := []struct {
		name             string
		providerBehavior func(context.Context, *gollm.Prompt) (string, error)
		expectedMetrics  map[string]float64
		expectedError    bool
	}{
		{
			name: "Successful Provider Interaction",
			providerBehavior: func(ctx context.Context, prompt *gollm.Prompt) (string, error) {
				return "Successful response", nil
			},
			expectedMetrics: map[string]float64{
				"hapax_provider_requests_total": 1,
				"hapax_provider_errors_total":   0,
			},
			expectedError: false,
		},
		{
			name: "Provider Failure Scenario",
			providerBehavior: func(ctx context.Context, prompt *gollm.Prompt) (string, error) {
				return "", fmt.Errorf("simulated provider error")
			},
			expectedMetrics: map[string]float64{
				"hapax_provider_requests_total": 1,
				"hapax_provider_errors_total":   1,
			},
			expectedError: true,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			// Counters are created fresh per subtest so expected values are absolute.
			requestsTotal := prometheus.NewCounterVec(
				prometheus.CounterOpts{
					Name: "hapax_provider_requests_total",
					Help: "Total number of provider requests",
				},
				[]string{"provider"},
			)

			errorsTotal := prometheus.NewCounterVec(
				prometheus.CounterOpts{
					Name: "hapax_provider_errors_total",
					Help: "Total number of provider errors",
				},
				[]string{"provider"},
			)

			// Isolated registry so Gather() only sees these two collectors.
			registry := prometheus.NewRegistry()
			registry.MustRegister(requestsTotal, errorsTotal)

			// Mock provider whose Generate behavior is driven by the test case.
			mockProvider := mocks.NewMockLLMWithConfig(
				"test",
				"test-model",
				func(ctx context.Context, prompt *gollm.Prompt) (string, error) {
					// Directly use the test case's provider behavior
					return tc.providerBehavior(ctx, prompt)
				},
			)

			// Construct provider configuration
			cfg := &config.Config{
				TestMode: true,
				Providers: map[string]config.ProviderConfig{
					"test": {
						Type:  "test",
						Model: "test-model",
					},
				},
				ProviderPreference: []string{"test"},
			}

			// Initialize provider manager
			logger := zap.NewNop()
			manager, err := provider.NewManager(cfg, logger, registry)
			require.NoError(t, err)

			// Configure providers
			providers := map[string]gollm.LLM{
				"test": mockProvider,
			}
			manager.SetProviders(providers)

			// Prepare test prompt
			prompt := &gollm.Prompt{
				Messages: []gollm.PromptMessage{
					{Role: "user", Content: "Test metrics observability"},
				},
			}

			// Increment request metric before execution (see NOTE above: the
			// test maintains these counters itself).
			requestsTotal.WithLabelValues("test").Inc()

			// Execute request, capturing both the inner generation error and
			// the manager's propagated error separately.
			var executionError error
			err = manager.Execute(context.Background(), func(llm gollm.LLM) error {
				_, execErr := llm.Generate(context.Background(), prompt)

				// Track error metric for failure scenarios
				if execErr != nil {
					errorsTotal.WithLabelValues("test").Inc()
				}

				// Capture and preserve execution error
				executionError = execErr
				return execErr
			}, prompt)

			// Error expectation validation
			if tc.expectedError {
				require.Error(t, executionError, "Expected error in failure scenario")
				require.Error(t, err, "Manager execution should propagate error")
			} else {
				require.NoError(t, executionError, "No error expected in successful scenario")
				require.NoError(t, err, "Manager execution should succeed")
			}

			// Gather everything from the isolated registry and compare each
			// counter against the expected absolute value.
			mfs, err := registry.Gather()
			require.NoError(t, err)

			for _, mf := range mfs {
				for _, metric := range mf.GetMetric() {
					switch mf.GetName() {
					case "hapax_provider_requests_total":
						actualValue := metric.GetCounter().GetValue()
						assert.Equal(t,
							tc.expectedMetrics["hapax_provider_requests_total"],
							actualValue,
							"Requests total metric did not match expected value",
						)

					case "hapax_provider_errors_total":
						actualValue := metric.GetCounter().GetValue()
						assert.Equal(t,
							tc.expectedMetrics["hapax_provider_errors_total"],
							actualValue,
							"Errors total metric did not match expected value",
						)
					}
				}
			}
		})
	}
}
/server/middleware/middleware.go: -------------------------------------------------------------------------------- 1 | package middleware 2 | 3 | import ( 4 | "net/http" 5 | "time" 6 | 7 | "github.com/go-chi/chi/v5/middleware" 8 | "github.com/teilomillet/hapax/errors" 9 | ) 10 | 11 | // RequestTimer measures request processing time 12 | // It wraps the HTTP handler to calculate the duration of the request 13 | // and sets the X-Response-Time header in the response. 14 | func RequestTimer(next http.Handler) http.Handler { 15 | return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 16 | start := time.Now() // Record the start time of the request 17 | ww := middleware.NewWrapResponseWriter(w, r.ProtoMajor) // Wrap the response writer 18 | next.ServeHTTP(ww, r) // Call the next handler 19 | duration := time.Since(start) // Calculate the duration 20 | w.Header().Set("X-Response-Time", duration.String()) // Set the response header 21 | }) 22 | } 23 | 24 | // PanicRecovery recovers from panics and returns a 500 error 25 | func PanicRecovery(next http.Handler) http.Handler { 26 | return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 27 | defer func() { 28 | if err := recover(); err != nil { 29 | errors.ErrorWithType(w, "Internal server error", errors.InternalError, http.StatusInternalServerError) 30 | } 31 | }() 32 | next.ServeHTTP(w, r) 33 | }) 34 | } 35 | 36 | // CORS handles Cross-Origin Resource Sharing 37 | // It allows or denies requests from different origins based on the configuration. 
38 | func CORS(next http.Handler) http.Handler { 39 | return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 40 | // Set CORS headers to allow cross-origin requests 41 | w.Header().Set("Access-Control-Allow-Origin", "*") // Allow all origins 42 | w.Header().Set("Access-Control-Allow-Methods", "GET, POST, PUT, DELETE, OPTIONS") // Allow GET, POST, PUT, DELETE, and OPTIONS methods 43 | w.Header().Set("Access-Control-Allow-Headers", "Accept, Authorization, Content-Type, X-CSRF-Token") // Allow Accept, Authorization, Content-Type, and X-CSRF-Token headers 44 | 45 | // Handle preflight requests 46 | if r.Method == http.MethodOptions { 47 | // Respond with 204 No Content for preflight requests 48 | w.WriteHeader(http.StatusNoContent) 49 | return 50 | } 51 | 52 | // Call the next handler for non-preflight requests 53 | next.ServeHTTP(w, r) 54 | }) 55 | } 56 | -------------------------------------------------------------------------------- /server/middleware/middleware_test.go: -------------------------------------------------------------------------------- 1 | package middleware_test 2 | 3 | import ( 4 | "net/http" 5 | "net/http/httptest" 6 | "testing" 7 | "time" 8 | 9 | "github.com/prometheus/client_golang/prometheus" 10 | "github.com/stretchr/testify/assert" 11 | "github.com/teilomillet/hapax/server/metrics" 12 | "github.com/teilomillet/hapax/server/middleware" 13 | ) 14 | 15 | func TestRequestID(t *testing.T) { 16 | handler := middleware.RequestID(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 17 | // Handler should see request ID in context 18 | requestID := r.Context().Value(middleware.RequestIDKey).(string) 19 | assert.NotEmpty(t, requestID) 20 | assert.Equal(t, requestID, w.Header().Get("X-Request-ID")) 21 | })) 22 | 23 | tests := []struct { 24 | name string 25 | }{ 26 | { 27 | name: "generates new request ID", 28 | }, 29 | } 30 | 31 | for _, tt := range tests { 32 | t.Run(tt.name, func(t *testing.T) { 33 | req := 
// TestRequestID verifies that the middleware places a UUID in both the
// request context and the X-Request-ID response header.
func TestRequestID(t *testing.T) {
	handler := middleware.RequestID(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Handler should see request ID in context; the unchecked assertion
		// would fail the test loudly if the middleware did not set it.
		requestID := r.Context().Value(middleware.RequestIDKey).(string)
		assert.NotEmpty(t, requestID)
		assert.Equal(t, requestID, w.Header().Get("X-Request-ID"))
	}))

	tests := []struct {
		name string
	}{
		{
			name: "generates new request ID",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			req := httptest.NewRequest("GET", "/", nil)
			rec := httptest.NewRecorder()

			handler.ServeHTTP(rec, req)

			// Check response header
			respID := rec.Header().Get("X-Request-ID")
			assert.NotEmpty(t, respID)

			// Request ID should be a UUID
			assert.Len(t, respID, 36) // UUID v4 string length
		})
	}
}

// TestRequestTimer checks that X-Response-Time is set and covers at least
// the handler's simulated work time.
func TestRequestTimer(t *testing.T) {
	handler := middleware.RequestTimer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		time.Sleep(10 * time.Millisecond) // Simulate some work
	}))

	req := httptest.NewRequest("GET", "/", nil)
	rec := httptest.NewRecorder()

	handler.ServeHTTP(rec, req)

	respTime := rec.Header().Get("X-Response-Time")
	assert.NotEmpty(t, respTime)

	duration, err := time.ParseDuration(respTime)
	assert.NoError(t, err)
	assert.GreaterOrEqual(t, duration, 10*time.Millisecond)
}

// TestPanicRecovery confirms a panicking handler is converted into a 500.
func TestPanicRecovery(t *testing.T) {
	handler := middleware.PanicRecovery(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		panic("test panic")
	}))

	req := httptest.NewRequest("GET", "/", nil)
	rec := httptest.NewRecorder()

	handler.ServeHTTP(rec, req)

	assert.Equal(t, http.StatusInternalServerError, rec.Code)
}

// TestCORS exercises both the OPTIONS preflight short-circuit and the
// header pass-through on normal requests.
func TestCORS(t *testing.T) {
	tests := []struct {
		name            string
		method          string
		expectedStatus  int
		expectedHeaders map[string]string
	}{
		{
			name:           "preflight request",
			method:         "OPTIONS",
			expectedStatus: http.StatusNoContent,
			expectedHeaders: map[string]string{
				"Access-Control-Allow-Origin":  "*",
				"Access-Control-Allow-Methods": "GET, POST, PUT, DELETE, OPTIONS",
				"Access-Control-Allow-Headers": "Accept, Authorization, Content-Type, X-CSRF-Token",
			},
		},
		{
			name:           "normal request",
			method:         "GET",
			expectedStatus: http.StatusOK,
			expectedHeaders: map[string]string{
				"Access-Control-Allow-Origin":  "*",
				"Access-Control-Allow-Methods": "GET, POST, PUT, DELETE, OPTIONS",
				"Access-Control-Allow-Headers": "Accept, Authorization, Content-Type, X-CSRF-Token",
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			handler := middleware.CORS(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
				w.WriteHeader(http.StatusOK)
			}))

			req := httptest.NewRequest(tt.method, "/", nil)
			rr := httptest.NewRecorder()

			handler.ServeHTTP(rr, req)

			assert.Equal(t, tt.expectedStatus, rr.Code)
			for key, value := range tt.expectedHeaders {
				assert.Equal(t, value, rr.Header().Get(key))
			}
		})
	}
}

// TestAuthentication covers the three credential states: absent header,
// malformed Authorization value, and a valid Bearer token.
func TestAuthentication(t *testing.T) {
	// Create test handler
	nextHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusOK)
	})
	handler := middleware.Authentication(nextHandler)

	// Test without auth header
	req := httptest.NewRequest("GET", "/", nil)
	w := httptest.NewRecorder()
	handler.ServeHTTP(w, req)
	assert.Equal(t, http.StatusUnauthorized, w.Code)

	// Test with invalid auth header (missing "Bearer " prefix)
	req = httptest.NewRequest("GET", "/", nil)
	req.Header.Set("Authorization", "invalid")
	w = httptest.NewRecorder()
	handler.ServeHTTP(w, req)
	assert.Equal(t, http.StatusUnauthorized, w.Code)

	// Test with valid auth header
	req = httptest.NewRequest("GET", "/", nil)
	req.Header.Set("Authorization", "Bearer valid-token")
	w = httptest.NewRecorder()
	handler.ServeHTTP(w, req)
	assert.Equal(t, http.StatusOK, w.Code)
}

// TestTimeout checks both the timeout (504) and the fast-path (200) cases.
// NOTE(review): the first case really sleeps 2s, making this one of the
// slowest unit tests in the package — consider shrinking the durations.
func TestTimeout(t *testing.T) {
	// Create test handler that sleeps past the deadline.
	nextHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		time.Sleep(2 * time.Second)
		w.WriteHeader(http.StatusOK)
	})
	handler := middleware.Timeout(1 * time.Second)(nextHandler)

	// Test timeout
	req := httptest.NewRequest("GET", "/", nil)
	w := httptest.NewRecorder()
	handler.ServeHTTP(w, req)
	assert.Equal(t, http.StatusGatewayTimeout, w.Code)

	// Test success (no timeout)
	nextHandler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusOK)
	})
	handler = middleware.Timeout(1 * time.Second)(nextHandler)
	req = httptest.NewRequest("GET", "/", nil)
	w = httptest.NewRecorder()
	handler.ServeHTTP(w, req)
	assert.Equal(t, http.StatusOK, w.Code)
}

// TestRateLimit sends 11 direct requests from one IP and expects the 11th
// to be rejected (burst capacity is 10).
//
// NOTE(review): unlike TestRateLimitMetrics, this test does NOT call
// middleware.ResetRateLimiters(), so it depends on the package-level
// limiter map being fresh — confirm test-order independence.
func TestRateLimit(t *testing.T) {
	// Reset metrics registry
	prometheus.DefaultRegisterer = prometheus.NewRegistry()

	// Create metrics
	m := metrics.NewMetrics()

	// Create test handler
	nextHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusOK)
	})

	// Create middleware handler
	handler := middleware.RateLimit(m)(nextHandler)

	// Test rate limit
	for i := 0; i < 11; i++ {
		req := httptest.NewRequest("GET", "/", nil)
		req.RemoteAddr = "127.0.0.1:1234"
		w := httptest.NewRecorder()
		handler.ServeHTTP(w, req)

		if i < 10 {
			assert.Equal(t, http.StatusOK, w.Code)
		} else {
			assert.Equal(t, http.StatusTooManyRequests, w.Code)
		}
	}
}
// rateLimiters holds the rate limiters for each visitor IP address
// and ensures safe concurrent access using a read-write mutex.
//
// NOTE(review): entries are never evicted, so this map grows without
// bound as distinct client IPs are seen — consider expiring idle limiters.
type rateLimiters struct {
	// visitors is a map of IP addresses to their corresponding rate limiters.
	visitors map[string]*rate.Limiter
	// mu is a read-write mutex that protects access to the visitors map.
	mu sync.RWMutex
}

// limiters is a package-level instance shared by every RateLimit handler
// in the process.
var (
	limiters = &rateLimiters{
		visitors: make(map[string]*rate.Limiter),
	}
)

// GetOrCreate retrieves the rate limiter for the given IP address,
// creating one via create and caching it if none exists yet. The write
// lock is held for the whole operation, so concurrent callers for the
// same IP always observe a single limiter.
func (l *rateLimiters) GetOrCreate(ip string, create func() *rate.Limiter) *rate.Limiter {
	l.mu.Lock()
	defer l.mu.Unlock()

	// Check if a rate limiter already exists for the given IP address.
	limiter, exists := l.visitors[ip]
	if !exists {
		limiter = create()
		l.visitors[ip] = limiter
	}

	return limiter
}

// RateLimit creates middleware that applies a per-IP token-bucket rate
// limit to incoming requests and records rejections in the supplied
// metrics.
func RateLimit(metrics *metrics.Metrics) func(http.Handler) http.Handler {
	return func(next http.Handler) http.Handler {
		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			// Extract the client IP by stripping the trailing :port.
			// NOTE(review): for an IPv6 RemoteAddr this cuts at the LAST
			// colon; a bracketed "[::1]:8080" yields "[::1" and an unported
			// "::1" loses its final group. net.SplitHostPort would be safer —
			// confirm whether RemoteAddr can take those forms here.
			ip := r.RemoteAddr
			if idx := strings.LastIndex(ip, ":"); idx != -1 {
				ip = ip[:idx]
			}

			// rate.Every(time.Minute) refills ONE token per minute with a
			// burst capacity of 10: a client gets 10 immediate requests,
			// then one per minute — NOT a sustained 10 requests/minute as
			// the error metadata below ("limit": 10, "window": "1m0s")
			// implies. TODO confirm which policy is intended.
			limiter := limiters.GetOrCreate(ip, func() *rate.Limiter {
				return rate.NewLimiter(rate.Every(time.Minute), 10)
			})

			// Try to consume a token for this request.
			if !limiter.Allow() {
				// Rejected: record the hit, labeled by client IP.
				metrics.RateLimitHits.WithLabelValues(ip).Inc()
				var requestID string
				if id := r.Context().Value(RequestIDKey); id != nil {
					requestID = id.(string)
				}

				// Build the structured 429 response.
				errResp := errors.NewError(
					errors.RateLimitError,
					"Rate limit exceeded",
					http.StatusTooManyRequests,
					requestID,
					map[string]interface{}{
						"limit":  int64(10), // Use int64 to ensure it's not converted to float64
						"window": "1m0s",
					},
					nil,
				)

				errors.WriteError(w, errResp)
				return
			}

			// Token available: pass the request through.
			next.ServeHTTP(w, r)
		})
	}
}
101 | func ResetRateLimiters() { 102 | limiters.mu.Lock() 103 | defer limiters.mu.Unlock() 104 | limiters.visitors = make(map[string]*rate.Limiter) 105 | } 106 | -------------------------------------------------------------------------------- /server/middleware/ratelimit_test.go: -------------------------------------------------------------------------------- 1 | package middleware_test 2 | 3 | import ( 4 | "net/http" 5 | "net/http/httptest" 6 | "testing" 7 | 8 | "github.com/prometheus/client_golang/prometheus/testutil" 9 | "github.com/stretchr/testify/assert" 10 | "github.com/teilomillet/hapax/server/metrics" 11 | "github.com/teilomillet/hapax/server/middleware" 12 | ) 13 | 14 | func TestRateLimitMetrics(t *testing.T) { 15 | // Create new metrics instance for testing 16 | m := metrics.NewMetrics() 17 | 18 | // Reset rate limiters 19 | middleware.ResetRateLimiters() 20 | 21 | // Create test handler 22 | handler := middleware.RateLimit(m)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 23 | w.WriteHeader(http.StatusOK) 24 | })) 25 | 26 | // Create test server 27 | server := httptest.NewServer(handler) 28 | defer server.Close() 29 | 30 | // Make requests to trigger rate limit 31 | client := &http.Client{} 32 | testIP := "127.0.0.1" 33 | 34 | // Make 11 requests (1 more than limit) 35 | for i := 0; i < 11; i++ { 36 | req, err := http.NewRequest("GET", server.URL, nil) 37 | assert.NoError(t, err) 38 | req.RemoteAddr = testIP + ":1234" // Set test IP 39 | 40 | resp, err := client.Do(req) 41 | assert.NoError(t, err) 42 | resp.Body.Close() 43 | 44 | // Last request should be rate limited 45 | if i == 10 { 46 | assert.Equal(t, http.StatusTooManyRequests, resp.StatusCode) 47 | 48 | // Check rate limit metric 49 | rateLimitCount := testutil.ToFloat64(m.RateLimitHits.WithLabelValues(testIP)) 50 | assert.Equal(t, float64(1), rateLimitCount) 51 | } 52 | } 53 | } 54 | -------------------------------------------------------------------------------- 
/server/middleware/recovery.go: -------------------------------------------------------------------------------- 1 | // Package middleware provides various middleware functions for HTTP handlers. 2 | package middleware 3 | 4 | import ( 5 | "fmt" 6 | "net/http" 7 | "runtime/debug" 8 | 9 | "github.com/teilomillet/hapax/errors" 10 | "go.uber.org/zap" 11 | ) 12 | 13 | // Recovery middleware recovers from panics and logs the error 14 | // It takes a zap.Logger instance for logging errors. 15 | func Recovery(logger *zap.Logger) func(http.Handler) http.Handler { 16 | return func(next http.Handler) http.Handler { 17 | return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 18 | // Defer a function to recover from panics 19 | defer func() { 20 | if err := recover(); err != nil { 21 | // Capture the stack trace 22 | stack := debug.Stack() 23 | // Log the error and stack trace 24 | logger.Error("Panic recovered", 25 | zap.Any("error", err), 26 | zap.ByteString("stack", stack), 27 | ) 28 | 29 | // Retrieve the request ID from the context 30 | requestID := r.Context().Value(RequestIDKey).(string) 31 | // Write an internal server error response 32 | errors.WriteError(w, errors.NewInternalError( 33 | requestID, 34 | fmt.Errorf("internal server error: %v", err), 35 | )) 36 | } 37 | }() 38 | 39 | // Call the next handler in the chain 40 | next.ServeHTTP(w, r) 41 | }) 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /server/middleware/request_id.go: -------------------------------------------------------------------------------- 1 | // Package middleware provides various middleware functions for HTTP handlers. 2 | package middleware 3 | 4 | import ( 5 | "context" 6 | "net/http" 7 | 8 | "github.com/google/uuid" 9 | ) 10 | 11 | // RequestID middleware adds a unique request ID to the context 12 | // and sets it in the response header. 
13 | func RequestID(next http.Handler) http.Handler { 14 | return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 15 | // Generate a unique request ID using UUID. 16 | requestID := uuid.New().String() 17 | 18 | // Set the request ID in the response header for tracking. 19 | w.Header().Set("X-Request-ID", requestID) 20 | 21 | // Add the request ID to the request context for downstream handlers. 22 | ctx := context.WithValue(r.Context(), RequestIDKey, requestID) 23 | // Call the next handler with the updated context. 24 | next.ServeHTTP(w, r.WithContext(ctx)) 25 | }) 26 | } 27 | -------------------------------------------------------------------------------- /server/middleware/timeout.go: -------------------------------------------------------------------------------- 1 | // Package middleware provides various middleware functions for HTTP handlers. 2 | package middleware 3 | 4 | import ( 5 | "context" 6 | "net/http" 7 | "time" 8 | 9 | "github.com/teilomillet/hapax/errors" 10 | ) 11 | 12 | const defaultTimeout = 5 * time.Second 13 | 14 | // timeoutWriter wraps http.ResponseWriter to track if a response has been written 15 | // It uses a channel to signal when the response has been sent. 16 | type timeoutWriter struct { 17 | http.ResponseWriter 18 | written chan bool 19 | } 20 | 21 | // Write writes the data to the connection and tracks if the response has been written. 22 | func (tw *timeoutWriter) Write(b []byte) (int, error) { 23 | n, err := tw.ResponseWriter.Write(b) 24 | if n > 0 { 25 | select { 26 | case tw.written <- true: 27 | default: 28 | } 29 | } 30 | return n, err 31 | } 32 | 33 | // WriteHeader sends an HTTP response header and tracks if the response has been written. 34 | func (tw *timeoutWriter) WriteHeader(code int) { 35 | // Call the original WriteHeader method. 
36 | tw.ResponseWriter.WriteHeader(code) 37 | select { 38 | case tw.written <- true: 39 | default: 40 | } 41 | } 42 | 43 | // hasWritten checks if the response has been written. 44 | func (tw *timeoutWriter) hasWritten() bool { 45 | select { 46 | case <-tw.written: 47 | return true 48 | default: 49 | return false 50 | } 51 | } 52 | 53 | // Timeout middleware adds a timeout to the request context 54 | // It allows you to specify a duration after which the request will be aborted if not completed. 55 | // 56 | // The Timeout middleware works by creating a new context with a timeout, and using a custom 57 | // timeoutWriter to track whether a response has been written. If the request times out and 58 | // no response has been written, it sends a timeout error response. 59 | func Timeout(timeout time.Duration) func(http.Handler) http.Handler { 60 | return func(next http.Handler) http.Handler { 61 | return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 62 | // Create a context with timeout 63 | if timeout == 0 { 64 | timeout = defaultTimeout 65 | } 66 | ctx, cancel := context.WithTimeout(r.Context(), timeout) 67 | defer cancel() // Ensure cancel is called to release resources 68 | 69 | // Create a channel to signal completion 70 | done := make(chan struct{}) 71 | 72 | // Use the custom timeoutWriter to track response status. 
73 | tw := &timeoutWriter{ 74 | ResponseWriter: w, 75 | written: make(chan bool, 1), 76 | } 77 | 78 | // Process the request in a goroutine 79 | go func() { 80 | defer func() { 81 | close(done) 82 | if ctx.Err() == context.Canceled { 83 | cancel() 84 | } 85 | }() 86 | next.ServeHTTP(tw, r.WithContext(ctx)) 87 | }() 88 | 89 | // Wait for either completion or timeout 90 | select { 91 | case <-done: 92 | // Request completed normally 93 | return 94 | case <-ctx.Done(): 95 | // Request timed out 96 | if !tw.hasWritten() { 97 | // Only write error if nothing has been written yet 98 | var requestID string 99 | if id := r.Context().Value(RequestIDKey); id != nil { 100 | requestID = id.(string) 101 | } 102 | 103 | errResp := errors.NewError( 104 | errors.InternalError, 105 | "Request timeout", 106 | http.StatusGatewayTimeout, 107 | requestID, 108 | map[string]interface{}{ 109 | "timeout": timeout.String(), 110 | }, 111 | ctx.Err(), 112 | ) 113 | 114 | errors.WriteError(tw, errResp) 115 | } 116 | // Cancel the context to stop the goroutine 117 | cancel() 118 | return 119 | } 120 | }) 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /server/mock_test.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/teilomillet/gollm" 7 | "github.com/teilomillet/gollm/llm" 8 | "github.com/teilomillet/gollm/utils" 9 | ) 10 | 11 | // MockLLM implements a mock LLM for testing purposes 12 | type MockLLM struct { 13 | GenerateFunc func(context.Context, *gollm.Prompt) (string, error) 14 | DebugFunc func(string, ...interface{}) 15 | } 16 | 17 | // NewMockLLM creates a new MockLLM with optional generate function 18 | func NewMockLLM(generateFunc func(context.Context, *gollm.Prompt) (string, error)) *MockLLM { 19 | return &MockLLM{ 20 | GenerateFunc: generateFunc, 21 | } 22 | } 23 | 24 | func (m *MockLLM) Generate(ctx context.Context, prompt 
*gollm.Prompt, opts ...llm.GenerateOption) (string, error) { 25 | if m.GenerateFunc != nil { 26 | return m.GenerateFunc(ctx, prompt) 27 | } 28 | return "", nil 29 | } 30 | 31 | func (m *MockLLM) Debug(format string, args ...interface{}) { 32 | if m.DebugFunc != nil { 33 | m.DebugFunc(format, args...) 34 | } 35 | } 36 | 37 | func (m *MockLLM) GetPromptJSONSchema(opts ...gollm.SchemaOption) ([]byte, error) { 38 | return []byte(`{}`), nil 39 | } 40 | 41 | func (m *MockLLM) GetProvider() string { 42 | return "mock" 43 | } 44 | 45 | func (m *MockLLM) GetModel() string { 46 | return "mock-model" 47 | } 48 | 49 | func (m *MockLLM) UpdateLogLevel(level gollm.LogLevel) { 50 | // No-op for mock 51 | } 52 | 53 | func (m *MockLLM) GetLogLevel() gollm.LogLevel { 54 | return gollm.LogLevelOff 55 | } 56 | 57 | func (m *MockLLM) SetLogLevel(level gollm.LogLevel) { 58 | // No-op for mock 59 | } 60 | 61 | func (m *MockLLM) GetLogger() utils.Logger { 62 | return utils.NewLogger(gollm.LogLevelOff) 63 | } 64 | 65 | func (m *MockLLM) NewPrompt(text string) *gollm.Prompt { 66 | return gollm.NewPrompt(text) 67 | } 68 | 69 | func (m *MockLLM) SetEndpoint(endpoint string) { 70 | // No-op for mock 71 | } 72 | 73 | func (m *MockLLM) SetOption(key string, value interface{}) { 74 | // No-op for mock 75 | } 76 | 77 | func (m *MockLLM) SupportsJSONSchema() bool { 78 | return false 79 | } 80 | 81 | func (m *MockLLM) GenerateWithSchema(ctx context.Context, prompt *gollm.Prompt, schema interface{}, opts ...llm.GenerateOption) (string, error) { 82 | return m.Generate(ctx, prompt, opts...) 
83 | } 84 | 85 | func (m *MockLLM) SetOllamaEndpoint(endpoint string) error { 86 | // No-op for mock 87 | return nil 88 | } 89 | 90 | func (m *MockLLM) SetSystemPrompt(prompt string, cacheType llm.CacheType) { 91 | // No-op for mock 92 | } 93 | -------------------------------------------------------------------------------- /server/mocks/config_watcher.go: -------------------------------------------------------------------------------- 1 | package mocks 2 | 3 | import ( 4 | "sync/atomic" 5 | 6 | "github.com/teilomillet/hapax/config" 7 | ) 8 | 9 | // MockConfigWatcher provides a testable implementation of config.Watcher 10 | type MockConfigWatcher struct { 11 | currentConfig atomic.Value 12 | subscribers []chan *config.Config 13 | } 14 | 15 | // Verify at compile time that MockConfigWatcher implements config.Watcher 16 | var _ config.Watcher = (*MockConfigWatcher)(nil) 17 | 18 | // NewMockConfigWatcher creates a new MockConfigWatcher initialized with the provided config 19 | func NewMockConfigWatcher(cfg *config.Config) *MockConfigWatcher { 20 | mcw := &MockConfigWatcher{ 21 | subscribers: make([]chan *config.Config, 0), 22 | } 23 | mcw.currentConfig.Store(cfg) 24 | return mcw 25 | } 26 | 27 | // GetCurrentConfig implements config.Watcher 28 | func (m *MockConfigWatcher) GetCurrentConfig() *config.Config { 29 | return m.currentConfig.Load().(*config.Config) 30 | } 31 | 32 | // Subscribe implements config.Watcher 33 | func (m *MockConfigWatcher) Subscribe() <-chan *config.Config { 34 | ch := make(chan *config.Config, 1) 35 | m.subscribers = append(m.subscribers, ch) 36 | 37 | // Send current config immediately 38 | cfg := m.GetCurrentConfig() 39 | select { 40 | case ch <- cfg: 41 | default: 42 | } 43 | 44 | return ch 45 | } 46 | 47 | // Close implements config.Watcher 48 | func (m *MockConfigWatcher) Close() error { 49 | for _, ch := range m.subscribers { 50 | close(ch) 51 | } 52 | m.subscribers = nil 53 | return nil 54 | } 55 | 56 | // UpdateConfig is a test helper 
that simulates configuration changes 57 | func (m *MockConfigWatcher) UpdateConfig(cfg *config.Config) { 58 | m.currentConfig.Store(cfg) 59 | 60 | for _, ch := range m.subscribers { 61 | select { 62 | case ch <- cfg: 63 | default: 64 | // Skip if channel is blocked 65 | } 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /server/mocks/llm.go: -------------------------------------------------------------------------------- 1 | package mocks 2 | 3 | import ( 4 | "context" 5 | "time" 6 | 7 | "github.com/teilomillet/gollm" 8 | "github.com/teilomillet/gollm/llm" 9 | "github.com/teilomillet/gollm/utils" 10 | "github.com/teilomillet/hapax/server/middleware" 11 | ) 12 | 13 | // MockLLM implements a mock LLM for testing purposes. 14 | // It provides a flexible way to simulate LLM behavior in tests without making actual API calls. 15 | // 16 | // Key features: 17 | // 1. Configurable response generation through GenerateFunc 18 | // 2. Debug logging capture through DebugFunc 19 | // 3. Default implementations for all interface methods 20 | // 21 | // Example usage: 22 | // 23 | // mockLLM := NewMockLLM(func(ctx context.Context, prompt *gollm.Prompt) (string, error) { 24 | // return "mocked response", nil 25 | // }) 26 | type MockLLM struct { 27 | GenerateFunc func(context.Context, *gollm.Prompt) (string, error) 28 | DebugFunc func(string, ...interface{}) 29 | Provider string // Provider name for testing 30 | Model string // Model name for testing 31 | } 32 | 33 | // NewMockLLM creates a new MockLLM with optional generate function. 34 | // If generateFunc is nil, Generate will return empty string with no error. 
35 | func NewMockLLM(generateFunc func(context.Context, *gollm.Prompt) (string, error)) *MockLLM { 36 | return &MockLLM{ 37 | GenerateFunc: generateFunc, 38 | Provider: "mock", 39 | Model: "mock-model", 40 | } 41 | } 42 | 43 | // NewMockLLMWithConfig creates a new MockLLM with specific provider and model names 44 | func NewMockLLMWithConfig(provider, model string, generateFunc func(context.Context, *gollm.Prompt) (string, error)) *MockLLM { 45 | return &MockLLM{ 46 | GenerateFunc: generateFunc, 47 | Provider: provider, 48 | Model: model, 49 | } 50 | } 51 | 52 | // Generate implements the core LLM functionality. 53 | // It uses the provided GenerateFunc if available, otherwise returns empty string. 54 | // The opts parameter is ignored in the mock to simplify testing. 55 | func (m *MockLLM) Generate(ctx context.Context, prompt *gollm.Prompt, opts ...llm.GenerateOption) (string, error) { 56 | // Check for timeout header 57 | if ctx.Value(middleware.XTestTimeoutKey) != nil { 58 | // Sleep longer than the timeout 59 | time.Sleep(10 * time.Second) 60 | return "", context.DeadlineExceeded 61 | } 62 | 63 | if m.GenerateFunc != nil { 64 | return m.GenerateFunc(ctx, prompt) 65 | } 66 | return "", nil 67 | } 68 | 69 | // Debug captures debug messages if DebugFunc is provided. 70 | // This allows tests to verify logging behavior if needed. 71 | func (m *MockLLM) Debug(format string, args ...interface{}) { 72 | if m.DebugFunc != nil { 73 | m.DebugFunc(format, args...) 74 | } 75 | } 76 | 77 | // GetPromptJSONSchema returns a minimal valid JSON schema. 78 | // This is useful for testing schema validation without complex schemas. 
79 | func (m *MockLLM) GetPromptJSONSchema(opts ...gollm.SchemaOption) ([]byte, error) { 80 | return []byte(`{}`), nil 81 | } 82 | 83 | // GetProvider returns the mock provider name 84 | func (m *MockLLM) GetProvider() string { 85 | return m.Provider 86 | } 87 | 88 | // GetModel returns the mock model name 89 | func (m *MockLLM) GetModel() string { 90 | return m.Model 91 | } 92 | 93 | // GetLogLevel returns a default log level. 94 | // Tests can rely on this consistent behavior. 95 | func (m *MockLLM) GetLogLevel() gollm.LogLevel { 96 | return gollm.LogLevelInfo 97 | } 98 | 99 | // UpdateLogLevel is a no-op in the mock. 100 | // Real implementation would change logging behavior. 101 | func (m *MockLLM) UpdateLogLevel(level gollm.LogLevel) { 102 | // No-op for mock 103 | } 104 | 105 | // SetLogLevel is a no-op in the mock. 106 | // Real implementation would change logging behavior. 107 | func (m *MockLLM) SetLogLevel(level gollm.LogLevel) { 108 | // No-op for mock 109 | } 110 | 111 | // GetLogger returns nil as we don't need logging in tests. 112 | // Real implementation would return a logger instance. 113 | func (m *MockLLM) GetLogger() utils.Logger { 114 | return nil 115 | } 116 | 117 | // NewPrompt creates a simple prompt with user role. 118 | // This provides consistent prompt creation for tests. 119 | func (m *MockLLM) NewPrompt(text string) *gollm.Prompt { 120 | return &gollm.Prompt{ 121 | Messages: []gollm.PromptMessage{ 122 | {Role: "user", Content: text}, 123 | }, 124 | } 125 | } 126 | 127 | // SetEndpoint is a no-op in the mock. 128 | // Real implementation would configure the API endpoint. 129 | func (m *MockLLM) SetEndpoint(endpoint string) { 130 | // No-op for mock 131 | } 132 | 133 | // SetOption is a no-op in the mock. 134 | // Real implementation would configure LLM options. 135 | func (m *MockLLM) SetOption(key string, value interface{}) { 136 | // No-op for mock 137 | } 138 | 139 | // SupportsJSONSchema returns true to indicate schema support. 
140 | // This allows testing schema-related functionality. 141 | func (m *MockLLM) SupportsJSONSchema() bool { 142 | return true 143 | } 144 | 145 | // GenerateWithSchema uses the standard Generate function. 146 | // Schema validation is not performed in the mock. 147 | func (m *MockLLM) GenerateWithSchema(ctx context.Context, prompt *gollm.Prompt, schema interface{}, opts ...llm.GenerateOption) (string, error) { 148 | if m.GenerateFunc != nil { 149 | return m.GenerateFunc(ctx, prompt) 150 | } 151 | return "", nil 152 | } 153 | 154 | // SetOllamaEndpoint is a no-op in the mock. 155 | // Real implementation would configure Ollama endpoint. 156 | func (m *MockLLM) SetOllamaEndpoint(endpoint string) error { 157 | return nil 158 | } 159 | 160 | // SetSystemPrompt is a no-op in the mock. 161 | // Real implementation would set a system-level prompt. 162 | func (m *MockLLM) SetSystemPrompt(prompt string, cacheType llm.CacheType) { 163 | // No-op for mock 164 | } 165 | -------------------------------------------------------------------------------- /server/processing/processor.go: -------------------------------------------------------------------------------- 1 | // Package processing provides request processing and response formatting for LLM interactions. 2 | package processing 3 | 4 | import ( 5 | "bytes" 6 | "context" 7 | "fmt" 8 | "strings" 9 | "text/template" 10 | 11 | "github.com/teilomillet/gollm" 12 | "github.com/teilomillet/hapax/config" 13 | ) 14 | 15 | // Processor handles request processing and response formatting for LLM interactions. 16 | // It uses Go templates to transform incoming requests into LLM-compatible formats, 17 | // communicates with the LLM, and formats the responses according to configuration. 
18 | // 19 | // Key features: 20 | // - Template-based request transformation 21 | // - Configurable response formatting 22 | // - Support for both simple and chat completions 23 | // - System prompt management 24 | // 25 | // The Processor is designed to be reusable across different request types 26 | // while maintaining consistent formatting and error handling. 27 | type Processor struct { 28 | llm gollm.LLM // The LLM instance to use for generation 29 | templates map[string]*template.Template // Compiled templates for request formatting 30 | config *config.ProcessingConfig // Configuration for processing behavior 31 | defaultPrompt string // Default system prompt for all requests 32 | } 33 | 34 | // NewProcessor creates a new processor instance with the given configuration and LLM. 35 | // It validates the configuration and pre-compiles all templates for efficiency. 36 | // 37 | // Parameters: 38 | // - cfg: Processing configuration including templates and formatting options 39 | // - llm: LLM instance to use for text generation 40 | // 41 | // Returns: 42 | // - A new Processor instance and nil error if successful 43 | // - nil and error if configuration is invalid or template compilation fails 44 | // 45 | // The processor will fail fast if any templates are invalid, preventing runtime errors. 
46 | func NewProcessor(cfg *config.ProcessingConfig, llm gollm.LLM) (*Processor, error) { 47 | if cfg == nil { 48 | return nil, fmt.Errorf("processing config is required") 49 | } 50 | if llm == nil { 51 | return nil, fmt.Errorf("LLM instance is required") 52 | } 53 | 54 | // Parse all templates at initialization to fail fast on invalid templates 55 | templates := make(map[string]*template.Template) 56 | for name, tmpl := range cfg.RequestTemplates { 57 | t, err := template.New(name).Parse(tmpl) 58 | if err != nil { 59 | return nil, fmt.Errorf("failed to parse template %s: %w", name, err) 60 | } 61 | templates[name] = t 62 | } 63 | 64 | return &Processor{ 65 | llm: llm, 66 | templates: templates, 67 | config: cfg, 68 | }, nil 69 | } 70 | 71 | // ProcessRequest handles the end-to-end processing of a request: 72 | // 1. Validates the request 73 | // 2. Selects and executes the appropriate template 74 | // 3. Creates an LLM prompt with system context 75 | // 4. Sends the request to the LLM 76 | // 5. Formats the response according to configuration 77 | // 78 | // Parameters: 79 | // - ctx: Context for the request, used for cancellation and timeouts 80 | // - req: The request to process, containing type and input data 81 | // 82 | // Returns: 83 | // - Formatted response and nil error if successful 84 | // - nil and error if any step fails 85 | // 86 | // The processor will use the "default" template if no matching template 87 | // is found for the request type. 
88 | func (p *Processor) ProcessRequest(ctx context.Context, req *Request) (*Response, error) { 89 | if req == nil { 90 | return nil, fmt.Errorf("request cannot be nil") 91 | } 92 | 93 | var promptMessages []gollm.PromptMessage 94 | 95 | // Always start with system prompt if we have one 96 | if p.defaultPrompt != "" { 97 | promptMessages = append(promptMessages, gollm.PromptMessage{ 98 | Role: "system", 99 | Content: p.defaultPrompt, 100 | }) 101 | } 102 | 103 | // Now we have two clear paths - either conversation or single input 104 | if len(req.Messages) > 0 { 105 | // Add debug logging for chat requests 106 | fmt.Printf("DEBUG: Processing chat request with %d messages\n", len(req.Messages)) 107 | // For conversations, we just need to convert the messages directly 108 | for _, msg := range req.Messages { 109 | fmt.Printf("DEBUG: Adding message - Role: '%s', Content: '%s'\n", msg.Role, msg.Content) 110 | promptMessages = append(promptMessages, gollm.PromptMessage{ 111 | Role: msg.Role, 112 | Content: msg.Content, 113 | }) 114 | } 115 | } else if req.Input != "" { 116 | // Add debug logging for single input requests 117 | fmt.Printf("DEBUG: Processing single input request: '%s'\n", req.Input) 118 | // For single inputs, we still use the template system 119 | tmpl := p.templates["default"] 120 | if t, ok := p.templates[req.Type]; ok { 121 | tmpl = t 122 | } 123 | if tmpl == nil { 124 | return nil, fmt.Errorf("no template found for type: %s", req.Type) 125 | } 126 | 127 | var buf bytes.Buffer 128 | if err := tmpl.Execute(&buf, req); err != nil { 129 | return nil, fmt.Errorf("template execution failed: %w", err) 130 | } 131 | 132 | promptMessages = append(promptMessages, gollm.PromptMessage{ 133 | Role: "user", 134 | Content: buf.String(), 135 | }) 136 | } else { 137 | return nil, fmt.Errorf("request must contain either messages or input") 138 | } 139 | 140 | prompt := &gollm.Prompt{Messages: promptMessages} 141 | 142 | // Add debug logging 143 | fmt.Printf("DEBUG: 
About to send prompt to LLM: %+v\n", prompt) 144 | fmt.Printf("DEBUG: Number of messages in prompt: %d\n", len(prompt.Messages)) 145 | for i, msg := range prompt.Messages { 146 | fmt.Printf("DEBUG: Message[%d] - Role: '%s', Content: '%s'\n", i, msg.Role, msg.Content) 147 | } 148 | 149 | response, err := p.llm.Generate(ctx, prompt) 150 | if err != nil { 151 | return nil, fmt.Errorf("LLM processing failed: %w", err) 152 | } 153 | 154 | return p.formatResponse(response), nil 155 | } 156 | 157 | // formatResponse applies configured formatting options to the LLM response: 158 | // 1. Cleans JSON if enabled (removes markdown blocks, formats JSON) 159 | // 2. Trims whitespace if enabled 160 | // 3. Truncates to max length if configured 161 | // 162 | // This ensures consistent response format and size across different 163 | // LLM outputs and request types. 164 | func (p *Processor) formatResponse(content string) *Response { 165 | if p.config.ResponseFormatting.CleanJSON { 166 | content = gollm.CleanResponse(content) 167 | } 168 | if p.config.ResponseFormatting.TrimWhitespace { 169 | content = strings.TrimSpace(content) 170 | } 171 | if p.config.ResponseFormatting.MaxLength > 0 && len(content) > p.config.ResponseFormatting.MaxLength { 172 | content = content[:p.config.ResponseFormatting.MaxLength] 173 | } 174 | return &Response{Content: content} 175 | } 176 | 177 | // SetDefaultPrompt sets the system prompt to be used for all requests. 178 | // This prompt provides context and instructions to the LLM. 179 | func (p *Processor) SetDefaultPrompt(prompt string) { 180 | p.defaultPrompt = prompt 181 | } 182 | -------------------------------------------------------------------------------- /server/processing/types.go: -------------------------------------------------------------------------------- 1 | // Package processing provides request processing and response formatting for LLM interactions. 
2 | // It handles template-based request transformation, LLM communication, and response formatting. 3 | package processing 4 | 5 | // Message represents a single message in a conversation. 6 | // This follows the standard chat format used by most LLM providers, 7 | // where each message has a role (e.g., "user", "assistant", "system") 8 | // and content (the actual message text). 9 | type Message struct { 10 | Role string `json:"role"` // Role of the message sender (e.g., "user", "assistant") 11 | Content string `json:"content"` // The actual message content 12 | } 13 | 14 | // Request represents an incoming request to the LLM service. 15 | // It supports two main types of requests: 16 | // 1. Simple completion: Using the Input field with a default template 17 | // 2. Chat completion: Using the Messages field with a chat template 18 | // 19 | // The Type field determines which template is used to format the request. 20 | // This allows for flexible request handling while maintaining a consistent 21 | // interface with the LLM. 22 | type Request struct { 23 | // Type indicates the type of request (e.g., "completion", "chat", "function") 24 | Type string `json:"type"` // Type of request (e.g., "default", "chat") 25 | Input string `json:"input"` // Used for simple completion requests 26 | Messages []Message `json:"messages,omitempty"` // Used for chat completion requests 27 | // FunctionDescription is used for function-calling requests 28 | FunctionDescription string `json:"function_description,omitempty"` 29 | } 30 | 31 | // Response represents the processed output from the LLM. 32 | // It contains the formatted content after applying any configured 33 | // transformations (e.g., JSON cleaning, whitespace trimming, length limits). 
34 | // 35 | // Future extensions might include: 36 | // - Metadata about the processing (e.g., truncation info) 37 | // - Multiple response formats (e.g., text, structured data) 38 | // - Usage statistics (tokens, processing time) 39 | type Response struct { 40 | // Content is the processed response content 41 | Content string `json:"content"` // The processed response content 42 | // Error holds any error information 43 | Error string `json:"error,omitempty"` 44 | } 45 | -------------------------------------------------------------------------------- /server/provider/errors.go: -------------------------------------------------------------------------------- 1 | package provider 2 | 3 | import "errors" 4 | 5 | var ( 6 | // ErrNoHealthyProvider indicates that no healthy provider is available 7 | ErrNoHealthyProvider = errors.New("no healthy provider available") 8 | ) 9 | -------------------------------------------------------------------------------- /server/provider/execution.go: -------------------------------------------------------------------------------- 1 | package provider 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "time" 7 | 8 | "github.com/sony/gobreaker" 9 | "github.com/teilomillet/gollm" 10 | "github.com/teilomillet/hapax/server/circuitbreaker" // Added import for custom circuit breaker 11 | "go.uber.org/zap" 12 | ) 13 | 14 | // result represents the outcome of an LLM operation 15 | type result struct { 16 | err error 17 | status HealthStatus 18 | name string 19 | } 20 | 21 | // Execute coordinates provider execution with proper error handling 22 | func (m *Manager) Execute(ctx context.Context, operation func(llm gollm.LLM) error, prompt *gollm.Prompt) error { 23 | key := m.generateRequestKey(prompt) 24 | m.logger.Debug("Starting Execute", zap.String("key", key)) 25 | 26 | v, err, shared := m.group.Do(key, func() (interface{}, error) { 27 | return m.executeWithRetries(ctx, operation) 28 | }) 29 | 30 | if err != nil { 31 | m.logger.Debug("Execute 
failed", zap.Error(err)) 32 | return err 33 | } 34 | 35 | m.handleRequestMetrics(shared) 36 | return m.processResult(v.(*result)) 37 | } 38 | 39 | func (m *Manager) executeWithRetries(ctx context.Context, operation func(llm gollm.LLM) error) (*result, error) { 40 | preference := m.getProviderPreference() 41 | if len(preference) == 0 { 42 | return &result{ 43 | err: fmt.Errorf("no providers configured"), 44 | }, fmt.Errorf("no providers configured") 45 | } 46 | 47 | var lastResult *result 48 | 49 | // Try each provider in sequence 50 | for _, name := range preference { 51 | provider, breaker, status := m.getProviderResources(name) 52 | if provider == nil || breaker == nil || !status.Healthy { 53 | continue 54 | } 55 | 56 | // Try the current provider 57 | currentResult := m.executeOperation(ctx, operation, provider, breaker, status, name) 58 | lastResult = currentResult 59 | 60 | if currentResult.err == nil { 61 | // Success case - return immediately 62 | return currentResult, nil 63 | } 64 | 65 | // **Key Insight** 66 | // ================= 67 | // 68 | // The key insight nderstand the relationship between single-request behavior and cross-request state. 69 | // The circuit breaker maintains state across requests, but each individual request needs clear, predictable behavior. 70 | 71 | // **Request Flow** 72 | // =============== 73 | // 74 | // When the first request comes in: 75 | // 1. The breaker is closed (not open). 76 | // 2. We hit the else clause. 77 | // 3. We return the primary error immediately. 78 | // 4. This failure gets recorded in the circuit breaker's state. 79 | 80 | // For the second request: 81 | // 1. The primary provider fails again. 82 | // 2. This triggers the circuit breaker to open. 83 | // 3. Because the breaker is now open, we hit the first condition. 84 | // 4. The continue statement moves us to try the backup provider. 85 | // 5. All of this happens within the same request. 
86 | 87 | // **Properties Maintained** 88 | // ======================= 89 | // 90 | // This pattern maintains two important properties: 91 | // 1. **Isolation**: Each request has clear, predictable behavior. 92 | // 2. **State Evolution**: The circuit breaker accumulates state across requests. 93 | 94 | // Circuit Breaker Logic 95 | if breaker.State() == gobreaker.StateOpen { 96 | // If the circuit breaker is open, we check if we're at the last provider in the preference list. 97 | // If we are, we return the primary error immediately. 98 | if name == preference[len(preference)-1] { 99 | return currentResult, currentResult.err // This gives us the immediate failure 100 | } 101 | // Continue to the next provider if we are not at the last one. 102 | continue 103 | } else { 104 | // If the breaker is closed, we return the primary error immediately. 105 | return currentResult, currentResult.err // This gives us the immediate failure 106 | } 107 | } 108 | 109 | // Error Handling 110 | // We always maintain a valid result structure to prevent nil pointer dereference. 
111 | if lastResult == nil { 112 | return &result{ 113 | err: fmt.Errorf("no healthy provider available"), 114 | }, fmt.Errorf("no healthy provider available") 115 | } 116 | 117 | return lastResult, lastResult.err 118 | } 119 | 120 | // executeOperation handles a single operation attempt with proper resource cleanup 121 | func (m *Manager) executeOperation( 122 | ctx context.Context, 123 | operation func(llm gollm.LLM) error, 124 | provider gollm.LLM, 125 | breaker *circuitbreaker.CircuitBreaker, 126 | status HealthStatus, 127 | name string) *result { 128 | 129 | start := time.Now() 130 | 131 | err := breaker.Execute(func() error { 132 | // Always check context before executing operation 133 | if err := ctx.Err(); err != nil { 134 | return err 135 | } 136 | return operation(provider) 137 | }) 138 | 139 | duration := time.Since(start) 140 | breakerState := breaker.State() 141 | breakerCounts := breaker.Counts() 142 | 143 | if err != nil { 144 | m.logger.Debug("operation failed", 145 | zap.String("provider", name), 146 | zap.Error(err), 147 | zap.Duration("duration", duration), 148 | zap.String("breaker_state", breakerState.String()), 149 | zap.Uint32("consecutive_failures", breakerCounts.ConsecutiveFailures)) 150 | 151 | return &result{ 152 | err: err, 153 | status: HealthStatus{ 154 | Healthy: false, 155 | LastCheck: time.Now(), 156 | ErrorCount: status.ErrorCount + 1, 157 | ConsecutiveFails: int(breakerCounts.ConsecutiveFailures), 158 | Latency: duration, 159 | RequestCount: status.RequestCount + 1, 160 | }, 161 | name: name, 162 | } 163 | } 164 | 165 | return &result{ 166 | err: nil, 167 | status: HealthStatus{ 168 | Healthy: true, 169 | LastCheck: time.Now(), 170 | ErrorCount: 0, 171 | ConsecutiveFails: 0, 172 | Latency: duration, 173 | RequestCount: status.RequestCount + 1, 174 | }, 175 | name: name, 176 | } 177 | } 178 | 179 | // generateRequestKey creates a consistent key based on the prompt content and role 180 | func (m *Manager) generateRequestKey(prompt 
*gollm.Prompt) string { 181 | return fmt.Sprintf("%s-%s", prompt.Messages[0].Content, prompt.Messages[0].Role) 182 | } 183 | 184 | // getProviderPreference safely retrieves the current provider preference list 185 | func (m *Manager) getProviderPreference() []string { 186 | m.mu.RLock() 187 | defer m.mu.RUnlock() 188 | preference := make([]string, len(m.cfg.ProviderPreference)) 189 | copy(preference, m.cfg.ProviderPreference) 190 | return preference 191 | } 192 | 193 | // getProviderResources safely retrieves provider-related resources 194 | func (m *Manager) getProviderResources(name string) (gollm.LLM, *circuitbreaker.CircuitBreaker, HealthStatus) { 195 | m.mu.RLock() 196 | defer m.mu.RUnlock() 197 | 198 | provider, exists := m.providers[name] 199 | if !exists { 200 | return nil, nil, HealthStatus{} 201 | } 202 | 203 | return provider, m.breakers[name], m.GetHealthStatus(name) 204 | } 205 | 206 | // handleRequestMetrics updates metrics for deduplicated requests 207 | func (m *Manager) handleRequestMetrics(shared bool) { 208 | if shared { 209 | m.deduplicatedRequests.Inc() 210 | } 211 | } 212 | 213 | // processResult handles the final result and updates provider health status 214 | func (m *Manager) processResult(r *result) error { 215 | if r.name != "" { 216 | m.UpdateHealthStatus(r.name, r.status) 217 | } 218 | return r.err 219 | } 220 | -------------------------------------------------------------------------------- /server/provider/health.go: -------------------------------------------------------------------------------- 1 | package provider 2 | 3 | import ( 4 | "context" 5 | "time" 6 | 7 | "github.com/prometheus/client_golang/prometheus" 8 | "github.com/teilomillet/gollm" 9 | "go.uber.org/zap" 10 | ) 11 | 12 | // HealthStatus represents the current health state of a provider 13 | type HealthStatus struct { 14 | Healthy bool // Whether the provider is currently healthy 15 | LastCheck time.Time // When the last health check was performed 16 | ConsecutiveFails 
int // Number of consecutive failures 17 | Latency time.Duration // Last observed latency 18 | ErrorCount int64 // Total number of errors 19 | RequestCount int64 // Total number of requests 20 | } 21 | 22 | // startHealthChecks begins monitoring all providers 23 | func (m *Manager) startHealthChecks(ctx context.Context) { 24 | interval := time.Minute 25 | ticker := time.NewTicker(interval) 26 | defer ticker.Stop() 27 | 28 | for { 29 | select { 30 | case <-ctx.Done(): 31 | return 32 | case <-ticker.C: 33 | m.checkAllProviders() 34 | } 35 | } 36 | } 37 | 38 | // checkAllProviders performs health checks on all providers 39 | func (m *Manager) checkAllProviders() { 40 | for name, provider := range m.providers { 41 | start := time.Now() 42 | 43 | // Get the current health status 44 | var status HealthStatus 45 | if val, ok := m.healthStates.Load(name); ok { 46 | status = val.(HealthStatus) 47 | } 48 | 49 | // Perform health check 50 | err := m.healthCheck(provider) 51 | duration := time.Since(start) 52 | 53 | // Update metrics 54 | m.healthCheckDuration.Observe(duration.Seconds()) 55 | 56 | if err != nil { 57 | m.healthCheckErrors.WithLabelValues(name).Inc() 58 | status.Healthy = false 59 | status.ErrorCount++ 60 | } else { 61 | status.Healthy = true 62 | status.ErrorCount = 0 63 | } 64 | 65 | status.LastCheck = time.Now() 66 | m.UpdateHealthStatus(name, status) 67 | } 68 | } 69 | 70 | // CheckProviderHealth performs a health check on a provider 71 | func (m *Manager) CheckProviderHealth(name string, llm gollm.LLM) HealthStatus { 72 | return m.checkProviderHealth(name, llm) 73 | } 74 | 75 | // checkProviderHealth performs a health check on a provider 76 | func (m *Manager) checkProviderHealth(name string, llm gollm.LLM) HealthStatus { 77 | start := time.Now() 78 | status := HealthStatus{ 79 | LastCheck: start, 80 | Healthy: true, 81 | } 82 | 83 | // Get previous status if any 84 | if val, ok := m.healthStates.Load(name); ok { 85 | prevStatus := val.(HealthStatus) 86 | 
status.ConsecutiveFails = prevStatus.ConsecutiveFails 87 | status.ErrorCount = prevStatus.ErrorCount 88 | status.RequestCount = prevStatus.RequestCount 89 | } 90 | 91 | // Simple health check prompt 92 | prompt := &gollm.Prompt{ 93 | Messages: []gollm.PromptMessage{ 94 | {Role: "user", Content: "health check"}, 95 | }, 96 | } 97 | 98 | ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) 99 | defer cancel() 100 | 101 | _, err := llm.Generate(ctx, prompt) 102 | status.Latency = time.Since(start) 103 | m.healthCheckDuration.Observe(status.Latency.Seconds()) 104 | 105 | if err != nil { 106 | status.Healthy = false 107 | status.ConsecutiveFails++ 108 | status.ErrorCount++ 109 | m.healthCheckErrors.WithLabelValues(name).Inc() 110 | m.logger.Warn("Provider health check failed", 111 | zap.String("provider", name), 112 | zap.Error(err), 113 | zap.Duration("latency", status.Latency), 114 | ) 115 | } else { 116 | status.ConsecutiveFails = 0 117 | } 118 | 119 | status.RequestCount++ 120 | return status 121 | } 122 | 123 | // GetHealthCheckErrors returns the health check errors counter for testing 124 | func (m *Manager) GetHealthCheckErrors() *prometheus.CounterVec { 125 | return m.healthCheckErrors 126 | } 127 | 128 | // GetHealthStatus returns the health status for a provider 129 | func (m *Manager) GetHealthStatus(name string) HealthStatus { 130 | if val, ok := m.healthStates.Load(name); ok { 131 | return val.(HealthStatus) 132 | } 133 | return HealthStatus{} 134 | } 135 | 136 | // UpdateHealthStatus updates the health status for a provider 137 | func (m *Manager) UpdateHealthStatus(name string, status HealthStatus) { 138 | m.mu.Lock() 139 | defer m.mu.Unlock() 140 | 141 | // Get the current status 142 | var currentStatus HealthStatus 143 | if val, ok := m.healthStates.Load(name); ok { 144 | currentStatus = val.(HealthStatus) 145 | } 146 | 147 | // Update the status 148 | newStatus := HealthStatus{ 149 | Healthy: status.Healthy, 150 | LastCheck: 
status.LastCheck, 151 | ErrorCount: status.ErrorCount, 152 | } 153 | 154 | // If the status is becoming healthy, reset error count 155 | if status.Healthy && !currentStatus.Healthy { 156 | newStatus.ErrorCount = 0 157 | } 158 | 159 | // Store the new status 160 | m.healthStates.Store(name, newStatus) 161 | 162 | // Update metrics 163 | if status.Healthy { 164 | m.healthyProviders.WithLabelValues(name).Set(1) 165 | } else { 166 | m.healthyProviders.WithLabelValues(name).Set(0) 167 | } 168 | } 169 | 170 | func (m *Manager) healthCheck(provider gollm.LLM) error { 171 | // Simple health check prompt 172 | prompt := &gollm.Prompt{ 173 | Messages: []gollm.PromptMessage{ 174 | {Role: "user", Content: "health check"}, 175 | }, 176 | } 177 | 178 | ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) 179 | defer cancel() 180 | 181 | _, err := provider.Generate(ctx, prompt) 182 | return err 183 | } 184 | 185 | // PerformHealthCheck performs a health check on all providers 186 | func (m *Manager) PerformHealthCheck() { 187 | for name, provider := range m.providers { 188 | start := time.Now() 189 | 190 | // Get the current health status 191 | var status HealthStatus 192 | if val, ok := m.healthStates.Load(name); ok { 193 | status = val.(HealthStatus) 194 | } 195 | 196 | // Perform health check 197 | err := m.healthCheck(provider) 198 | duration := time.Since(start) 199 | 200 | // Update metrics 201 | m.healthCheckDuration.Observe(duration.Seconds()) 202 | 203 | if err != nil { 204 | m.healthCheckErrors.WithLabelValues(name).Inc() 205 | status.Healthy = false 206 | status.ErrorCount++ 207 | } else { 208 | status.Healthy = true 209 | status.ErrorCount = 0 210 | } 211 | 212 | status.LastCheck = time.Now() 213 | m.UpdateHealthStatus(name, status) 214 | } 215 | } 216 | -------------------------------------------------------------------------------- /server/provider/manager_singleflight_test.go: 
-------------------------------------------------------------------------------- 1 | package provider 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "sync" 7 | "sync/atomic" 8 | "testing" 9 | "time" 10 | 11 | "github.com/prometheus/client_golang/prometheus" 12 | "github.com/stretchr/testify/assert" 13 | "github.com/stretchr/testify/require" 14 | "github.com/teilomillet/gollm" 15 | "github.com/teilomillet/hapax/config" 16 | "github.com/teilomillet/hapax/server/mocks" 17 | "go.uber.org/zap" 18 | ) 19 | 20 | func TestManagerSingleflight(t *testing.T) { 21 | t.Parallel() 22 | 23 | tests := []struct { 24 | name string 25 | testFn func(*testing.T, *Manager) 26 | }{ 27 | { 28 | name: "Concurrent identical requests are deduplicated", 29 | testFn: func(t *testing.T, m *Manager) { 30 | var callCount atomic.Int32 31 | mock := mocks.NewMockLLMWithConfig("test", "model", func(ctx context.Context, prompt *gollm.Prompt) (string, error) { 32 | callCount.Add(1) 33 | // Small sleep to ensure concurrent requests overlap 34 | time.Sleep(10 * time.Millisecond) 35 | return "response", nil 36 | }) 37 | 38 | m.SetProviders(map[string]gollm.LLM{"test": mock}) 39 | m.UpdateHealthStatus("test", HealthStatus{ 40 | Healthy: true, 41 | LastCheck: time.Now(), 42 | ErrorCount: 0, 43 | }) 44 | 45 | // Create identical prompts 46 | prompt := &gollm.Prompt{Messages: []gollm.PromptMessage{{ 47 | Role: "user", 48 | Content: "test", 49 | }}} 50 | 51 | // Launch concurrent requests 52 | var wg sync.WaitGroup 53 | errs := make([]error, 5) 54 | for i := 0; i < 5; i++ { 55 | wg.Add(1) 56 | go func(idx int) { 57 | defer wg.Done() 58 | errs[idx] = m.Execute(context.Background(), func(llm gollm.LLM) error { 59 | _, err := llm.Generate(context.Background(), prompt) 60 | return err 61 | }, prompt) 62 | }(i) 63 | } 64 | 65 | waitWithTimeout(&wg, t, 100*time.Millisecond) 66 | 67 | // Verify results 68 | for _, err := range errs { 69 | assert.NoError(t, err) 70 | } 71 | 72 | // Should only be called once due to 
deduplication 73 | assert.Equal(t, int32(1), callCount.Load()) 74 | }, 75 | }, 76 | { 77 | name: "Different requests are not deduplicated", 78 | testFn: func(t *testing.T, m *Manager) { 79 | var callCount atomic.Int32 80 | mock := mocks.NewMockLLMWithConfig("test", "model", func(ctx context.Context, prompt *gollm.Prompt) (string, error) { 81 | callCount.Add(1) 82 | time.Sleep(10 * time.Millisecond) 83 | return "response", nil 84 | }) 85 | 86 | m.SetProviders(map[string]gollm.LLM{"test": mock}) 87 | m.UpdateHealthStatus("test", HealthStatus{ 88 | Healthy: true, 89 | LastCheck: time.Now(), 90 | ErrorCount: 0, 91 | }) 92 | 93 | var wg sync.WaitGroup 94 | for i := 0; i < 3; i++ { 95 | wg.Add(1) 96 | go func(idx int) { 97 | defer wg.Done() 98 | // Different prompts 99 | prompt := &gollm.Prompt{Messages: []gollm.PromptMessage{{ 100 | Role: "user", 101 | Content: fmt.Sprintf("test-%d", idx), 102 | }}} 103 | _ = m.Execute(context.Background(), func(llm gollm.LLM) error { 104 | _, err := llm.Generate(context.Background(), prompt) 105 | return err 106 | }, prompt) 107 | }(i) 108 | } 109 | 110 | waitWithTimeout(&wg, t, 100*time.Millisecond) 111 | assert.Equal(t, int32(3), callCount.Load()) 112 | }, 113 | }, 114 | } 115 | 116 | for _, tt := range tests { 117 | tt := tt 118 | t.Run(tt.name, func(t *testing.T) { 119 | t.Parallel() 120 | cfg := &config.Config{ 121 | TestMode: true, 122 | Providers: map[string]config.ProviderConfig{ 123 | "test": {Type: "test", Model: "model"}, 124 | }, 125 | ProviderPreference: []string{"test"}, 126 | CircuitBreaker: config.CircuitBreakerConfig{ 127 | MaxRequests: 1, 128 | Interval: 10 * time.Millisecond, 129 | Timeout: 10 * time.Millisecond, 130 | FailureThreshold: 2, 131 | TestMode: true, 132 | }, 133 | } 134 | 135 | manager, err := NewManager(cfg, zap.NewNop(), prometheus.NewRegistry()) 136 | require.NoError(t, err) 137 | tt.testFn(t, manager) 138 | }) 139 | } 140 | } 141 | 142 | // Helper function to wait for WaitGroup with timeout 143 | func 
waitWithTimeout(wg *sync.WaitGroup, t *testing.T, timeout time.Duration) { 144 | done := make(chan struct{}) 145 | go func() { 146 | wg.Wait() 147 | close(done) 148 | }() 149 | 150 | select { 151 | case <-done: 152 | // Success path - continue 153 | case <-time.After(timeout): 154 | t.Fatal("Test timed out waiting for concurrent requests") 155 | } 156 | } 157 | -------------------------------------------------------------------------------- /server/provider/metrics.go: -------------------------------------------------------------------------------- 1 | package provider 2 | 3 | import "github.com/prometheus/client_golang/prometheus" 4 | 5 | // initializeMetrics sets up Prometheus metrics 6 | func (m *Manager) initializeMetrics(registry *prometheus.Registry) { 7 | m.healthCheckDuration = prometheus.NewHistogram(prometheus.HistogramOpts{ 8 | Name: "hapax_health_check_duration_seconds", 9 | Help: "Duration of provider health checks", 10 | }) 11 | 12 | m.healthCheckErrors = prometheus.NewCounterVec(prometheus.CounterOpts{ 13 | Name: "hapax_health_check_errors_total", 14 | Help: "Number of health check errors by provider", 15 | }, []string{"provider"}) 16 | 17 | m.requestLatency = prometheus.NewHistogramVec(prometheus.HistogramOpts{ 18 | Name: "hapax_request_latency_seconds", 19 | Help: "Latency of provider requests", 20 | }, []string{"provider"}) 21 | 22 | m.deduplicatedRequests = prometheus.NewCounter(prometheus.CounterOpts{ 23 | Name: "hapax_deduplicated_requests_total", 24 | Help: "Number of deduplicated requests", 25 | }) 26 | 27 | m.healthyProviders = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 28 | Name: "hapax_healthy_providers", 29 | Help: "Number of healthy providers", 30 | }, []string{"provider"}) 31 | 32 | registry.MustRegister(m.healthCheckDuration) 33 | registry.MustRegister(m.healthCheckErrors) 34 | registry.MustRegister(m.requestLatency) 35 | registry.MustRegister(m.deduplicatedRequests) 36 | registry.MustRegister(m.healthyProviders) 37 | } 38 | 
-------------------------------------------------------------------------------- /server/provider/provider.go: -------------------------------------------------------------------------------- 1 | package provider 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "sync" 7 | "time" 8 | 9 | "github.com/prometheus/client_golang/prometheus" 10 | "github.com/sony/gobreaker" 11 | "github.com/teilomillet/gollm" 12 | "github.com/teilomillet/hapax/config" 13 | "github.com/teilomillet/hapax/server/circuitbreaker" 14 | "go.uber.org/zap" 15 | "golang.org/x/sync/singleflight" 16 | ) 17 | 18 | // Manager handles LLM provider management and selection 19 | type Manager struct { 20 | providers map[string]gollm.LLM 21 | breakers map[string]*circuitbreaker.CircuitBreaker 22 | healthStates sync.Map // map[string]HealthStatus 23 | logger *zap.Logger 24 | cfg *config.Config 25 | mu sync.RWMutex 26 | group *singleflight.Group // For deduplicating identical requests 27 | 28 | // Metrics 29 | registry *prometheus.Registry 30 | healthCheckDuration prometheus.Histogram 31 | healthCheckErrors *prometheus.CounterVec 32 | requestLatency *prometheus.HistogramVec 33 | deduplicatedRequests prometheus.Counter // New metric for tracking deduplicated requests 34 | healthyProviders *prometheus.GaugeVec 35 | } 36 | 37 | // NewManager creates a new provider manager 38 | func NewManager(cfg *config.Config, logger *zap.Logger, registry *prometheus.Registry) (*Manager, error) { 39 | m := &Manager{ 40 | providers: make(map[string]gollm.LLM), 41 | breakers: make(map[string]*circuitbreaker.CircuitBreaker), 42 | logger: logger, 43 | cfg: cfg, 44 | registry: registry, 45 | group: &singleflight.Group{}, 46 | } 47 | 48 | // Initialize metrics 49 | m.initializeMetrics(registry) 50 | 51 | // Initialize providers from both new and legacy configs 52 | if !cfg.TestMode { 53 | if err := m.initializeProviders(); err != nil { 54 | return nil, err 55 | } 56 | } 57 | 58 | // Start health checks if enabled 59 | if 
cfg.LLM.HealthCheck != nil && cfg.LLM.HealthCheck.Enabled {
		go m.startHealthChecks(context.Background())
	}

	return m, nil
}

// initializeProviders sets up LLM providers based on configuration.
func (m *Manager) initializeProviders() error {
	m.providers = make(map[string]gollm.LLM)
	m.breakers = make(map[string]*circuitbreaker.CircuitBreaker)

	for name, cfg := range m.cfg.Providers {
		provider, err := m.initializeProvider(name, cfg)
		if err != nil {
			return fmt.Errorf("failed to initialize provider %s: %w", name, err)
		}

		m.providers[name] = provider
		m.logger.Info("Created LLM",
			zap.String("provider", name),
			zap.String("model", cfg.Model),
			zap.Int("api_key_length", len(cfg.APIKey)))

		// Every provider starts out healthy until a check says otherwise.
		m.UpdateHealthStatus(name, HealthStatus{
			Healthy:    true,
			LastCheck:  time.Now(),
			ErrorCount: 0,
		})

		// Defaults for the gobreaker-backed circuit breaker.
		cbConfig := circuitbreaker.Config{
			Name:             name,
			MaxRequests:      1,               // Allow 1 request in half-open state
			Interval:         time.Minute * 2, // Cyclic period of closed state
			Timeout:          time.Minute,     // Period of open state
			FailureThreshold: 3,               // Trip after 3 failures
			TestMode:         m.cfg.CircuitBreaker.TestMode,
		}

		// Config-supplied values override the defaults above.
		if m.cfg.CircuitBreaker.Timeout > 0 {
			cbConfig.Timeout = m.cfg.CircuitBreaker.Timeout
		}
		if m.cfg.CircuitBreaker.MaxRequests > 0 {
			cbConfig.MaxRequests = m.cfg.CircuitBreaker.MaxRequests
		}

		breaker, err := circuitbreaker.NewCircuitBreaker(cbConfig, m.logger, m.registry)
		if err != nil {
			return fmt.Errorf("failed to create circuit breaker for %s: %w", name, err)
		}
		m.breakers[name] = breaker
	}

	return nil
}

// initializeProvider initializes a single LLM provider
func (m
*Manager) initializeProvider(_ string, cfg config.ProviderConfig) (gollm.LLM, error) {
	return gollm.NewLLM(
		gollm.SetProvider(cfg.Type),
		gollm.SetModel(cfg.Model),
		gollm.SetAPIKey(cfg.APIKey),
	)
}

// GetProvider returns a healthy provider or an error if none is available.
func (m *Manager) GetProvider() (gollm.LLM, error) {
	m.mu.RLock()
	defer m.mu.RUnlock()

	// Walk the preference list and hand back the first usable provider.
	for _, name := range m.cfg.ProviderPreference {
		provider, exists := m.providers[name]
		if !exists {
			continue
		}

		// Skip providers currently marked unhealthy.
		if !m.GetHealthStatus(name).Healthy {
			continue
		}

		// Skip providers whose circuit breaker is open.
		if breaker := m.breakers[name]; breaker != nil && breaker.State() == gobreaker.StateOpen {
			continue
		}

		return provider, nil
	}

	return nil, fmt.Errorf("no healthy provider available")
}

// SetProviders replaces the current providers with new ones (for testing).
func (m *Manager) SetProviders(providers map[string]gollm.LLM) {
	m.mu.Lock()
	defer m.mu.Unlock()

	// Drop all existing providers and breakers before installing the new set.
	m.providers = make(map[string]gollm.LLM)
	m.breakers = make(map[string]*circuitbreaker.CircuitBreaker)

	for name, provider := range providers {
		m.providers[name] = provider

		cbConfig := circuitbreaker.Config{
			Name:             name,
			MaxRequests:      1,
			Interval:         time.Second,
			Timeout:          m.cfg.CircuitBreaker.Timeout,
			FailureThreshold: 2,
			TestMode:         m.cfg.CircuitBreaker.TestMode,
		}

		breaker, err := circuitbreaker.NewCircuitBreaker(cbConfig, m.logger,
m.registry) 186 | if err != nil { 187 | m.logger.Error("Failed to create circuit breaker", 188 | zap.String("provider", name), 189 | zap.Error(err)) 190 | continue 191 | } 192 | 193 | m.breakers[name] = breaker 194 | 195 | // Initialize health status directly without calling UpdateHealthStatus 196 | m.healthStates.Store(name, HealthStatus{ 197 | Healthy: true, 198 | LastCheck: time.Now(), 199 | ErrorCount: 0, 200 | }) 201 | } 202 | 203 | // Create a map to track which providers have been added to the preference list 204 | added := make(map[string]bool) 205 | 206 | // Keep existing provider preference order for providers that still exist 207 | newPreference := make([]string, 0, len(providers)) 208 | for _, name := range m.cfg.ProviderPreference { 209 | if _, exists := providers[name]; exists { 210 | newPreference = append(newPreference, name) 211 | added[name] = true 212 | } 213 | } 214 | 215 | // Add any new providers that weren't in the original preference list 216 | for name := range providers { 217 | if !added[name] { 218 | newPreference = append(newPreference, name) 219 | } 220 | } 221 | 222 | m.cfg.ProviderPreference = newPreference 223 | m.logger.Debug("updated provider preference list", zap.Strings("preference", newPreference)) 224 | } 225 | -------------------------------------------------------------------------------- /server/routing/metrics.go: -------------------------------------------------------------------------------- 1 | package routing 2 | 3 | import ( 4 | "net/http" 5 | 6 | "github.com/teilomillet/hapax/server/metrics" 7 | ) 8 | 9 | // RegisterMetricsRoutes adds routes for Prometheus metrics 10 | func RegisterMetricsRoutes(mux *http.ServeMux, m *metrics.Metrics) { 11 | mux.Handle("/metrics", m.Handler()) 12 | } 13 | -------------------------------------------------------------------------------- /server/routing/metrics_test.go: -------------------------------------------------------------------------------- 1 | package routing 2 | 3 | import ( 4 
| "io" 5 | "net/http" 6 | "net/http/httptest" 7 | "testing" 8 | 9 | "github.com/stretchr/testify/assert" 10 | "github.com/teilomillet/hapax/server/metrics" 11 | ) 12 | 13 | func TestRegisterMetricsRoutes(t *testing.T) { 14 | // Create new metrics instance for testing 15 | m := metrics.NewMetrics() 16 | 17 | // Create new mux 18 | mux := http.NewServeMux() 19 | RegisterMetricsRoutes(mux, m) 20 | 21 | // Create test server 22 | server := httptest.NewServer(mux) 23 | defer server.Close() 24 | 25 | // Make a test request to increment some metrics 26 | m.RequestsTotal.WithLabelValues("/test", "200").Inc() 27 | m.ErrorsTotal.WithLabelValues("server_error").Inc() 28 | m.RateLimitHits.WithLabelValues("test_client").Inc() 29 | 30 | // Test metrics endpoint 31 | resp, err := http.Get(server.URL + "/metrics") 32 | assert.NoError(t, err) 33 | defer resp.Body.Close() 34 | 35 | // Check response code 36 | assert.Equal(t, http.StatusOK, resp.StatusCode) 37 | 38 | // Check content type 39 | contentType := resp.Header.Get("Content-Type") 40 | assert.Contains(t, contentType, "text/plain") 41 | 42 | // Read response body 43 | body, err := io.ReadAll(resp.Body) 44 | assert.NoError(t, err) 45 | 46 | // Verify response contains our metrics 47 | bodyStr := string(body) 48 | expectedMetrics := []string{ 49 | "hapax_http_requests_total", 50 | "hapax_errors_total", 51 | "hapax_rate_limit_hits_total", 52 | } 53 | 54 | for _, metric := range expectedMetrics { 55 | assert.Contains(t, bodyStr, metric, "response should contain metric '%s'", metric) 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /server/validation/middleware_test.go: -------------------------------------------------------------------------------- 1 | package validation 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "net/http" 7 | "net/http/httptest" 8 | "testing" 9 | 10 | "github.com/stretchr/testify/assert" 11 | "github.com/teilomillet/hapax/config" 12 | ) 13 | 14 | func 
TestValidateCompletion(t *testing.T) {
	// Initialize middleware with config
	cfg := &config.Config{
		LLM: config.LLMConfig{
			Model:            "gpt-4",
			MaxContextTokens: 100,
		},
	}
	err := Initialize(cfg)
	assert.NoError(t, err)

	tests := []struct {
		name            string
		contentType     string
		body            interface{}
		expectedStatus  int
		expectedError   bool
		expectedDetails map[string]string // Field name -> expected error message
		expectedCode    string            // Expected error code
		suggestion      string            // Expected suggestion message
	}{
		{
			name:        "valid request",
			contentType: "application/json",
			body: CompletionRequest{
				Messages: []Message{
					{Role: "user", Content: "Hello"},
				},
			},
			expectedStatus: http.StatusOK,
			expectedError:  false,
		},
		{
			name:        "missing required content field",
			contentType: "application/json",
			body: CompletionRequest{
				Messages: []Message{
					{Role: "user", Content: ""}, // Empty content
				},
			},
			expectedStatus: http.StatusUnprocessableEntity,
			expectedError:  true,
			expectedDetails: map[string]string{
				"messages[0].content": "field 'content' is required",
			},
			expectedCode: "required_validation_failed",
			suggestion:   "The request format is correct but the content is invalid",
		},
		// Fix: this table previously contained the "invalid role value" case
		// twice, byte for byte; the redundant copy was removed.
		{
			name:        "invalid role value",
			contentType: "application/json",
			body:
CompletionRequest{ 82 | Messages: []Message{ 83 | {Role: "invalid", Content: "Hello"}, 84 | }, 85 | }, 86 | expectedStatus: http.StatusUnprocessableEntity, 87 | expectedError: true, 88 | expectedDetails: map[string]string{ 89 | "messages[0].role": "role must be one of: user, assistant, system", 90 | }, 91 | expectedCode: "oneof_validation_failed", 92 | suggestion: "The request format is correct but the content is invalid", 93 | }, 94 | { 95 | name: "token limit exceeded", 96 | contentType: "application/json", 97 | body: CompletionRequest{ 98 | Messages: []Message{ 99 | {Role: "user", Content: string(make([]byte, 1000))}, // Large content 100 | }, 101 | }, 102 | expectedStatus: http.StatusUnprocessableEntity, 103 | expectedError: true, 104 | expectedDetails: map[string]string{ 105 | "messages": "token limit exceeded", 106 | }, 107 | expectedCode: "token_limit_exceeded", 108 | suggestion: "The request format is correct but the content is invalid", 109 | }, 110 | } 111 | 112 | for _, tt := range tests { 113 | t.Run(tt.name, func(t *testing.T) { 114 | // Create request body 115 | var bodyBytes []byte 116 | var err error 117 | 118 | switch v := tt.body.(type) { 119 | case string: 120 | bodyBytes = []byte(v) 121 | default: 122 | bodyBytes, err = json.Marshal(tt.body) 123 | assert.NoError(t, err) 124 | } 125 | 126 | // Create request with a test request ID 127 | req := httptest.NewRequest(http.MethodPost, "/v1/completions", bytes.NewBuffer(bodyBytes)) 128 | req.Header.Set("X-Request-ID", "test-request-id") 129 | if tt.contentType != "" { 130 | req.Header.Set("Content-Type", tt.contentType) 131 | } 132 | 133 | // Create response recorder 134 | w := httptest.NewRecorder() 135 | 136 | // Create test handler 137 | handler := ValidateCompletion(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 138 | w.WriteHeader(http.StatusOK) 139 | })) 140 | 141 | // Handle request 142 | handler.ServeHTTP(w, req) 143 | 144 | // Assert response status code 145 | assert.Equal(t, 
tt.expectedStatus, w.Code) 146 | 147 | if tt.expectedError { 148 | var errorResp APIError 149 | err := json.Unmarshal(w.Body.Bytes(), &errorResp) 150 | assert.NoError(t, err, "Failed to unmarshal error response") 151 | 152 | // Verify error structure 153 | assert.Equal(t, "validation_error", errorResp.Type) 154 | assert.Equal(t, "test-request-id", errorResp.RequestID) 155 | assert.Equal(t, tt.expectedStatus, errorResp.Code) 156 | 157 | if tt.suggestion != "" { 158 | assert.Equal(t, tt.suggestion, errorResp.Suggestion) 159 | } 160 | 161 | // Verify error details 162 | if tt.expectedDetails != nil { 163 | assert.Len(t, errorResp.Details, len(tt.expectedDetails)) 164 | 165 | // Create a map of field to error message from the response 166 | actualDetails := make(map[string]string) 167 | for _, detail := range errorResp.Details { 168 | actualDetails[detail.Field] = detail.Message 169 | } 170 | 171 | // Compare expected and actual details 172 | for field, expectedMsg := range tt.expectedDetails { 173 | actualMsg, exists := actualDetails[field] 174 | assert.True(t, exists, "Expected error for field %s not found", field) 175 | assert.Equal(t, expectedMsg, actualMsg, 176 | "Error message mismatch for field %s", field) 177 | } 178 | } 179 | 180 | // Verify error code if specified 181 | if tt.expectedCode != "" { 182 | hasExpectedCode := false 183 | for _, detail := range errorResp.Details { 184 | if detail.Code == tt.expectedCode { 185 | hasExpectedCode = true 186 | break 187 | } 188 | } 189 | assert.True(t, hasExpectedCode, 190 | "Expected error code %s not found", tt.expectedCode) 191 | } 192 | } 193 | }) 194 | } 195 | } 196 | -------------------------------------------------------------------------------- /server/validation/schema.go: -------------------------------------------------------------------------------- 1 | package validation 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/pkoukk/tiktoken-go" 7 | ) 8 | 9 | // Tokenizer defines the interface for token counting 10 
| type Tokenizer interface { 11 | Encode(text string, allowedSpecial, disallowedSpecial []string) []int 12 | Decode(tokens []int) string 13 | CountTokens(text string) int 14 | } 15 | 16 | // tiktokenWrapper wraps tiktoken to implement our Tokenizer interface 17 | type tiktokenWrapper struct { 18 | *tiktoken.Tiktoken 19 | } 20 | 21 | func (t *tiktokenWrapper) CountTokens(text string) int { 22 | tokens := t.Encode(text, nil, nil) 23 | return len(tokens) 24 | } 25 | 26 | // TokenCounter handles token counting for messages using tiktoken 27 | type TokenCounter struct { 28 | encoding Tokenizer 29 | } 30 | 31 | // NewTokenCounter creates a new token counter for the specified model 32 | func NewTokenCounter(model string) (*TokenCounter, error) { 33 | encoding, err := tiktoken.EncodingForModel(model) 34 | if err != nil { 35 | return nil, fmt.Errorf("failed to get encoding for model %s: %v", model, err) 36 | } 37 | return &TokenCounter{encoding: &tiktokenWrapper{encoding}}, nil 38 | } 39 | 40 | // CountTokens counts the total number of tokens in a message 41 | func (tc *TokenCounter) CountTokens(msg Message) int { 42 | return tc.encoding.CountTokens(msg.Content) 43 | } 44 | 45 | // CountRequestTokens counts the total number of tokens in a completion request 46 | func (tc *TokenCounter) CountRequestTokens(req CompletionRequest) int { 47 | total := 0 48 | for _, msg := range req.Messages { 49 | total += tc.CountTokens(msg) 50 | } 51 | return total 52 | } 53 | 54 | // ValidateTokens checks if the request's token count is within limits 55 | func (tc *TokenCounter) ValidateTokens(req CompletionRequest, maxContextTokens int) error { 56 | totalTokens := tc.CountRequestTokens(req) 57 | if req.Options != nil && req.Options.MaxTokens > 0 { 58 | totalTokens += req.Options.MaxTokens 59 | } 60 | 61 | if totalTokens > maxContextTokens { 62 | return fmt.Errorf("total tokens (%d) exceeds max context length (%d)", totalTokens, maxContextTokens) 63 | } 64 | 65 | return nil 66 | } 67 | 68 | 
// ValidateOptions performs comprehensive validation of request options 69 | func ValidateOptions(opts *Options) error { 70 | if opts == nil { 71 | return nil 72 | } 73 | 74 | var errs []error 75 | 76 | // Validate generation parameters 77 | if opts.Temperature < 0 || opts.Temperature > 1 { 78 | errs = append(errs, fmt.Errorf("temperature must be between 0 and 1")) 79 | } 80 | if opts.TopP <= 0 || opts.TopP > 1 { 81 | errs = append(errs, fmt.Errorf("top_p must be between 0 and 1")) 82 | } 83 | if opts.FrequencyPenalty < -2 || opts.FrequencyPenalty > 2 { 84 | errs = append(errs, fmt.Errorf("frequency_penalty must be between -2 and 2")) 85 | } 86 | if opts.PresencePenalty < -2 || opts.PresencePenalty > 2 { 87 | errs = append(errs, fmt.Errorf("presence_penalty must be between -2 and 2")) 88 | } 89 | 90 | // Validate cache options 91 | if opts.Cache != nil { 92 | if err := validateCacheOptions(opts.Cache); err != nil { 93 | errs = append(errs, err) 94 | } 95 | } 96 | 97 | // Validate retry options 98 | if opts.Retry != nil { 99 | if err := validateRetryOptions(opts.Retry); err != nil { 100 | errs = append(errs, err) 101 | } 102 | } 103 | 104 | if len(errs) > 0 { 105 | return fmt.Errorf("validation errors: %v", errs) 106 | } 107 | 108 | return nil 109 | } 110 | 111 | // validateCacheOptions validates cache-specific configuration 112 | func validateCacheOptions(cache *CacheOptions) error { 113 | if !cache.Enable { 114 | return nil 115 | } 116 | 117 | var errs []error 118 | 119 | switch cache.Type { 120 | case "memory": 121 | if cache.MaxSize <= 0 { 122 | errs = append(errs, fmt.Errorf("max_size must be greater than 0 for memory cache")) 123 | } 124 | case "redis": 125 | if cache.Redis == nil { 126 | errs = append(errs, fmt.Errorf("redis configuration required when cache type is 'redis'")) 127 | } 128 | case "file": 129 | if cache.Dir == "" { 130 | errs = append(errs, fmt.Errorf("directory path required when cache type is 'file'")) 131 | } 132 | default: 133 | errs = 
append(errs, fmt.Errorf("invalid cache type: must be one of [memory, redis, file]")) 134 | } 135 | 136 | if cache.TTL <= 0 { 137 | errs = append(errs, fmt.Errorf("cache TTL must be greater than 0")) 138 | } 139 | 140 | if len(errs) > 0 { 141 | return fmt.Errorf("cache validation errors: %v", errs) 142 | } 143 | 144 | return nil 145 | } 146 | 147 | // validateRetryOptions validates retry-specific configuration 148 | func validateRetryOptions(retry *RetryOptions) error { 149 | var errs []error 150 | 151 | if retry.MaxRetries <= 0 { 152 | errs = append(errs, fmt.Errorf("max_retries must be greater than 0")) 153 | } 154 | if retry.InitialDelay <= 0 { 155 | errs = append(errs, fmt.Errorf("initial_delay must be greater than 0")) 156 | } 157 | if retry.MaxDelay <= retry.InitialDelay { 158 | errs = append(errs, fmt.Errorf("max_delay must be greater than initial_delay")) 159 | } 160 | if retry.Multiplier <= 1 { 161 | errs = append(errs, fmt.Errorf("multiplier must be greater than 1")) 162 | } 163 | 164 | validErrors := map[string]bool{ 165 | "rate_limit": true, 166 | "timeout": true, 167 | "server_error": true, 168 | } 169 | 170 | for _, errType := range retry.RetryableErrors { 171 | if !validErrors[errType] { 172 | errs = append(errs, fmt.Errorf("invalid retry error type: %s", errType)) 173 | } 174 | } 175 | 176 | if len(errs) > 0 { 177 | return fmt.Errorf("retry validation errors: %v", errs) 178 | } 179 | 180 | return nil 181 | } 182 | --------------------------------------------------------------------------------