├── .env.example ├── .github └── workflows │ ├── docker-publish.yml │ └── go-ci.yml ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── ROADMAP.md ├── TODO.md ├── bin └── golangci-lint ├── cmd └── hapax │ └── main.go ├── config.example.yaml ├── config ├── config.go ├── config_env_test.go ├── config_test.go ├── config_watcher.go ├── http3_test.go ├── processing.go └── watcher_interface.go ├── docker-compose.yml ├── docs ├── _config.yml ├── api.md ├── assets │ └── css │ │ └── custom.css ├── blog │ └── why-we-built-hapax.md ├── configuration.md ├── getting-started.md ├── getting-started │ ├── 5-minute-setup.md │ └── index.md ├── index.md ├── installation.md ├── operations.md ├── performance.md └── security.md ├── errors ├── errors.go ├── errors_test.go ├── handlers.go ├── handlers_test.go ├── responses.go ├── responses_test.go ├── types.go └── types_test.go ├── examples ├── hapax.yaml └── requests.md ├── go.mod ├── go.sum ├── golangci.yml ├── main.go ├── prometheus.yml ├── server ├── circuitbreaker │ ├── circuitbreaker.go │ └── errors.go ├── handlers │ ├── completion.go │ ├── completion_test.go │ └── integration_test.go ├── http3_0rtt_test.go ├── http3_test.go ├── metrics │ └── metrics.go ├── middleware │ ├── auth.go │ ├── constants.go │ ├── logging.go │ ├── metrics.go │ ├── metrics_test.go │ ├── middleware.go │ ├── middleware_test.go │ ├── queue.go │ ├── queue_test.go │ ├── ratelimit.go │ ├── ratelimit_test.go │ ├── recovery.go │ ├── request_id.go │ └── timeout.go ├── mock_test.go ├── mocks │ ├── config_watcher.go │ └── llm.go ├── processing │ ├── processor.go │ ├── processor_test.go │ └── types.go ├── provider │ ├── errors.go │ ├── execution.go │ ├── health.go │ ├── manager_singleflight_test.go │ ├── metrics.go │ ├── provider.go │ └── provider_test.go ├── routing │ ├── metrics.go │ ├── metrics_test.go │ ├── router.go │ └── router_test.go ├── server.go ├── server_test.go └── validation │ ├── middleware.go │ ├── middleware_test.go │ ├── schema.go │ └── 
schema_test.go └── tests ├── circuitbreaker_test.go └── docker_test.go /.env.example: -------------------------------------------------------------------------------- 1 | # .env.example 2 | # ANTHROPIC_API_KEY= 3 | # OPENAI_API_KEY= -------------------------------------------------------------------------------- /.github/workflows/docker-publish.yml: -------------------------------------------------------------------------------- 1 | name: Docker Build and Publish 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v*' 7 | branches: 8 | - main 9 | pull_request: 10 | branches: 11 | - main 12 | 13 | jobs: 14 | build-and-push: 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - name: Checkout repository 19 | uses: actions/checkout@v4 20 | with: 21 | fetch-depth: 0 # Ensures all tags are fetched for versioning 22 | 23 | - name: Set up Docker Buildx 24 | uses: docker/setup-buildx-action@v3 25 | 26 | - name: Login to Docker Hub 27 | if: github.event_name != 'pull_request' 28 | uses: docker/login-action@v3 29 | with: 30 | username: ${{ secrets.DOCKERHUB_USERNAME }} 31 | password: ${{ secrets.DOCKERHUB_TOKEN }} 32 | ecr: auto 33 | logout: true 34 | 35 | - name: Extract metadata for Docker 36 | id: meta 37 | uses: docker/metadata-action@v5 38 | with: 39 | images: teilomillet/hapax 40 | tags: | 41 | type=semver,pattern={{version}} 42 | type=semver,pattern={{major}}.{{minor}} 43 | type=sha,prefix= 44 | type=ref,event=branch 45 | type=ref,event=pr 46 | type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }} 47 | 48 | - name: Build and push Docker image 49 | uses: docker/build-push-action@v5 50 | with: 51 | context: . 
52 | push: ${{ github.event_name != 'pull_request' }} 53 | tags: ${{ steps.meta.outputs.tags }} 54 | labels: ${{ steps.meta.outputs.labels }} 55 | cache-from: type=gha 56 | cache-to: type=gha,mode=max -------------------------------------------------------------------------------- /.github/workflows/go-ci.yml: -------------------------------------------------------------------------------- 1 | name: Go CI/CD 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | tags: 7 | - 'v[0-9]+.[0-9]+.[0-9]+' # Matches v0.1.0, v1.0.0, etc. 8 | pull_request: 9 | branches: [ main ] 10 | 11 | permissions: 12 | contents: write 13 | pull-requests: write 14 | 15 | jobs: 16 | test: 17 | name: Test & Lint 18 | runs-on: ubuntu-latest 19 | steps: 20 | - uses: actions/checkout@v4 21 | with: 22 | fetch-depth: 0 23 | 24 | - name: Set up Go 25 | uses: actions/setup-go@v4 26 | with: 27 | go-version: '1.21' 28 | cache: true 29 | 30 | - name: Install dependencies 31 | run: | 32 | go mod download 33 | go install golang.org/x/tools/cmd/goimports@latest 34 | 35 | - name: Run tests 36 | run: go test -race -coverprofile=coverage.txt -covermode=atomic ./... 
37 | 38 | - name: Run golangci-lint 39 | uses: golangci/golangci-lint-action@v3 40 | with: 41 | version: latest 42 | args: --timeout=5m --out-format=colored-line-number --issues-exit-code=1 43 | only-new-issues: true 44 | skip-pkg-cache: true 45 | skip-build-cache: false 46 | 47 | - name: Upload coverage 48 | if: success() 49 | uses: codecov/codecov-action@v3 50 | with: 51 | file: ./coverage.txt 52 | fail_ci_if_error: false 53 | verbose: true 54 | 55 | build: 56 | name: Build Binary 57 | needs: test 58 | runs-on: ubuntu-latest 59 | steps: 60 | - uses: actions/checkout@v4 61 | with: 62 | fetch-depth: 0 63 | 64 | - name: Set up Go 65 | uses: actions/setup-go@v4 66 | with: 67 | go-version: '1.21' 68 | cache: true 69 | 70 | - name: Build 71 | run: | 72 | VERSION=$(git describe --tags --always --dirty) 73 | mkdir -p hapax 74 | go build -v -ldflags="-X main.Version=${VERSION}" -o hapax/hapax . 75 | 76 | - name: Upload artifact 77 | uses: actions/upload-artifact@v4 78 | with: 79 | name: hapax-binary 80 | path: ./hapax 81 | retention-days: 5 82 | 83 | release: 84 | name: Create Release 85 | needs: [test, build] 86 | runs-on: ubuntu-latest 87 | if: github.event_name == 'push' && contains(github.ref, 'refs/tags/v') 88 | steps: 89 | - uses: actions/checkout@v4 90 | with: 91 | fetch-depth: 0 92 | 93 | - name: Generate changelog 94 | id: changelog 95 | run: | 96 | echo "CHANGELOG<<EOF" >> $GITHUB_ENV 97 | 98 | # Get tag message for overview 99 | TAG_MSG=$(git tag -l --format='%(contents)' $(git describe --tags --abbrev=0)) 100 | echo "$TAG_MSG" >> $GITHUB_ENV 101 | echo "" >> $GITHUB_ENV 102 | 103 | # For first release, get all commits 104 | if !
git tag --sort=-v:refname | grep -q '^v'; then 105 | RANGE="$(git rev-list --max-parents=0 HEAD)..HEAD" 106 | else 107 | RANGE="$(git describe --tags --abbrev=0 HEAD^)..HEAD" 108 | fi 109 | 110 | echo "## Changes" >> $GITHUB_ENV 111 | echo "" >> $GITHUB_ENV 112 | 113 | # Features 114 | echo "### Features" >> $GITHUB_ENV 115 | # All commits, with attribution only for contributors 116 | git log $RANGE --pretty=format:'* %s%ae' | grep -i '^* feat' | sed 's/feat: /* /' | sed "s/teilomillet@.*$//" | sed 's/\([^@]*\)@\(.*\)/ (@\2)/' >> $GITHUB_ENV || true 117 | echo "" >> $GITHUB_ENV 118 | 119 | # Fixes 120 | echo "### Bug Fixes" >> $GITHUB_ENV 121 | # All commits, with attribution only for contributors 122 | git log $RANGE --pretty=format:'* %s%ae' | grep -i '^* fix' | sed 's/fix: /* /' | sed "s/teilomillet@.*$//" | sed 's/\([^@]*\)@\(.*\)/ (@\2)/' >> $GITHUB_ENV || true 123 | echo "" >> $GITHUB_ENV 124 | 125 | # Documentation 126 | echo "### Documentation" >> $GITHUB_ENV 127 | # All commits, with attribution only for contributors 128 | git log $RANGE --pretty=format:'* %s%ae' | grep -i '^* docs' | sed 's/docs: /* /' | sed "s/teilomillet@.*$//" | sed 's/\([^@]*\)@\(.*\)/ (@\2)/' >> $GITHUB_ENV || true 129 | echo "" >> $GITHUB_ENV 130 | 131 | # Dependencies 132 | echo "## Dependency Updates" >> $GITHUB_ENV 133 | echo "" >> $GITHUB_ENV 134 | if [ -f "go.mod" ]; then 135 | echo '```diff' >> $GITHUB_ENV 136 | if git rev-parse --verify HEAD^ >/dev/null 2>&1; then 137 | git diff HEAD^ HEAD go.mod | grep '^[+-]' | grep -v '^[+-]module' >> $GITHUB_ENV || true 138 | fi 139 | echo '```' >> $GITHUB_ENV 140 | fi 141 | echo "" >> $GITHUB_ENV 142 | 143 | # List contributors (excluding maintainer) 144 | echo "## Contributors" >> $GITHUB_ENV 145 | git log $RANGE --format='%aE' | sort -u | grep -v 'teilomillet' | sed 's/.*@/@/' | while read handle; do 146 | echo "* $handle" >> $GITHUB_ENV 147 | done 148 | 149 | echo "EOF" >> $GITHUB_ENV 150 | 151 | - name: Download binary 152 | uses: 
actions/download-artifact@v4 153 | with: 154 | name: hapax-binary 155 | path: ./ 156 | 157 | - name: Create Release 158 | uses: softprops/action-gh-release@v1 159 | env: 160 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 161 | with: 162 | body: ${{ env.CHANGELOG }} 163 | files: ./hapax/hapax 164 | draft: false 165 | prerelease: false 166 | generate_release_notes: false -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # If you prefer the allow list template instead of the deny list, see community template: 2 | # https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore 3 | # 4 | # Binaries for programs and plugins 5 | *.exe 6 | *.exe~ 7 | *.dll 8 | *.so 9 | *.dylib 10 | 11 | # Test binary, built with `go test -c` 12 | *.test 13 | 14 | # Output of the go coverage tool, specifically when used with LiteIDE 15 | *.out 16 | 17 | # Dependency directories (remove the comment below to include it) 18 | # vendor/ 19 | 20 | # Go workspace file 21 | go.work 22 | go.work.sum 23 | 24 | # env file 25 | .env 26 | 27 | # Config files 28 | config.yaml 29 | 30 | /hapax 31 | coverage.txt 32 | go -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Build stage 2 | FROM golang:1.22-alpine AS builder 3 | 4 | # Install build dependencies 5 | RUN apk add --no-cache git gcc musl-dev 6 | 7 | # Set working directory 8 | WORKDIR /app 9 | 10 | # Copy go mod and sum files 11 | COPY go.mod go.sum ./ 12 | 13 | # Download dependencies 14 | RUN go mod download 15 | 16 | # Copy source code 17 | COPY . . 
18 | 19 | # Build the application 20 | RUN CGO_ENABLED=0 GOOS=linux go build -o hapax ./cmd/hapax 21 | 22 | # Final stage 23 | FROM alpine:3.19 24 | 25 | # Add non-root user 26 | RUN adduser -D -g '' hapax 27 | 28 | # Install runtime dependencies 29 | RUN apk add --no-cache ca-certificates tzdata curl 30 | 31 | # Set working directory 32 | WORKDIR /app 33 | 34 | # Copy binary and configuration files 35 | COPY --from=builder /app/hapax . 36 | 37 | COPY config.example.yaml ./config.yaml 38 | COPY docker-compose.yml ./docker-compose.yml 39 | COPY prometheus.yml ./prometheus.yml 40 | 41 | # Use non-root user 42 | USER hapax 43 | 44 | # Expose ports 45 | EXPOSE 8081 46 | 47 | # Set healthcheck 48 | HEALTHCHECK --interval=10s --timeout=5s --start-period=10s --retries=3 \ 49 | CMD curl -f http://localhost:8081/health || exit 1 50 | 51 | # Run the application 52 | ENTRYPOINT ["./hapax"] 53 | 54 | CMD ["--config", "config.yaml"] -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Hapax: AI Infrastructure 2 | 3 | Hapax is a production-ready AI infrastructure layer that ensures uninterrupted AI operations through intelligent provider management and automatic failover. Named after the Greek word ἅπαξ (meaning "once"), it embodies our core promise: configure once, then let it seamlessly manage your AI infrastructure. 4 | 5 | ## Common AI Infrastructure Challenges 6 | 7 | Organizations face several critical challenges in managing their AI infrastructure. Service disruptions from AI provider outages create direct revenue impacts, while engineering teams dedicate significant resources to managing multiple AI providers. Teams struggle with limited visibility into AI usage across departments, compounded by complex integration requirements spanning different AI providers. 
8 | 9 | ## Core Capabilities 10 | 11 | Hapax delivers a robust infrastructure layer through three core capabilities: 12 | 13 | ### Intelligent Provider Management 14 | The system ensures continuous service through real-time health monitoring with configurable timeouts and check intervals. Automatic failover between providers maintains zero downtime, while a sophisticated three-state circuit breaker (closed, half-open, open) with configurable thresholds prevents cascade failures. Request deduplication using the singleflight pattern optimizes resource utilization. 15 | 16 | ### Production-Ready Architecture 17 | The architecture prioritizes reliability through high-performance request routing and load balancing. Comprehensive error handling and request validation ensure data integrity, while structured logging with request tracing enables detailed debugging. Configurable timeout and rate limiting mechanisms protect system resources. 18 | 19 | ### Security & Monitoring 20 | Security is foundational, implemented through API key-based authentication and comprehensive request validation and sanitization. The monitoring system provides granular usage tracking per endpoint and detailed request logging for operational visibility. 21 | 22 | ## Usage Tracking & Monitoring 23 | 24 | Hapax provides built-in monitoring capabilities through Prometheus integration, offering comprehensive visibility into your AI infrastructure: 25 | 26 | ### Request Tracking 27 | Monitor API usage through versioned endpoints: 28 | ```bash 29 | # Standard endpoint structure 30 | /v1/completions 31 | /health # Global system health status 32 | /v1/health # Versioned API health status 33 | /metrics 34 | ``` 35 | 36 | ### Prometheus Integration 37 | The monitoring system tracks essential metrics including request counts and status by endpoint, request latencies, active request volume, error rates by provider, and circuit breaker states. 
Health check performance metrics and request deduplication statistics provide deep insights into system efficiency. 38 | 39 | Each metric is designed for operational visibility: 40 | - `hapax_http_requests_total` tracks request volume by endpoint and status 41 | - `hapax_http_request_duration_seconds` measures request latency 42 | - `hapax_http_active_requests` shows current load by endpoint 43 | - `hapax_errors_total` monitors error rates by type 44 | - `circuit_breaker_state` indicates provider health status 45 | - `hapax_health_check_duration_seconds` validates provider responsiveness 46 | - `hapax_deduplicated_requests_total` confirms request efficiency 47 | - `hapax_rate_limit_hits_total` tracks rate limiting by client 48 | 49 | ### Access Management 50 | Security is enforced through API key-based authentication, with per-endpoint rate limiting and comprehensive request validation and sanitization. 51 | 52 | ## Technical Implementation 53 | 54 | ```json 55 | // Example: Completion Request 56 | { 57 | "messages": [ 58 | {"role": "system", "content": "You are a customer service assistant."}, 59 | {"role": "user", "content": "I need help with my order #12345"} 60 | ] 61 | } 62 | ``` 63 | 64 | When your primary provider experiences issues, Hapax: 65 | 1. Detects the failure through continuous health checks (1-minute intervals) 66 | 2. Activates the circuit breaker after 3 consecutive failures 67 | 3. Routes traffic to healthy backup providers in preference order 68 | 4. 
Maintains detailed metrics for operational visibility 69 | 70 | ## Deployment Options 71 | 72 | Deploy Hapax in minutes with our production-ready container: 73 | 74 | ```bash 75 | docker run -p 8080:8080 \ 76 | -e OPENAI_API_KEY=your_key \ 77 | -e ANTHROPIC_API_KEY=your_key \ 78 | -e CONFIG_PATH=/app/config.yaml \ 79 | teilomillet/hapax:latest 80 | ``` 81 | 82 | Default configuration is provided but can be customized via `config.yaml`: 83 | ```yaml 84 | server: 85 | port: 8080 86 | read_timeout: 30s 87 | write_timeout: 45s 88 | max_header_bytes: 2097152 # 2MB 89 | shutdown_timeout: 30s 90 | http3: # Optional HTTP/3 support 91 | enabled: true 92 | port: 443 # Default HTTPS/QUIC port 93 | tls_cert_file: "/path/to/cert.pem" 94 | tls_key_file: "/path/to/key.pem" 95 | idle_timeout: 30s 96 | max_bi_streams_concurrent: 100 97 | max_uni_streams_concurrent: 100 98 | max_stream_receive_window: 6291456 # 6MB 99 | max_connection_receive_window: 15728640 # 15MB 100 | 101 | circuitBreaker: 102 | maxRequests: 100 103 | interval: 30s 104 | timeout: 10s 105 | failureThreshold: 5 106 | 107 | providerPreference: 108 | - ollama 109 | - anthropic 110 | - openai 111 | ``` 112 | 113 | ## Integration Architecture 114 | 115 | Hapax provides comprehensive integration capabilities through multiple components: 116 | 117 | ### REST API with Versioned Endpoints 118 | The API architecture provides dedicated endpoints for core functionalities: 119 | - `/v1/completions` handles AI completions, 120 | - `/v1/health` provides versioned API health monitoring, 121 | - `/health` offers global system health status. 122 | - `/metrics` exposes Prometheus metrics for comprehensive monitoring. 123 | 124 | ### Comprehensive Monitoring 125 | The monitoring infrastructure integrates Prometheus metrics across all critical components, enabling detailed tracking of request latencies, circuit breaker states, provider health status, and request deduplication. 
This comprehensive approach ensures complete operational visibility. 126 | 127 | ### Health Checks 128 | The health monitoring system operates with enterprise-grade configurability. Check intervals default to one minute with adjustable timeouts, while failure thresholds are tuned to prevent false positives. Health monitoring extends from individual providers to Docker container status, with granular per-provider health tracking. 129 | 130 | ### Production Safeguards 131 | System integrity is maintained through multiple safeguards: request deduplication prevents redundant processing, automatic failover ensures continuous operation, circuit breaker patterns protect against cascade failures, and structured JSON logging with correlation IDs enables thorough debugging. 132 | 133 | ### Protocol Support 134 | The server supports both HTTP/1.1 and HTTP/3 (QUIC) protocols: 135 | - HTTP/1.1 for universal compatibility 136 | - HTTP/3 for improved performance: 137 | - Reduced latency through 0-RTT connections 138 | - Better multiplexing with independent streams 139 | - Improved congestion control 140 | - Automatic connection migration 141 | - Built-in TLS 1.3 encryption 142 | 143 | ## Technical Requirements 144 | 145 | Running Hapax requires: 146 | - Docker-compatible environment with network access to AI providers 147 | - 1GB RAM minimum (4GB recommended for production) 148 | - TLS certificates for HTTP/3 support (if enabled) 149 | - Access credentials (API keys) for supported providers: OpenAI, Anthropic, etc. 150 | 151 | ## Documentation 152 | 153 | Comprehensive documentation is available through multiple resources. The [Quick Start Guide](https://github.com/teilomillet/hapax/wiki) provides initial setup instructions, while detailed information about the API and security measures can be found in the [API Documentation](docs/api.md) and [Security Overview](docs/security.md). For operational insights, consult the [Monitoring Guide](docs/monitoring.md). 
154 | 155 | ## License 156 | 157 | Licensed under Apache 2.0. See [LICENSE](LICENSE) for details. 158 | 159 | --- 160 | 161 | For detailed technical specifications, visit our [Technical Documentation](docs/technical.md). -------------------------------------------------------------------------------- /ROADMAP.md: -------------------------------------------------------------------------------- 1 | # Hapax Development Roadmap 2 | 3 | ## Vision 4 | Hapax is the reliability layer between your code and LLM providers. We're building an open-source infrastructure layer that makes LLM operations robust and predictable. Our goal is to provide the tools and visibility you need to run AI applications with confidence, whether you're a solo developer or running large-scale deployments. 5 | 6 | ### Core Principles 7 | - **Reliability**: Smart provider management for uninterrupted operations 8 | - **Visibility**: Clear insights into your LLM infrastructure 9 | - **Flexibility**: Adaptable to your security and scaling needs 10 | - **Simplicity**: Complex infrastructure made approachable 11 | 12 | ## v0.1.0: Foundation (Current) 13 | Focus: Core functionality and initial production readiness. 
14 | 15 | ### Core Features 16 | - [x] Request queueing and deduplication 17 | - [x] HTTP/3 (QUIC) implementation 18 | - High-performance transport layer 19 | - 0-RTT connection establishment 20 | - Connection migration 21 | - Multiplexing optimization 22 | - TLS 1.3 integration 23 | 24 | ### Documentation 25 | - [x] Installation and Configuration 26 | - Deployment guide 27 | - Configuration reference 28 | - Security setup 29 | - Performance tuning 30 | - [x] API Documentation 31 | - Endpoint specifications 32 | - Request/response formats 33 | - Error handling 34 | - Authentication 35 | - [x] Operations Guide 36 | - Monitoring setup 37 | - Metrics reference 38 | - Logging guide 39 | - Troubleshooting 40 | 41 | ## v0.2.0: Enterprise Observability 42 | Focus: Deep visibility and operational intelligence. 43 | 44 | ### Advanced Monitoring 45 | - [ ] Enhanced metrics collection 46 | - Detailed latency tracking 47 | - Resource utilization metrics 48 | - Provider-specific metrics 49 | - Custom metric pipelines 50 | - [ ] Advanced audit logging 51 | - Structured audit events 52 | - Compliance-ready logging 53 | - Log aggregation support 54 | - Log retention policies 55 | - [ ] Operational dashboards 56 | - Real-time system visibility 57 | - Performance analytics 58 | - Health monitoring 59 | - Alert management 60 | 61 | ### Security Enhancements 62 | - [ ] Role-based access control 63 | - Fine-grained permissions 64 | - Resource-level access 65 | - Audit trails 66 | - Identity provider integration 67 | - [ ] Enhanced security features 68 | - Request validation 69 | - Rate limiting 70 | - Token management 71 | - Security event monitoring 72 | 73 | ## v0.3.0: Enterprise Scale 74 | Focus: Horizontal scaling and high availability. 
75 | 76 | ### Distributed Architecture 77 | - [ ] Cluster mode 78 | - Leader election 79 | - State synchronization 80 | - Node auto-discovery 81 | - Cross-node routing 82 | - [ ] Advanced request management 83 | - Dynamic rate limiting 84 | - Request quotas 85 | - Load balancing 86 | - Circuit breaking 87 | - [ ] Distributed caching 88 | - Cache strategies 89 | - Invalidation rules 90 | - Memory management 91 | - Cache analytics 92 | 93 | ### Enterprise Integration 94 | - [ ] Advanced routing 95 | - Content-based routing 96 | - Traffic splitting 97 | - Request transformation 98 | - Custom middleware 99 | - [ ] Provider management 100 | - Multi-provider failover 101 | - Provider health tracking 102 | - Cost optimization 103 | - Usage analytics 104 | 105 | ## v1.0.0: Production Scale 106 | Focus: Mission-critical deployment capabilities. 107 | 108 | ### Performance & Reliability 109 | - [ ] Advanced performance features 110 | - Connection pooling 111 | - Request batching 112 | - Memory optimization 113 | - CPU optimization 114 | - [ ] Reliability enhancements 115 | - Automated failover 116 | - Self-healing 117 | - Predictive scaling 118 | - Performance prediction 119 | 120 | ### Enterprise Operations 121 | - [ ] Cost management 122 | - Usage tracking 123 | - Budget controls 124 | - Cost allocation 125 | - Usage forecasting 126 | - [ ] SLA management 127 | - SLA definition 128 | - Performance tracking 129 | - Availability monitoring 130 | - Compliance reporting 131 | 132 | ### Advanced Features 133 | - [ ] Multi-region support 134 | - Geographic routing 135 | - Regional failover 136 | - Data sovereignty 137 | - Cross-region analytics 138 | - [ ] Advanced security 139 | - Zero-trust architecture 140 | - Advanced threat detection 141 | - Security analytics 142 | - Compliance automation 143 | 144 | ## Success Metrics 145 | - Sub-minute deployment time 146 | - Zero-touch configuration 147 | - 99.99% availability 148 | - < 50ms added latency 149 | - Zero security 
vulnerabilities 150 | - Automatic failure recovery 151 | - Complete operational visibility 152 | 153 | ## Future Considerations 154 | - Edge computing integration 155 | - Custom model hosting 156 | - Model performance analytics 157 | - Fine-tuning infrastructure 158 | - Hybrid deployment models 159 | - Advanced protocol support 160 | 161 | ## Notes 162 | - Security and reliability are continuous priorities 163 | - Each feature includes comprehensive testing and documentation 164 | - Regular security audits are mandatory 165 | - Features may be reprioritized based on enterprise requirements 166 | -------------------------------------------------------------------------------- /TODO.md: -------------------------------------------------------------------------------- 1 | Place where I write notes about what need to be done 2 | -------------------------------------------------------------------------------- /bin/golangci-lint: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teilomillet/hapax/7c08e64129a1c0adfea8a65dd7a608fd2b345a37/bin/golangci-lint -------------------------------------------------------------------------------- /cmd/hapax/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "flag" 6 | "fmt" 7 | "log" 8 | "os" 9 | "os/signal" 10 | "syscall" 11 | 12 | "github.com/teilomillet/hapax/config" 13 | "github.com/teilomillet/hapax/server" 14 | "go.uber.org/zap" 15 | ) 16 | 17 | var ( 18 | configFile = flag.String("config", "hapax.yaml", "Path to configuration file") 19 | validate = flag.Bool("validate", false, "Validate configuration and exit") 20 | version = flag.Bool("version", false, "Print version and exit") 21 | ) 22 | 23 | // Version represents the current version of Hapax 24 | const Version = "v0.1.0" 25 | 26 | func main() { 27 | flag.Parse() 28 | 29 | if *version { 30 | fmt.Printf("hapax %s\n", 
Version) 31 | os.Exit(0) 32 | } 33 | 34 | // Create logger 35 | logger, err := zap.NewProduction() 36 | if err != nil { 37 | log.Fatalf("Failed to create logger: %v", err) 38 | } 39 | defer func() { 40 | if err := logger.Sync(); err != nil { 41 | // Log sync failure, but use fmt.Fprintf to stderr since the zap logger might be unavailable 42 | fmt.Fprintf(os.Stderr, "Failed to sync logger: %v\n", err) 43 | } 44 | }() 45 | 46 | // Load configuration 47 | cfg, err := config.LoadFile(*configFile) 48 | if err != nil { 49 | logger.Fatal("Failed to load config", 50 | zap.Error(err), 51 | zap.String("config_file", *configFile), 52 | ) 53 | } 54 | 55 | // Just validate and exit if requested 56 | if *validate { 57 | fmt.Println("Configuration is valid") 58 | os.Exit(0) 59 | } 60 | 61 | // Create server with config path and logger 62 | srv, err := server.NewServer(*configFile, logger) 63 | if err != nil { 64 | logger.Fatal("Failed to create server", 65 | zap.Error(err), 66 | ) 67 | } 68 | 69 | // Setup signal handling for graceful shutdown 70 | ctx, cancel := context.WithCancel(context.Background()) 71 | defer cancel() 72 | 73 | go func() { 74 | sigCh := make(chan os.Signal, 1) 75 | signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM) 76 | <-sigCh 77 | logger.Info("Received shutdown signal") 78 | cancel() 79 | }() 80 | 81 | // Start server 82 | logger.Info("Starting hapax", 83 | zap.String("version", Version), 84 | zap.Int("port", cfg.Server.Port), 85 | ) 86 | 87 | if err := srv.Start(ctx); err != nil { 88 | logger.Fatal("Server error", 89 | zap.Error(err), 90 | ) 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /config.example.yaml: -------------------------------------------------------------------------------- 1 | server: 2 | port: 8080 3 | read_timeout: 30s 4 | write_timeout: 30s 5 | max_header_bytes: 1048576 6 | shutdown_timeout: 5s 7 | 8 | llm: 9 | provider: anthropic 10 | model: claude-3.5-haiku-latest 11 | api_key: 
${ANTHROPIC_API_KEY} 12 | max_context_tokens: 100000 13 | retry: 14 | max_retries: 3 15 | initial_delay: 100ms 16 | max_delay: 2s 17 | multiplier: 2.0 18 | retryable_errors: ["rate_limit", "timeout", "server_error"] 19 | 20 | providers: 21 | anthropic: 22 | type: anthropic 23 | model: claude-3.5-haiku-latest 24 | api_key: ${ANTHROPIC_API_KEY} 25 | ollama: 26 | type: ollama 27 | model: llama3 28 | api_key: "" 29 | 30 | provider_preference: 31 | - anthropic 32 | - ollama 33 | 34 | logging: 35 | level: info 36 | format: json 37 | 38 | metrics: 39 | enabled: true 40 | prometheus: 41 | enabled: true 42 | 43 | routes: 44 | - path: /v1/completions 45 | handler: completion 46 | version: v1 47 | methods: [POST] 48 | - path: /health 49 | handler: health 50 | version: v1 51 | methods: [GET] 52 | 53 | processing: 54 | request_templates: 55 | default: "{{.Input}}" 56 | chat: "{{range .Messages}}{{.Role}}: {{.Content}}\n{{end}}" 57 | response_formatting: 58 | clean_json: true 59 | trim_whitespace: true 60 | max_length: 1048576 -------------------------------------------------------------------------------- /config/config_test.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | "time" 7 | ) 8 | 9 | func TestLoadValidConfig(t *testing.T) { 10 | yamlConfig := ` 11 | server: 12 | port: 9090 13 | read_timeout: 45s 14 | write_timeout: 45s 15 | max_header_bytes: 2097152 16 | shutdown_timeout: 45s 17 | 18 | llm: 19 | provider: openai 20 | model: gpt-4 21 | endpoint: https://api.openai.com/v1 22 | system_prompt: "You are a helpful assistant." 
23 | options: 24 | temperature: 0.8 25 | max_tokens: 4000 26 | 27 | logging: 28 | level: debug 29 | format: json 30 | 31 | routes: 32 | - path: /v1/completions 33 | handler: completion 34 | version: v1 35 | - path: /health 36 | handler: health 37 | version: v1 38 | ` 39 | 40 | config, err := Load(strings.NewReader(yamlConfig)) 41 | if err != nil { 42 | t.Fatalf("Failed to load valid config: %v", err) 43 | } 44 | 45 | // Check server config 46 | if config.Server.Port != 9090 { 47 | t.Errorf("unexpected port: got %d, want %d", config.Server.Port, 9090) 48 | } 49 | if config.Server.ReadTimeout != 45*time.Second { 50 | t.Errorf("unexpected read timeout: got %v, want %v", config.Server.ReadTimeout, 45*time.Second) 51 | } 52 | 53 | // Check LLM config 54 | if config.LLM.Provider != "openai" { 55 | t.Errorf("unexpected provider: got %s, want %s", config.LLM.Provider, "openai") 56 | } 57 | if config.LLM.Model != "gpt-4" { 58 | t.Errorf("unexpected model: got %s, want %s", config.LLM.Model, "gpt-4") 59 | } 60 | 61 | // Check logging config 62 | if config.Logging.Level != "debug" { 63 | t.Errorf("unexpected log level: got %s, want %s", config.Logging.Level, "debug") 64 | } 65 | if config.Logging.Format != "json" { 66 | t.Errorf("unexpected log format: got %s, want %s", config.Logging.Format, "json") 67 | } 68 | 69 | // Check routes 70 | if len(config.Routes) != 2 { 71 | t.Errorf("unexpected number of routes: got %d, want %d", len(config.Routes), 2) 72 | } 73 | } 74 | 75 | func TestLoadInvalidConfig(t *testing.T) { 76 | tests := []struct { 77 | name string 78 | config string 79 | want string 80 | }{ 81 | { 82 | name: "invalid port", 83 | config: ` 84 | server: 85 | port: -1 86 | `, 87 | want: "invalid port", 88 | }, 89 | { 90 | name: "invalid log level", 91 | config: ` 92 | logging: 93 | level: invalid 94 | `, 95 | want: "invalid log level", 96 | }, 97 | { 98 | name: "empty provider", 99 | config: ` 100 | llm: 101 | provider: "" 102 | `, 103 | want: "empty LLM provider", 104 
| }, 105 | { 106 | name: "empty route path", 107 | config: ` 108 | routes: 109 | - path: "" 110 | handler: test 111 | `, 112 | want: "empty path", 113 | }, 114 | } 115 | 116 | for _, tt := range tests { 117 | t.Run(tt.name, func(t *testing.T) { 118 | _, err := Load(strings.NewReader(tt.config)) 119 | if err == nil { 120 | t.Error("expected error, got nil") 121 | } else if !strings.Contains(err.Error(), tt.want) { 122 | t.Errorf("unexpected error: got %v, want %v", err, tt.want) 123 | } 124 | }) 125 | } 126 | } 127 | 128 | func TestDefaultConfig(t *testing.T) { 129 | config := DefaultConfig() 130 | 131 | // Check server defaults 132 | if config.Server.Port != 8080 { 133 | t.Errorf("unexpected default port: got %d, want %d", config.Server.Port, 8080) 134 | } 135 | if config.Server.ReadTimeout != 30*time.Second { 136 | t.Errorf("unexpected default read timeout: got %v, want %v", config.Server.ReadTimeout, 30*time.Second) 137 | } 138 | 139 | // Check LLM defaults 140 | if config.LLM.Provider != "ollama" { 141 | t.Errorf("unexpected default provider: got %s, want %s", config.LLM.Provider, "ollama") 142 | } 143 | if config.LLM.Model != "llama2" { 144 | t.Errorf("unexpected default model: got %s, want %s", config.LLM.Model, "llama2") 145 | } 146 | 147 | // Check logging defaults 148 | if config.Logging.Level != "info" { 149 | t.Errorf("unexpected default log level: got %s, want %s", config.Logging.Level, "info") 150 | } 151 | if config.Logging.Format != "json" { 152 | t.Errorf("unexpected default log format: got %s, want %s", config.Logging.Format, "json") 153 | } 154 | 155 | // Check default routes 156 | if len(config.Routes) != 3 { 157 | t.Errorf("unexpected number of default routes: got %d, want %d", 158 | len(config.Routes), 3) 159 | } 160 | } 161 | -------------------------------------------------------------------------------- /config/config_watcher.go: -------------------------------------------------------------------------------- 1 | // config_watcher.go 2 | 
package config

import (
	"fmt"
	"sync"
	"sync/atomic"

	"github.com/fsnotify/fsnotify"
	"go.uber.org/zap"
)

// Verify at compile time that ConfigWatcher implements Watcher.
var _ Watcher = (*ConfigWatcher)(nil)

// ConfigWatcher manages configuration hot reloading. It watches a single
// config file, reloads and validates the file on write events, and fans the
// new *Config out to subscribers.
type ConfigWatcher struct {
	// currentConfig always holds a *Config; atomic.Value gives lock-free,
	// thread-safe reads via GetCurrentConfig.
	currentConfig atomic.Value
	configPath    string
	watcher       *fsnotify.Watcher
	logger        *zap.Logger

	// mu guards subscribers: Subscribe appends from caller goroutines while
	// the watch goroutine reads the slice in handleConfigChange, so access
	// must be synchronized to avoid a data race.
	mu          sync.Mutex
	subscribers []chan<- *Config
}

// NewConfigWatcher creates a watcher for configPath, loads and stores the
// initial configuration, and starts a background goroutine that reloads the
// file whenever it is written. The returned watcher must be released with
// Close.
func NewConfigWatcher(configPath string, logger *zap.Logger) (*ConfigWatcher, error) {
	watcher, err := fsnotify.NewWatcher()
	if err != nil {
		return nil, fmt.Errorf("failed to create watcher: %w", err)
	}

	cw := &ConfigWatcher{
		configPath: configPath,
		watcher:    watcher,
		logger:     logger,
	}

	// Load the initial configuration up front so GetCurrentConfig never
	// observes an empty atomic.Value.
	initialConfig, err := LoadFile(configPath)
	if err != nil {
		// Release the fsnotify handle (an OS file descriptor) on the
		// failure path; the load error takes precedence over any close error.
		_ = watcher.Close()
		return nil, fmt.Errorf("failed to load initial config: %w", err)
	}
	cw.currentConfig.Store(initialConfig)

	// Start watching the config file.
	if err := watcher.Add(configPath); err != nil {
		_ = watcher.Close() // best-effort cleanup; see above
		return nil, fmt.Errorf("failed to watch config file: %w", err)
	}

	go cw.watchConfig()
	return cw, nil
}

// Subscribe returns a buffered channel that receives each successfully
// reloaded configuration. Delivery is non-blocking, so a subscriber that is
// not ready may miss intermediate updates; the latest configuration is always
// available via GetCurrentConfig.
func (cw *ConfigWatcher) Subscribe() <-chan *Config {
	ch := make(chan *Config, 1)
	cw.mu.Lock()
	cw.subscribers = append(cw.subscribers, ch)
	cw.mu.Unlock()
	return ch
}

// GetCurrentConfig returns the current configuration thread-safely.
func (cw *ConfigWatcher) GetCurrentConfig() *Config {
	return cw.currentConfig.Load().(*Config)
}

// watchConfig is the watch loop, run on its own goroutine. It exits when the
// underlying fsnotify watcher is closed (both channels are then closed).
// NOTE(review): only Write events trigger a reload; editors that replace the
// file via rename+create would not be detected — confirm whether that matters
// for the supported deployment styles before extending the check.
func (cw *ConfigWatcher) watchConfig() {
	for {
		select {
		case event, ok := <-cw.watcher.Events:
			if !ok {
				return
			}
			if event.Op&fsnotify.Write == fsnotify.Write {
				cw.handleConfigChange()
			}
		case err, ok := <-cw.watcher.Errors:
			if !ok {
				return
			}
			cw.logger.Error("Config watcher error", zap.Error(err))
		}
	}
}

// handleConfigChange reloads the config file, validates it, stores it as the
// current configuration, and notifies subscribers. Invalid or unreadable
// configs are logged and discarded, leaving the previous config in place.
func (cw *ConfigWatcher) handleConfigChange() {
	cw.logger.Info("Detected config file change, reloading...")

	newConfig, err := LoadFile(cw.configPath)
	if err != nil {
		cw.logger.Error("Failed to load new config", zap.Error(err))
		return
	}

	// Validate before publishing so subscribers never see a broken config.
	if err := newConfig.Validate(); err != nil {
		cw.logger.Error("Invalid new configuration", zap.Error(err))
		return
	}

	// Store the new configuration for GetCurrentConfig readers.
	cw.currentConfig.Store(newConfig)

	// Snapshot the subscriber list under the lock so Subscribe can run
	// concurrently with notification without racing on the slice.
	cw.mu.Lock()
	subs := make([]chan<- *Config, len(cw.subscribers))
	copy(subs, cw.subscribers)
	cw.mu.Unlock()

	// Notify all subscribers without blocking the watch loop.
	for _, sub := range subs {
		select {
		case sub <- newConfig:
		default:
			// Subscriber's buffer is full; skip it. It can still fetch the
			// latest config via GetCurrentConfig.
		}
	}

	cw.logger.Info("Configuration reloaded successfully")
}

// Close stops the watcher and releases its OS resources; the watch goroutine
// exits once the event channels are closed.
func (cw *ConfigWatcher) Close() error {
	return cw.watcher.Close()
}
--------------------------------------------------------------------------------
/config/processing.go:
--------------------------------------------------------------------------------
package config

// ProcessingConfig defines the configuration for request/response processing
type ProcessingConfig struct {
	// RequestTemplates maps template names to their content
	RequestTemplates map[string]string `yaml:"request_templates"`

	// ResponseFormatting configures how responses should be formatted
	ResponseFormatting ResponseFormattingConfig `yaml:"response_formatting"`
}

// ResponseFormattingConfig defines response formatting options
type ResponseFormattingConfig struct {
	// CleanJSON enables JSON response cleaning using
gollm 15 | CleanJSON bool `yaml:"clean_json"` 16 | 17 | // TrimWhitespace removes extra whitespace from responses 18 | TrimWhitespace bool `yaml:"trim_whitespace"` 19 | 20 | // MaxLength limits the response length 21 | MaxLength int `yaml:"max_length"` 22 | } 23 | -------------------------------------------------------------------------------- /config/watcher_interface.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | // Watcher defines the behavior we expect from any configuration watcher 4 | type Watcher interface { 5 | GetCurrentConfig() *Config 6 | Subscribe() <-chan *Config 7 | Close() error 8 | } 9 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | hapax: 3 | build: . 4 | container_name: hapax 5 | ports: 6 | - "8081:8081" 7 | volumes: 8 | - ./config.yaml:/app/config.yaml 9 | environment: 10 | - TZ=UTC 11 | restart: unless-stopped 12 | healthcheck: 13 | test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:8081/health"] 14 | interval: 30s 15 | timeout: 3s 16 | retries: 3 17 | networks: 18 | - hapax-net 19 | 20 | prometheus: 21 | image: prom/prometheus:latest 22 | ports: 23 | - "9090:9090" 24 | volumes: 25 | - ./prometheus.yml:/etc/prometheus/prometheus.yml 26 | depends_on: 27 | - hapax 28 | networks: 29 | - hapax-net 30 | 31 | networks: 32 | hapax-net: 33 | driver: bridge 34 | -------------------------------------------------------------------------------- /docs/_config.yml: -------------------------------------------------------------------------------- 1 | title: "Hapax" 2 | description: "The reliability layer between your code and LLM providers" 3 | remote_theme: "just-the-docs/just-the-docs" 4 | 5 | # Theme and Navigation 6 | color_scheme: "dark" 7 | nav_spacing: "normal" 8 | heading_anchors: true 9 | back_to_top: true 
10 | back_to_top_text: "Back to top" 11 | 12 | # Theme Overrides 13 | color_scheme: "dark" 14 | custom_css: true 15 | custom_css_path: "/assets/css/custom.css" 16 | 17 | # Typography and Spacing 18 | spacing_unit: 2 19 | font_size: "16px" 20 | content_width: "900px" 21 | typographic_spacing: true 22 | 23 | # Heading Styles 24 | heading_spacing: 2.5 25 | h1_size: "3rem" 26 | h2_size: "2.5rem" 27 | h3_size: "1.75rem" 28 | h4_size: "1.25rem" 29 | 30 | # Text Styles 31 | body_line_height: 1.8 32 | body_font_family: "system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif" 33 | mono_font_family: "'SFMono-Regular', Consolas, 'Liberation Mono', Menlo, Courier, monospace" 34 | 35 | # Search Configuration 36 | search_enabled: true 37 | search: 38 | heading_level: 2 39 | previews: 3 40 | preview_words_before: 5 41 | preview_words_after: 5 42 | tokenizer_separator: "/[\\s/]+/" 43 | rel_url: true 44 | button: false 45 | 46 | # Code and Content 47 | enable_copy_code_button: true 48 | code_block_padding: "1.5rem" 49 | code_font_size: "0.95em" 50 | 51 | # Callouts for Important Information 52 | callouts: 53 | note: 54 | title: "Note" 55 | color: "blue" 56 | important: 57 | title: "Important" 58 | color: "purple" 59 | warning: 60 | title: "Warning" 61 | color: "red" 62 | tip: 63 | title: "Tip" 64 | color: "green" 65 | security: 66 | title: "Security" 67 | color: "yellow" 68 | 69 | # External Links 70 | aux_links: 71 | "GitHub": ["//github.com/teilomillet/hapax"] 72 | aux_links_new_tab: true 73 | 74 | # License 75 | footer_content: "Licensed under Apache License, Version 2.0." 
76 | 77 | # Additional Styles 78 | callouts_opacity: 1 79 | callouts_padding: "1.5rem" 80 | table_font_size: "0.95em" 81 | nav_font_size: "1rem" 82 | sidebar_width: "280px" -------------------------------------------------------------------------------- /docs/assets/css/custom.css: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/blog/why-we-built-hapax.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: page 3 | title: Hapax - The Missing Layer in Enterprise AI Infrastructure 4 | nav_order: 1 5 | --- 6 | 7 | # Hapax: The Missing Layer in Enterprise AI Infrastructure 8 | 9 | Every conversation with companies implementing AI follows a strikingly similar pattern. As a consultant, I'd walk into their offices and find teams of engineers wrestling with the same fundamental challenges. They weren't struggling with the exciting parts of AI - the innovative features or creative applications. Instead, they were bogged down by infrastructure concerns that seemed to repeat across every organization. 10 | 11 | The story usually begins with experimentation. A company starts playing with different AI models, testing capabilities across providers like OpenAI, Anthropic, and Ollama. They're model hoppers, constantly switching between providers as they discover new capabilities or run into limitations. This experimentation is valuable, but it creates a hidden cost: each switch requires engineering time to adapt their infrastructure. 12 | 13 | What struck me most was watching companies build the same solutions over and over. One week, I'd watch a startup implement retry logic for their AI calls. The next week, I'd find an enterprise team building nearly identical failover systems. 
These weren't small companies making rookie mistakes - these were sophisticated teams spending valuable time solving infrastructure problems instead of building their core products. 14 | 15 | The real wake-up call came when discussing monitoring and usage tracking. Companies could tell me their total API costs, but they struggled to answer basic questions about their AI operations. Which endpoints were most active? What was their actual uptime? How were different teams using these services? The data existed, but the infrastructure to make sense of it didn't. 16 | 17 | The pattern became clear: the missing piece wasn't AI capability - it was the infrastructure layer that makes AI reliable, observable, and manageable in production. Small companies were hitting a wall, forced to choose between hiring specialized talent or limiting their AI ambitions. Large corporations were forming entire teams just to manage these basic infrastructure needs. 18 | 19 | When I looked at how companies were handling these challenges, I saw a concerning pattern. Each organization was building their own infrastructure from scratch, writing thousands of lines of code to handle basic needs like retries and failover. A typical homegrown solution might look something like this: 20 | 21 | ```python 22 | async def make_ai_request(prompt, retries=3): 23 | for attempt in range(retries): 24 | try: 25 | response = await primary_provider.create_completion(prompt) 26 | return response 27 | except ProviderError: 28 | if attempt == retries - 1: 29 | try: 30 | # Attempt with backup provider 31 | return await backup_provider.create_completion(prompt) 32 | except: 33 | raise 34 | time.sleep(2 ** attempt) # Basic exponential backoff 35 | ``` 36 | 37 | This code might work for simple cases, but it lacks proper error handling, doesn't consider provider health, offers no visibility into performance, and requires significant maintenance as providers evolve. 
Now multiply this across different teams and departments, each building their own version, each maintaining their own infrastructure. 38 | 39 | Hapax transforms this complexity into a simple configuration: 40 | 41 | ```yaml 42 | providers: 43 | anthropic: 44 | type: anthropic 45 | model: claude-3-haiku 46 | api_key: ${ANTHROPIC_API_KEY} 47 | openai: 48 | type: openai 49 | model: gpt-4 50 | api_key: ${OPENAI_API_KEY} 51 | 52 | provider_preference: 53 | - anthropic 54 | - openai 55 | ``` 56 | 57 | With this configuration, you get enterprise-grade infrastructure that includes intelligent failover between providers with health monitoring, comprehensive metrics through Prometheus integration, sophisticated request queuing and deduplication, real-time performance monitoring, structured logging for debugging, and HTTP/3 support for mobile users. 58 | 59 | Consider how this changes your operations. Instead of each team implementing their own retry logic and monitoring, they can focus on building features. When a provider has issues, Hapax automatically detects the problem through its health checks and routes traffic to healthy providers. Your applications continue running without interruption. 60 | 61 | The monitoring system gives you immediate visibility into your AI operations. Want to understand how different departments use AI? Create department-specific endpoints: 62 | 63 | ```yaml 64 | routes: 65 | - path: "/v1/marketing/completions" 66 | handler: completion 67 | version: v1 68 | metrics_labels: 69 | department: marketing 70 | ``` 71 | 72 | Now you can track usage, performance, and costs per department through your existing monitoring tools like Grafana, Power BI or Tableau. No custom integration required - Hapax provides these metrics through standard Prometheus endpoints. 73 | 74 | For mobile applications, Hapax's HTTP/3 support ensures reliable service even as users move between networks. 
The connection migration capabilities mean that if a user switches from WiFi to cellular, their AI interactions continue seamlessly. This isn't just a technical feature - it's about providing consistent service to your users regardless of their connection. 75 | 76 | Think about what this means for your organization. Rather than every team reinventing infrastructure, you have a standardized, production-ready solution that deploys in minutes with a single Docker command, integrates with your existing monitoring stack, handles provider failures automatically, gives you complete visibility into AI operations, and scales with your needs. 77 | 78 | The real power of Hapax becomes clear when you look toward the future. As AI continues to transform how we build software, the need for reliable, observable infrastructure only grows. Consider how your organization's AI journey might unfold: 79 | 80 | Today, you might start with a simple customer service enhancement using LLMs. With Hapax, this means adding a few lines to your configuration file, and suddenly you have production-ready infrastructure that rivals what large tech companies have built internally. Your engineers don't need to worry about provider outages or performance monitoring - they can focus entirely on crafting the perfect customer experience. 81 | 82 | As your AI usage grows, Hapax grows with you. When your marketing team wants to experiment with different AI providers for content generation, you won't need to build new infrastructure or hire specialists. They can simply use their dedicated endpoint, while Hapax handles the complexity of provider management and gives them real-time visibility into their usage and costs. 83 | 84 | The transformation continues as AI becomes central to your operations. Your data science team might want to A/B test different models, your product team might need geographic routing for global customers, and your finance team might require detailed cost allocation. 
With Hapax, these aren't infrastructure challenges - they're just configuration changes. 85 | 86 | This standardization brings another powerful benefit: knowledge sharing across your organization. Instead of each team developing their own best practices for AI deployment, they build on a common foundation. A solution discovered by your customer service team can be immediately applied to your sales team's AI implementations. Your organization learns and improves as a unified whole. 87 | 88 | We're building Hapax in the open because we believe reliable AI infrastructure shouldn't be limited to companies with massive engineering resources. Whether you're a startup launching your first AI feature or an enterprise scaling to millions of requests, you deserve infrastructure that just works. 89 | 90 | Ready to transform how your organization builds with AI? Deploy Hapax in minutes with our Docker container, or dive into our documentation to learn more. Join us in building the foundation for the next generation of AI applications - where infrastructure is an enabler, not a barrier, to innovation. 91 | 92 | [Get Started with Hapax](/docs/getting-started) 93 | [Join Our Community](https://github.com/teilomillet/hapax) 94 | [Read the Documentation](/docs/) -------------------------------------------------------------------------------- /docs/getting-started.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teilomillet/hapax/7c08e64129a1c0adfea8a65dd7a608fd2b345a37/docs/getting-started.md -------------------------------------------------------------------------------- /docs/getting-started/5-minute-setup.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: page 3 | title: 5-Minute Setup 4 | parent: Getting Started 5 | nav_order: 1 6 | --- 7 | 8 | # 5-Minute Setup 9 | 10 | This guide will get you running with Hapax in under 5 minutes using Docker. 
11 | 12 | {: .note } 13 | > **Prerequisites** 14 | > - Docker installed 15 | > - API key from any supported provider (OpenAI, Anthropic, etc.) 16 | 17 | ## 1. Run Hapax 18 | 19 | Copy and run this command, replacing `your_key` with your API key: 20 | 21 | ```bash 22 | docker run -p 8080:8080 \ 23 | -e OPENAI_API_KEY=your_key \ 24 | teilomillet/hapax:latest 25 | ``` 26 | 27 | ## 2. Verify Installation 28 | 29 | Test that Hapax is running: 30 | 31 | ```bash 32 | curl http://localhost:8080/health 33 | # Expected: {"status":"ok"} 34 | ``` 35 | 36 | ## 3. Make Your First Request 37 | 38 | Send a test completion request: 39 | 40 | ```bash 41 | curl -X POST http://localhost:8080/v1/completions \ 42 | -H "Content-Type: application/json" \ 43 | -d '{"messages":[{"role":"user","content":"Hello"}]}' 44 | ``` -------------------------------------------------------------------------------- /docs/getting-started/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: page 3 | title: Getting Started 4 | nav_order: 2 5 | has_children: true 6 | --- 7 | 8 | # Getting Started with Hapax 9 | 10 | Choose your path to get started with Hapax: 11 | 12 | {: .note } 13 | > **New to Hapax?** 14 | > Start with our [5-Minute Setup](5-minute-setup) guide to get running quickly. 15 | 16 | ## Installation Options 17 | 18 | 1. **[5-Minute Setup](5-minute-setup)** 19 | - Fastest path to running Hapax 20 | - Uses Docker 21 | - Minimal configuration needed 22 | 23 | 2. **[Manual Installation](manual-installation)** 24 | - Full control over installation 25 | - Suitable for development 26 | - Access to all features 27 | 28 | 3. 
**[Production Setup](../production)** 29 | - Secure deployment configuration 30 | - Monitoring setup 31 | - Performance optimization -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: home 3 | title: Home 4 | nav_order: 1 5 | --- 6 | 7 | # Hapax Documentation 8 | 9 | {: .fs-9 } 10 | The reliability layer between your code and LLM providers 11 | 12 | {: .fs-6 .fw-300 } 13 | A lightweight, production-ready infrastructure layer that ensures continuous operation through intelligent provider management and automatic failover. 14 | 15 | [Quick Start](getting-started/5-minute-setup){: .btn .btn-primary .fs-5 .mb-4 .mb-md-0 .mr-2 } 16 | [View Source](https://github.com/teilomillet/hapax){: .btn .fs-5 .mb-4 .mb-md-0 } 17 | 18 | --- 19 | 20 | ## Why Hapax? 21 | 22 | {: .important } 23 | > Hapax addresses the fundamental challenges of working with LLM providers: service reliability, provider management, and operational visibility. 24 | 25 | ### Key Benefits 26 | 27 | {: .note } 28 | > **Continuous Operation** 29 | > Automatic failover between providers maintains service availability during outages or degraded performance. 30 | 31 | {: .note } 32 | > **Minimal Configuration** 33 | > Single configuration file handles all provider settings, health checks, and failover logic. 34 | 35 | {: .note } 36 | > **Operational Insight** 37 | > Built-in metrics expose detailed provider performance, request patterns, and system health. 
38 | 39 | ## Core Features 40 | 41 | ### Intelligent Provider Management 42 | - Health monitoring with configurable thresholds 43 | - Automatic provider failover 44 | - Circuit breaker implementation 45 | - Request deduplication 46 | 47 | ### System Architecture 48 | - Request routing and load distribution 49 | - Comprehensive error handling 50 | - Structured logging with request tracing 51 | - HTTP/3 support 52 | 53 | ### Security and Monitoring 54 | - API key-based authentication 55 | - Request validation 56 | - Usage metrics per endpoint 57 | - Prometheus integration 58 | 59 | ## Documentation 60 | 61 | - [Quick Start](getting-started/5-minute-setup) 62 | - [Core Features](core-features) 63 | - [Production Setup](production) 64 | - [API Reference](api) 65 | 66 | ## Development 67 | 68 | Find issues or want to contribute? 69 | - [Source Code](https://github.com/teilomillet/hapax) 70 | - [Issue Tracker](https://github.com/teilomillet/hapax/issues) 71 | - [Security Guide](production/security) 72 | - [Configuration Reference](getting-started/configuration) -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: page 3 | title: Installation 4 | nav_order: 2 5 | --- 6 | 7 | # Installation Guide 8 | 9 | This guide helps you choose and implement the best installation method for your needs. Each method has been thoroughly tested and validated in production environments. 10 | 11 | ## Choosing Your Installation Method 12 | 13 | Hapax offers multiple installation methods to accommodate different use cases. Whether you're evaluating the service, developing new features, or deploying to production, there's a path designed for your needs. 14 | 15 | ### Quick Decision Guide 16 | 1. 
**Docker Installation** (Recommended for most users) 17 | - Best for: Quick testing, production deployments 18 | - Advantages: Isolated environment, easy updates, verified base image (~17MB) 19 | - Trade-offs: Less customization flexibility 20 | 21 | 2. **Manual Installation** 22 | - Best for: Development, customization 23 | - Advantages: Full control, easier debugging, standard Go toolchain 24 | - Trade-offs: More setup steps, environment management 25 | 26 | 3. **Production Setup** 27 | - Best for: Enterprise deployments 28 | - Advantages: Scalability (tested to 100+ concurrent users), built-in monitoring 29 | - Trade-offs: More complex configuration, resource intensive 30 | 31 | Take a moment to consider your primary goal. This will help you choose the most appropriate installation method: 32 | - "I want to try Hapax quickly" → Docker Quick Start (5-minute setup) 33 | - "I need to modify the code" → Manual Installation (standard Go project) 34 | - "I'm deploying to production" → Production Setup (enterprise-ready) 35 | 36 | ## System Requirements 37 | 38 | Before you begin installation, ensure your environment meets the necessary requirements. We've separated these into mandatory and optional components to help you plan your deployment effectively. 39 | 40 | ### Mandatory Requirements (Why?) 41 | - **LLM Provider Access**: Core functionality depends on LLM API 42 | - **API Keys**: Secure provider authentication 43 | - **512MB RAM**: Verified base memory footprint 44 | - **100MB Disk**: Tested minimum storage requirement 45 | - **Go 1.22+**: Latest stable release support 46 | 47 | ### Optional Requirements (Why?) 
48 | - **2+ CPU Cores**: Verified for concurrent request handling 49 | - **2GB+ RAM**: Tested for caching and queue management 50 | - **1GB+ Disk**: Validated for logging and metrics 51 | - **TLS Certificates**: Production security (HTTP/3 support) 52 | - **Docker**: Industry-standard containerization 53 | 54 | ## Installation Methods 55 | 56 | Now that you've chosen your installation method and verified your system requirements, let's proceed with the installation. Each method includes verification steps to ensure everything is working correctly. 57 | 58 | ### 1. Docker Quick Start (5 minutes) 59 | The Docker installation method provides the fastest path to a running system. It's preconfigured with sensible defaults and includes all necessary dependencies. 60 | 61 | ```bash 62 | docker run -p 8080:8080 \ 63 | -e OPENAI_API_KEY=your_key \ 64 | -e CONFIG_PATH=/app/config.yaml \ 65 | teilomillet/hapax:latest 66 | ``` 67 | 68 | After running this command, take a moment to verify the installation: 69 | ```bash 70 | # Should return HTTP 200 71 | curl http://localhost:8080/health 72 | ``` 73 | 74 | ### 2. Manual Installation (15 minutes) 75 | The manual installation gives you full control over the build process and is ideal for development work. Follow these steps carefully: 76 | 77 | 1. Clone and build: 78 | ```bash 79 | git clone https://github.com/teilomillet/hapax.git 80 | cd hapax 81 | go build -o hapax cmd/hapax/main.go 82 | ``` 83 | 84 | 2. Configure: 85 | ```bash 86 | cp config.example.yaml config.yaml 87 | # Required: Provider configuration 88 | export OPENAI_API_KEY="your_key" 89 | # Optional: Logging setup 90 | export LOG_LEVEL="info" 91 | ``` 92 | 93 | 3. Run: 94 | ```bash 95 | ./hapax --config config.yaml 96 | ``` 97 | 98 | ### 3. 
Production Setup (30 minutes) 99 | For production environments, we recommend this more robust setup that includes logging, monitoring, and automatic restarts: 100 | 101 | ```bash 102 | docker run -d \ 103 | --name hapax \ 104 | --restart unless-stopped \ 105 | -p 8080:8080 \ 106 | -v $(pwd)/config.yaml:/app/config.yaml \ 107 | -v $(pwd)/logs:/app/logs \ 108 | -e OPENAI_API_KEY=your_key \ 109 | --log-driver=json-file \ 110 | --log-opt max-size=10m \ 111 | teilomillet/hapax:latest 112 | ``` 113 | 114 | ## Verification Guide 115 | 116 | After installation, it's crucial to verify that everything is working correctly. We provide a series of checks that progress from basic connectivity to full functionality testing. 117 | 118 | ### How to Know It's Working 119 | 120 | 1. **Health Check** (Basic Verification) 121 | ```bash 122 | curl http://localhost:8080/health 123 | # Expected: {"status":"ok"} 124 | ``` 125 | 126 | 2. **Functionality Test** (Core Feature Check) 127 | ```bash 128 | curl -X POST http://localhost:8080/v1/completions \ 129 | -H "Content-Type: application/json" \ 130 | -d '{"messages":[{"role":"user","content":"Hello"}]}' 131 | # Expected: Response with generated content 132 | ``` 133 | 134 | 3. **Performance Check** (Optional) 135 | ```bash 136 | curl http://localhost:8080/metrics 137 | # Expected: Prometheus metrics data 138 | ``` 139 | 140 | ### Common Issues and Solutions 141 | 142 | If you encounter any issues during installation or verification, here are some common problems and their solutions: 143 | 144 | 1. **API Key Issues** 145 | - Symptom: 401 Unauthorized 146 | - Solution: Check environment variables 147 | ```bash 148 | echo $OPENAI_API_KEY # Should show your key 149 | ``` 150 | 151 | 2. **Port Conflicts** 152 | - Symptom: Address already in use 153 | - Solution: Change port in config or check running processes 154 | ```bash 155 | lsof -i :8080 # Check port usage 156 | ``` 157 | 158 | 3. 
**Configuration Errors** 159 | - Symptom: Server won't start 160 | - Solution: Validate configuration 161 | ```bash 162 | ./hapax --validate --config config.yaml 163 | ``` 164 | 165 | ## When Can You Use It? 166 | 167 | You'll know your Hapax installation is ready for use when you've completed these key checkpoints: 168 | 1. Health check returns `{"status":"ok"}` 169 | 2. Test completion request succeeds 170 | 3. No errors in logs (`docker logs hapax` or local logs) 171 | 172 | ### Next Steps After Installation 173 | Once your installation is verified, consider these steps to enhance your deployment: 174 | - Configure additional providers for redundancy 175 | - Enable optional features based on your needs 176 | - Set up monitoring for production visibility 177 | - Implement security measures for your environment 178 | 179 | Need help? Our documentation and community resources are here to assist: 180 | - [Configuration Guide](configuration.md) 181 | - [GitHub Issues](https://github.com/teilomillet/hapax/issues) 182 | - [Full Documentation](https://teilomillet.github.io/hapax) -------------------------------------------------------------------------------- /docs/performance.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: page 3 | title: Performance 4 | nav_order: 4 5 | --- 6 | 7 | # Performance Guide 8 | 9 | This guide covers performance optimization strategies for Hapax, including HTTP/3, caching, queuing, and load management. 
10 | 11 | ## Performance Features 12 | 13 | ### HTTP/3 Support 14 | 15 | Hapax supports HTTP/3 (QUIC) for improved performance: 16 | 17 | ```yaml 18 | server: 19 | http3: 20 | enabled: true 21 | port: 443 22 | tls_cert_file: "/etc/certs/server.crt" 23 | tls_key_file: "/etc/certs/server.key" 24 | idle_timeout: 30s 25 | max_bi_streams_concurrent: 100 # Concurrent bidirectional streams 26 | max_uni_streams_concurrent: 100 # Concurrent unidirectional streams 27 | max_stream_receive_window: 6291456 # 6MB stream window 28 | max_connection_receive_window: 15728640 # 15MB connection window 29 | enable_0rtt: true # Enable 0-RTT for faster connections 30 | max_0rtt_size: 16384 # 16KB max 0-RTT size 31 | allow_0rtt_replay: false # Disable replay protection 32 | udp_receive_buffer_size: 8388608 # 8MB UDP buffer 33 | ``` 34 | 35 | Benefits of HTTP/3: 36 | - Improved connection establishment 37 | - Better multiplexing 38 | - Reduced head-of-line blocking 39 | - Enhanced mobile performance 40 | - Faster connection recovery 41 | 42 | ### Response Caching 43 | 44 | Three caching strategies available: 45 | 46 | ```yaml 47 | llm: 48 | cache: 49 | enable: true 50 | type: "redis" # Options: memory, redis, file 51 | ttl: 24h # Cache entry lifetime 52 | max_size: 1000 # Maximum entries/size 53 | redis: # Redis-specific settings 54 | address: "localhost:6379" 55 | password: ${REDIS_PASSWORD} 56 | db: 0 57 | ``` 58 | 59 | Cache types: 60 | - Memory: Fast, non-persistent, cleared on restart 61 | - Redis: Persistent, distributed, good for clusters 62 | - File: Persistent, good for single instances 63 | 64 | ### Request Queuing 65 | 66 | Queue system for high-load scenarios: 67 | 68 | ```yaml 69 | queue: 70 | enabled: true 71 | initial_size: 1000 # Starting queue capacity 72 | state_path: "/var/lib/hapax/queue.state" # Persistence path 73 | save_interval: 30s # State save frequency 74 | ``` 75 | 76 | Benefits: 77 | - Handles traffic spikes 78 | - Prevents system overload 79 | - Optional state 
persistence 80 | - Configurable queue size 81 | 82 | ### Circuit Breaker 83 | 84 | Protects system from cascading failures: 85 | 86 | ```yaml 87 | circuit_breaker: 88 | max_requests: 100 # Requests in half-open state 89 | interval: 30s # Monitoring interval 90 | timeout: 10s # Time in open state 91 | failure_threshold: 5 # Failures before opening 92 | ``` 93 | 94 | States: 95 | - Closed: Normal operation 96 | - Open: Stop requests after failures 97 | - Half-Open: Testing recovery 98 | 99 | ### Provider Failover 100 | 101 | Automatic provider switching for reliability: 102 | 103 | ```yaml 104 | providers: 105 | anthropic: 106 | type: anthropic 107 | model: claude-3-haiku 108 | api_key: ${ANTHROPIC_API_KEY} 109 | openai: 110 | type: openai 111 | model: gpt-4 112 | api_key: ${OPENAI_API_KEY} 113 | 114 | provider_preference: 115 | - anthropic 116 | - openai 117 | ``` 118 | 119 | Features: 120 | - Automatic failover 121 | - Health monitoring 122 | - Configurable preference order 123 | - Seamless switching 124 | 125 | ## Performance Tuning 126 | 127 | ### Memory Optimization 128 | 129 | Adjust these settings based on available memory: 130 | - `max_header_bytes`: HTTP header size limit 131 | - `max_stream_receive_window`: Per-stream buffer 132 | - `max_connection_receive_window`: Per-connection buffer 133 | - Cache size limits 134 | 135 | ### Concurrency Settings 136 | 137 | Tune these for your workload: 138 | - `max_bi_streams_concurrent`: Bidirectional streams 139 | - `max_uni_streams_concurrent`: Unidirectional streams 140 | - Queue size and persistence 141 | - Circuit breaker thresholds 142 | 143 | ### Network Optimization 144 | 145 | Network performance settings: 146 | - HTTP/3 buffer sizes 147 | - UDP receive buffer size 148 | - Idle timeouts 149 | - 0-RTT configuration 150 | 151 | ### Monitoring Performance 152 | 153 | Use built-in metrics: 154 | ```yaml 155 | routes: 156 | - path: "/metrics" 157 | handler: "metrics" 158 | version: "v1" 159 | methods: ["GET"] 160 | 
middleware: ["auth"] 161 | ``` 162 | 163 | Available metrics: 164 | - Request latencies 165 | - Queue lengths 166 | - Cache hit rates 167 | - Circuit breaker states 168 | - Provider health status 169 | 170 | ## Best Practices 171 | 172 | ### Development Environment 173 | ```yaml 174 | server: 175 | port: 8080 176 | http3: 177 | enabled: false 178 | llm: 179 | cache: 180 | type: "memory" 181 | max_size: 1000 182 | queue: 183 | enabled: false 184 | ``` 185 | 186 | ### Production Environment 187 | ```yaml 188 | server: 189 | port: 443 190 | http3: 191 | enabled: true 192 | max_bi_streams_concurrent: 200 193 | max_stream_receive_window: 8388608 # 8MB 194 | llm: 195 | cache: 196 | type: "redis" 197 | ttl: 24h 198 | queue: 199 | enabled: true 200 | initial_size: 5000 201 | state_path: "/var/lib/hapax/queue.state" 202 | circuit_breaker: 203 | max_requests: 200 204 | failure_threshold: 10 205 | ``` 206 | 207 | ### High-Load Environment 208 | ```yaml 209 | server: 210 | http3: 211 | max_bi_streams_concurrent: 500 212 | max_stream_receive_window: 16777216 # 16MB 213 | max_connection_receive_window: 33554432 # 32MB 214 | udp_receive_buffer_size: 16777216 # 16MB 215 | llm: 216 | cache: 217 | type: "redis" 218 | max_size: 10000 219 | queue: 220 | enabled: true 221 | initial_size: 10000 222 | circuit_breaker: 223 | max_requests: 500 224 | interval: 60s 225 | ``` 226 | 227 | ## Troubleshooting 228 | 229 | Common performance issues and solutions: 230 | 231 | ### High Latency 232 | - Enable HTTP/3 233 | - Increase stream windows 234 | - Adjust UDP buffer size 235 | - Check provider health 236 | 237 | ### Memory Usage 238 | - Reduce cache size 239 | - Lower stream limits 240 | - Adjust queue size 241 | - Monitor metrics 242 | 243 | ### Request Failures 244 | - Check circuit breaker logs 245 | - Verify provider health 246 | - Adjust retry settings 247 | - Enable failover 248 | 249 | ### Queue Overflow 250 | - Increase queue size 251 | - Enable persistence 252 | - Adjust circuit 
breaker 253 | - Scale horizontally -------------------------------------------------------------------------------- /errors/errors.go: -------------------------------------------------------------------------------- 1 | // Package errors provides a comprehensive error handling system for the Hapax LLM gateway. 2 | // It includes structured error types, JSON response formatting, request ID tracking, 3 | // and integrated logging with Uber's zap logger. 4 | // 5 | // The package is designed to be used throughout the Hapax codebase to provide 6 | // consistent error handling and reporting. It offers several key features: 7 | // 8 | // - Structured JSON error responses with type information 9 | // - Request ID tracking for error correlation 10 | // - Integrated logging with zap 11 | // - Custom error types for different scenarios 12 | // - Middleware integration for panic recovery 13 | // 14 | // Basic usage: 15 | // 16 | // // Simple error response 17 | // errors.Error(w, "Something went wrong", http.StatusBadRequest) 18 | // 19 | // // Type-specific error with context 20 | // errors.ErrorWithType(w, "Invalid input", errors.ValidationError, http.StatusBadRequest) 21 | // 22 | // For more complex scenarios, you can use the error constructors in types.go: 23 | // 24 | // err := errors.NewValidationError(requestID, "Invalid input", map[string]interface{}{ 25 | // "field": "username", 26 | // "error": "required", 27 | // }) 28 | package errors 29 | 30 | import ( 31 | "encoding/json" 32 | "fmt" 33 | "net/http" 34 | 35 | "go.uber.org/zap" 36 | ) 37 | 38 | // DefaultLogger is the default zap logger instance used throughout the package. 39 | // It is initialized to a production configuration but can be overridden using SetLogger. 
40 | var DefaultLogger *zap.Logger 41 | 42 | func init() { 43 | var err error 44 | DefaultLogger, err = zap.NewProduction() 45 | if err != nil { 46 | DefaultLogger = zap.NewNop() 47 | } 48 | } 49 | 50 | // SetLogger allows setting a custom zap logger instance. 51 | // If nil is provided, the function will do nothing to prevent 52 | // accidentally disabling logging. 53 | func SetLogger(logger *zap.Logger) { 54 | if logger != nil { 55 | DefaultLogger = logger 56 | } 57 | } 58 | 59 | // ErrorType represents different categories of errors that can occur 60 | // in the Hapax system. Each type corresponds to a specific kind of 61 | // error scenario and carries appropriate HTTP status codes and handling logic. 62 | type ErrorType string 63 | 64 | const ( 65 | // AuthError represents authentication and authorization failures 66 | AuthError ErrorType = "authentication_error" 67 | 68 | // ValidationError represents input validation failures 69 | ValidationError ErrorType = "validation_error" 70 | 71 | // InternalError represents unexpected internal server errors 72 | InternalError ErrorType = "internal_error" 73 | 74 | // ConfigError represents configuration-related errors 75 | ConfigError ErrorType = "config_error" 76 | 77 | // ProviderError represents errors from LLM providers 78 | ProviderError ErrorType = "provider_error" 79 | 80 | // RateLimitError represents rate limiting errors 81 | RateLimitError ErrorType = "rate_limit_error" 82 | 83 | // AuthenticationError represents API key authentication failures 84 | AuthenticationError ErrorType = "api_key_error" 85 | 86 | // BadRequestError represents invalid request format or parameters 87 | BadRequestError ErrorType = "bad_request" 88 | 89 | // NotFoundError represents resource not found errors 90 | NotFoundError ErrorType = "not_found" 91 | 92 | // UnauthorizedError represents unauthorized access attempts 93 | UnauthorizedError ErrorType = "unauthorized" 94 | 95 | // TimeoutError represents timeout errors 96 | 
TimeoutError ErrorType = "timeout_error" 97 | ) 98 | 99 | // HapaxError is our custom error type that implements the error interface 100 | // and provides additional context about the error. It is designed to be 101 | // serialized to JSON for API responses while maintaining internal error 102 | // context for logging and debugging. 103 | type HapaxError struct { 104 | // Type categorizes the error for client handling 105 | Type ErrorType `json:"type"` 106 | 107 | // Message is a human-readable error description 108 | Message string `json:"message"` 109 | 110 | // Code is the HTTP status code (not exposed in JSON) 111 | Code int `json:"-"` 112 | 113 | // RequestID links the error to a specific request 114 | RequestID string `json:"RequestIDKey"` 115 | 116 | // Details contains additional error context 117 | Details map[string]interface{} `json:"details,omitempty"` 118 | 119 | // err is the underlying error (not exposed in JSON) 120 | err error 121 | } 122 | 123 | // Error implements the error interface. It returns a string that 124 | // combines the error type, message, and underlying error (if any). 125 | func (e *HapaxError) Error() string { 126 | if e.err != nil { 127 | return fmt.Sprintf("%s: %s: %v", e.Type, e.Message, e.err) 128 | } 129 | return fmt.Sprintf("%s: %s", e.Type, e.Message) 130 | } 131 | 132 | // Unwrap returns the underlying error, implementing the unwrap 133 | // interface for error chains. 134 | func (e *HapaxError) Unwrap() error { 135 | return e.err 136 | } 137 | 138 | // Is implements error matching for errors.Is, allowing type-based 139 | // error matching while ignoring other fields. 140 | func (e *HapaxError) Is(target error) bool { 141 | t, ok := target.(*HapaxError) 142 | if !ok { 143 | return false 144 | } 145 | return e.Type == t.Type 146 | } 147 | 148 | // WriteError formats and writes a HapaxError to an http.ResponseWriter. 
149 | // It sets the appropriate content type and status code, then writes 150 | // the error as a JSON response. 151 | func WriteError(w http.ResponseWriter, err *HapaxError) { 152 | w.Header().Set("Content-Type", "application/json") 153 | w.WriteHeader(err.Code) 154 | 155 | // Check the error return from Encode 156 | if encodeErr := json.NewEncoder(w).Encode(&ErrorResponse{ 157 | Type: err.Type, 158 | Message: err.Message, 159 | RequestID: err.RequestID, 160 | Details: err.Details, 161 | }); encodeErr != nil { 162 | // Log the encoding error 163 | zap.L().Error("Failed to encode error response", zap.Error(encodeErr)) 164 | 165 | // Attempt to send a fallback error response using the existing error handling mechanism 166 | ErrorWithType(w, "Failed to encode error response", ProviderError, http.StatusInternalServerError) 167 | } 168 | } 169 | 170 | // Error is a drop-in replacement for http.Error that creates and writes 171 | // a HapaxError with the InternalError type. It automatically includes 172 | // the request ID from the response headers if available. 173 | func Error(w http.ResponseWriter, message string, code int) { 174 | requestID := w.Header().Get("X-Request-ID") 175 | err := &HapaxError{ 176 | Type: InternalError, 177 | Message: message, 178 | Code: code, 179 | RequestID: requestID, 180 | } 181 | WriteError(w, err) 182 | } 183 | 184 | // ErrorWithType is like Error but allows specifying the error type. 185 | // This is useful when you want to indicate specific error categories 186 | // to the client while maintaining the simple interface of http.Error. 
187 | func ErrorWithType(w http.ResponseWriter, message string, errType ErrorType, code int) { 188 | requestID := w.Header().Get("X-Request-ID") 189 | err := &HapaxError{ 190 | Type: errType, 191 | Message: message, 192 | Code: code, 193 | RequestID: requestID, 194 | } 195 | WriteError(w, err) 196 | } 197 | -------------------------------------------------------------------------------- /errors/errors_test.go: -------------------------------------------------------------------------------- 1 | package errors 2 | 3 | import ( 4 | "errors" 5 | "testing" 6 | ) 7 | 8 | func TestHapaxError_Error(t *testing.T) { 9 | tests := []struct { 10 | name string 11 | err *HapaxError 12 | want string 13 | wantErr bool 14 | }{ 15 | { 16 | name: "basic error without wrapped error", 17 | err: &HapaxError{ 18 | Type: ValidationError, 19 | Message: "invalid input", 20 | }, 21 | want: "validation_error: invalid input", 22 | }, 23 | { 24 | name: "error with wrapped error", 25 | err: &HapaxError{ 26 | Type: InternalError, 27 | Message: "processing failed", 28 | err: errors.New("database connection failed"), 29 | }, 30 | want: "internal_error: processing failed: database connection failed", 31 | }, 32 | } 33 | 34 | for _, tt := range tests { 35 | t.Run(tt.name, func(t *testing.T) { 36 | got := tt.err.Error() 37 | if got != tt.want { 38 | t.Errorf("HapaxError.Error() = %v, want %v", got, tt.want) 39 | } 40 | }) 41 | } 42 | } 43 | 44 | func TestHapaxError_Is(t *testing.T) { 45 | err1 := &HapaxError{Type: AuthError, Message: "test1"} 46 | err2 := &HapaxError{Type: AuthError, Message: "test2"} 47 | err3 := &HapaxError{Type: ValidationError, Message: "test3"} 48 | 49 | if !err1.Is(err2) { 50 | t.Error("Expected err1.Is(err2) to be true for same error type") 51 | } 52 | 53 | if err1.Is(err3) { 54 | t.Error("Expected err1.Is(err3) to be false for different error types") 55 | } 56 | } 57 | 58 | func TestHapaxError_Unwrap(t *testing.T) { 59 | innerErr := errors.New("inner error") 60 | err := 
&HapaxError{ 61 | Type: InternalError, 62 | Message: "outer error", 63 | err: innerErr, 64 | } 65 | 66 | if unwrapped := err.Unwrap(); unwrapped != innerErr { 67 | t.Errorf("Unwrap() = %v, want %v", unwrapped, innerErr) 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /errors/handlers.go: -------------------------------------------------------------------------------- 1 | // Package errors provides error handling middleware and utilities. 2 | package errors 3 | 4 | import ( 5 | "net/http" 6 | "runtime/debug" 7 | 8 | "go.uber.org/zap" 9 | ) 10 | 11 | // ErrorHandler wraps an http.Handler and provides error handling 12 | // If a panic occurs during request processing, it: 13 | // 1. Logs the panic and stack trace 14 | // 2. Returns a 500 Internal Server Error to the client 15 | // 3. Includes the request ID in both the log and response 16 | // 17 | // The panic recovery ensures that the server continues running even if 18 | // individual requests panic. All panics are logged with their stack traces 19 | // for debugging purposes. 
20 | // 21 | // Example usage: 22 | // 23 | // router.Use(errors.ErrorHandler(logger)) 24 | func ErrorHandler(logger *zap.Logger) func(http.Handler) http.Handler { 25 | return func(next http.Handler) http.Handler { 26 | return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 27 | defer func() { 28 | if err := recover(); err != nil { 29 | stack := debug.Stack() 30 | logger.Error("panic recovered", 31 | zap.Any("error", err), 32 | zap.ByteString("stacktrace", stack), 33 | zap.String(string(RequestIDKey), r.Header.Get("X-Request-ID")), 34 | ) 35 | 36 | hapaxErr := NewInternalError(r.Header.Get("X-Request-ID"), nil) 37 | WriteError(w, hapaxErr) 38 | } 39 | }() 40 | 41 | next.ServeHTTP(w, r) 42 | }) 43 | } 44 | } 45 | 46 | // LogError logs an error with its context 47 | // It ensures that all errors are properly logged with their context, including: 48 | // - Error type and message 49 | // - Request ID 50 | // - HTTP method and URL 51 | // - Status code 52 | // 53 | // Example usage: 54 | // 55 | // errors.LogError(logger, err, requestID) 56 | func LogError(logger *zap.Logger, err error, requestID string) { 57 | if hapaxErr, ok := err.(*HapaxError); ok { 58 | logger.Error("request error", 59 | zap.String("error_type", string(hapaxErr.Type)), 60 | zap.String("message", hapaxErr.Message), 61 | zap.Int("code", hapaxErr.Code), 62 | zap.String(string(RequestIDKey), requestID), 63 | zap.Any("details", hapaxErr.Details), 64 | ) 65 | } else { 66 | logger.Error("unexpected error", 67 | zap.Error(err), 68 | zap.String(string(RequestIDKey), requestID), 69 | ) 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /errors/handlers_test.go: -------------------------------------------------------------------------------- 1 | package errors 2 | 3 | import ( 4 | "net/http" 5 | "net/http/httptest" 6 | "testing" 7 | 8 | "go.uber.org/zap" 9 | ) 10 | 11 | func TestErrorHandler(t *testing.T) { 12 | logger := zap.NewNop() 13 | 14 | 
tests := []struct { 15 | name string 16 | handler http.Handler 17 | expectedCode int 18 | expectPanic bool 19 | }{ 20 | { 21 | name: "normal handler", 22 | handler: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 23 | w.WriteHeader(http.StatusOK) 24 | }), 25 | expectedCode: http.StatusOK, 26 | expectPanic: false, 27 | }, 28 | { 29 | name: "panicking handler", 30 | handler: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 31 | panic("test panic") 32 | }), 33 | expectedCode: http.StatusInternalServerError, 34 | expectPanic: true, 35 | }, 36 | } 37 | 38 | for _, tt := range tests { 39 | t.Run(tt.name, func(t *testing.T) { 40 | // Create a test request 41 | req := httptest.NewRequest("GET", "/test", nil) 42 | req.Header.Set("X-Request-ID", "test-request-id") 43 | 44 | // Create a response recorder 45 | rr := httptest.NewRecorder() 46 | 47 | // Wrap the handler with our error handler 48 | handler := ErrorHandler(logger)(tt.handler) 49 | 50 | // Execute the handler 51 | handler.ServeHTTP(rr, req) 52 | 53 | // Check the status code 54 | if rr.Code != tt.expectedCode { 55 | t.Errorf("handler returned wrong status code: got %v want %v", 56 | rr.Code, tt.expectedCode) 57 | } 58 | }) 59 | } 60 | } 61 | 62 | func TestLogError(t *testing.T) { 63 | logger := zap.NewNop() 64 | requestID := "test-request-id" 65 | 66 | // Test logging a HapaxError 67 | hapaxErr := NewValidationError(requestID, "test error", nil) 68 | LogError(logger, hapaxErr, requestID) 69 | 70 | // Test logging a standard error 71 | standardErr := NewInternalError(requestID, nil) 72 | LogError(logger, standardErr, requestID) 73 | 74 | // Note: Since we're using a NOP logger, we can't verify the output 75 | // In a real application, you might want to use zap/zaptest for more detailed assertions 76 | } 77 | -------------------------------------------------------------------------------- /errors/responses.go: 
-------------------------------------------------------------------------------- 1 | // Package errors provides error response utilities. 2 | package errors 3 | 4 | import ( 5 | "errors" 6 | ) 7 | 8 | const RequestIDKey = "request_id" 9 | 10 | // ErrorResponse represents a standardized error response format 11 | // that is returned to clients when an error occurs. It includes: 12 | // - Error type for categorization 13 | // - Human-readable message 14 | // - Request ID for correlation 15 | // - Optional details for additional context 16 | type ErrorResponse struct { 17 | Type ErrorType `json:"type"` 18 | Message string `json:"message"` 19 | RequestID string `json:"request_id"` 20 | Details map[string]interface{} `json:"details,omitempty"` 21 | } 22 | 23 | // As is a wrapper around errors.As for better error type assertion 24 | func As(err error, target interface{}) bool { 25 | return errors.As(err, target) 26 | } 27 | -------------------------------------------------------------------------------- /errors/responses_test.go: -------------------------------------------------------------------------------- 1 | package errors 2 | 3 | import ( 4 | "encoding/json" 5 | "net/http" 6 | "net/http/httptest" 7 | "testing" 8 | ) 9 | 10 | func TestWriteError(t *testing.T) { 11 | tests := []struct { 12 | name string 13 | err *HapaxError 14 | expectedCode int 15 | expectedType ErrorType 16 | expectedFields []string 17 | }{ 18 | { 19 | name: "hapax error", 20 | err: &HapaxError{ 21 | Type: AuthError, 22 | Message: "unauthorized", 23 | Code: http.StatusUnauthorized, 24 | RequestID: "test-id", 25 | }, 26 | expectedCode: http.StatusUnauthorized, 27 | expectedType: AuthError, 28 | expectedFields: []string{"type", "message", string(RequestIDKey)}, 29 | }, 30 | { 31 | name: "error with details", 32 | err: &HapaxError{ 33 | Type: ValidationError, 34 | Message: "validation failed", 35 | Code: http.StatusBadRequest, 36 | RequestID: "test-id", 37 | Details: map[string]interface{}{ 38 | 
"field": "username", 39 | "error": "required", 40 | }, 41 | }, 42 | expectedCode: http.StatusBadRequest, 43 | expectedType: ValidationError, 44 | expectedFields: []string{"type", "message", string(RequestIDKey), "details"}, 45 | }, 46 | } 47 | 48 | for _, tt := range tests { 49 | t.Run(tt.name, func(t *testing.T) { 50 | rr := httptest.NewRecorder() 51 | 52 | WriteError(rr, tt.err) 53 | 54 | if rr.Code != tt.expectedCode { 55 | t.Errorf("WriteError() status = %v, want %v", rr.Code, tt.expectedCode) 56 | } 57 | 58 | contentType := rr.Header().Get("Content-Type") 59 | if contentType != "application/json" { 60 | t.Errorf("WriteError() content-type = %v, want application/json", contentType) 61 | } 62 | 63 | var response map[string]interface{} 64 | if err := json.NewDecoder(rr.Body).Decode(&response); err != nil { 65 | t.Fatalf("Failed to decode response body: %v", err) 66 | } 67 | 68 | if errorType, ok := response["type"].(string); !ok || ErrorType(errorType) != tt.expectedType { 69 | t.Errorf("WriteError() error type = %v, want %v", errorType, tt.expectedType) 70 | } 71 | 72 | for _, field := range tt.expectedFields { 73 | if _, exists := response[field]; !exists { 74 | t.Errorf("WriteError() missing expected field: %s", field) 75 | } 76 | } 77 | }) 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /errors/types.go: -------------------------------------------------------------------------------- 1 | package errors 2 | 3 | import ( 4 | "net/http" 5 | ) 6 | 7 | // NewError creates a new HapaxError with the given parameters. 8 | // It is a general-purpose constructor that allows full control over 9 | // the error's fields. For most cases, you should use one of the 10 | // specialized constructors below. 
11 | // 12 | // Example: 13 | // 14 | // err := NewError(InternalError, "database connection failed", 500, "req_123", nil, dbErr) 15 | func NewError(errType ErrorType, message string, code int, requestID string, details map[string]interface{}, err error) *HapaxError { 16 | return &HapaxError{ 17 | Type: errType, 18 | Message: message, 19 | Code: code, 20 | RequestID: requestID, 21 | Details: details, 22 | err: err, 23 | } 24 | } 25 | 26 | // NewAuthError creates an authentication error with appropriate defaults. 27 | // Use this for any authentication or authorization failures, such as: 28 | // - Invalid API keys 29 | // - Missing credentials 30 | // - Insufficient permissions 31 | // 32 | // Example: 33 | // 34 | // err := NewAuthError("req_123", "Invalid API key", nil) 35 | func NewAuthError(requestID, message string, err error) *HapaxError { 36 | return &HapaxError{ 37 | Type: AuthError, 38 | Message: message, 39 | Code: http.StatusUnauthorized, 40 | RequestID: requestID, 41 | err: err, 42 | Details: map[string]interface{}{ 43 | "suggestion": "Please check your authentication credentials", 44 | }, 45 | } 46 | } 47 | 48 | // NewValidationError creates a validation error with appropriate defaults. 
49 | // Use this for any request validation failures, such as: 50 | // - Invalid input formats 51 | // - Missing required fields 52 | // - Value constraint violations 53 | // - Invalid request methods 54 | // 55 | // Example: 56 | // 57 | // err := NewValidationError("req_123", "Invalid prompt", map[string]interface{}{ 58 | // "field": "prompt", 59 | // "error": "must not be empty", 60 | // }) 61 | func NewValidationError(requestID, message string, validationDetails map[string]interface{}) *HapaxError { 62 | code := http.StatusBadRequest 63 | if message == "Method not allowed" { 64 | code = http.StatusMethodNotAllowed 65 | } 66 | return &HapaxError{ 67 | Type: ValidationError, 68 | Message: message, 69 | Code: code, 70 | RequestID: requestID, 71 | Details: validationDetails, 72 | } 73 | } 74 | 75 | // NewRateLimitError creates a rate limit error with appropriate defaults. 76 | // Use this when a client has exceeded their quota or rate limits, such as: 77 | // - Too many requests per second 78 | // - Monthly API quota exceeded 79 | // - Concurrent request limit reached 80 | // 81 | // Example: 82 | // 83 | // err := NewRateLimitError("req_123", 30) 84 | func NewRateLimitError(requestID string, retryAfter int) *HapaxError { 85 | return &HapaxError{ 86 | Type: RateLimitError, 87 | Message: "Rate limit exceeded", 88 | Code: http.StatusTooManyRequests, 89 | RequestID: requestID, 90 | Details: map[string]interface{}{ 91 | "retry_after": retryAfter, 92 | }, 93 | } 94 | } 95 | 96 | // NewProviderError creates a provider error with appropriate defaults. 
97 | // Use this when the underlying LLM provider encounters an error, such as: 98 | // - Provider API errors 99 | // - Model unavailability 100 | // - Invalid provider configuration 101 | // 102 | // Example: 103 | // 104 | // err := NewProviderError("req_123", "Model unavailable", providerErr) 105 | func NewProviderError(requestID string, message string, err error) *HapaxError { 106 | return &HapaxError{ 107 | Type: ProviderError, 108 | Message: message, 109 | Code: http.StatusBadGateway, 110 | RequestID: requestID, 111 | err: err, 112 | } 113 | } 114 | 115 | // NewInternalError creates an internal server error with appropriate defaults. 116 | // Use this for unexpected errors that are not covered by other error types: 117 | // - Panics 118 | // - Database errors 119 | // - Unexpected system failures 120 | // 121 | // Example: 122 | // 123 | // err := NewInternalError("req_123", dbErr) 124 | func NewInternalError(requestID string, err error) *HapaxError { 125 | return &HapaxError{ 126 | Type: InternalError, 127 | Message: "An internal error occurred", 128 | Code: http.StatusInternalServerError, 129 | RequestID: requestID, 130 | err: err, 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /errors/types_test.go: -------------------------------------------------------------------------------- 1 | package errors 2 | 3 | import ( 4 | "errors" 5 | "net/http" 6 | "testing" 7 | ) 8 | 9 | func TestNewAuthError(t *testing.T) { 10 | requestID := "test-123" 11 | message := "invalid credentials" 12 | innerErr := errors.New("token expired") 13 | 14 | err := NewAuthError(requestID, message, innerErr) 15 | 16 | if err.Type != AuthError { 17 | t.Errorf("Expected error type %v, got %v", AuthError, err.Type) 18 | } 19 | if err.Message != message { 20 | t.Errorf("Expected message %v, got %v", message, err.Message) 21 | } 22 | if err.Code != http.StatusUnauthorized { 23 | t.Errorf("Expected code %v, got %v", http.StatusUnauthorized, 
err.Code) 24 | } 25 | if err.RequestID != requestID { 26 | t.Errorf("Expected requestID %v, got %v", requestID, err.RequestID) 27 | } 28 | if err.Unwrap() != innerErr { 29 | t.Errorf("Expected inner error %v, got %v", innerErr, err.Unwrap()) 30 | } 31 | } 32 | 33 | func TestNewValidationError(t *testing.T) { 34 | requestID := "test-456" 35 | message := "invalid input" 36 | details := map[string]interface{}{ 37 | "field": "email", 38 | "error": "invalid format", 39 | } 40 | 41 | err := NewValidationError(requestID, message, details) 42 | 43 | if err.Type != ValidationError { 44 | t.Errorf("Expected error type %v, got %v", ValidationError, err.Type) 45 | } 46 | if err.Message != message { 47 | t.Errorf("Expected message %v, got %v", message, err.Message) 48 | } 49 | if err.Code != http.StatusBadRequest { 50 | t.Errorf("Expected code %v, got %v", http.StatusBadRequest, err.Code) 51 | } 52 | if err.RequestID != requestID { 53 | t.Errorf("Expected requestID %v, got %v", requestID, err.RequestID) 54 | } 55 | if err.Details["field"] != details["field"] { 56 | t.Errorf("Expected details field %v, got %v", details["field"], err.Details["field"]) 57 | } 58 | } 59 | 60 | func TestNewRateLimitError(t *testing.T) { 61 | requestID := "test-789" 62 | retryAfter := 60 63 | 64 | err := NewRateLimitError(requestID, retryAfter) 65 | 66 | if err.Type != RateLimitError { 67 | t.Errorf("Expected error type %v, got %v", RateLimitError, err.Type) 68 | } 69 | if err.Code != http.StatusTooManyRequests { 70 | t.Errorf("Expected code %v, got %v", http.StatusTooManyRequests, err.Code) 71 | } 72 | if err.Details["retry_after"] != retryAfter { 73 | t.Errorf("Expected retry_after %v, got %v", retryAfter, err.Details["retry_after"]) 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /examples/hapax.yaml: -------------------------------------------------------------------------------- 1 | # Hapax Server Configuration Example 2 | # This example 
demonstrates all available configuration options, 3 | # including optional features like caching and retries. 4 | 5 | server: 6 | port: 8081 7 | read_timeout: 30s 8 | write_timeout: 30s 9 | max_header_bytes: 1048576 # 1MB 10 | shutdown_timeout: 30s 11 | 12 | llm: 13 | # Provider Configuration 14 | provider: anthropic # openai, anthropic, ollama 15 | model: claude-3-haiku-20240307 16 | api_key: "${ANTHROPIC_API_KEY}" # Will be replaced with environment variable 17 | endpoint: "https://api.anthropic.com/v1" 18 | system_prompt: "You are Claude, a helpful AI assistant." 19 | 20 | # Token Management 21 | max_context_tokens: 200000 # Claude-3-Haiku context window 22 | 23 | # Generation Parameters 24 | options: 25 | temperature: 0.7 # Controls randomness (0.0-1.0) 26 | max_tokens: 4096 # Maximum tokens to generate 27 | top_p: 1 # Nucleus sampling threshold 28 | frequency_penalty: 0 # Reduces word repetition 29 | presence_penalty: 0 # Encourages topic diversity 30 | # Additional model-specific options can be added here 31 | 32 | # Caching Configuration (Optional) 33 | cache: 34 | enable: true 35 | type: memory # memory, redis, or file 36 | ttl: 24h # Cache entry lifetime 37 | max_size: 1000 # Maximum entries for memory cache 38 | # Redis Configuration (if type: redis) 39 | redis: 40 | address: localhost:6379 41 | password: "" # Optional Redis password 42 | db: 0 # Redis database number 43 | # File Cache Configuration (if type: file) 44 | dir: ./cache # Cache directory path 45 | 46 | # Retry Configuration (Optional) 47 | retry: 48 | max_retries: 3 49 | initial_delay: 1s 50 | max_delay: 30s 51 | multiplier: 2 52 | retryable_errors: 53 | - rate_limit 54 | - timeout 55 | - server_error 56 | 57 | # Request Processing Configuration 58 | processing: 59 | # Templates for different request types 60 | request_templates: 61 | # Simple completion template 62 | default: "{{.Input}}" 63 | 64 | # Chat completion template 65 | chat: | 66 | {{range .Messages}} 67 | {{.Role}}: {{.Content}} 
68 | {{end}} 69 | 70 | # Function calling template (future) 71 | function: | 72 | Function Description: 73 | {{.FunctionDescription}} 74 | 75 | Input: 76 | {{.Input}} 77 | 78 | # Response Formatting Options 79 | response_formatting: 80 | clean_json: true # Remove markdown and format JSON 81 | trim_whitespace: true # Remove extra whitespace 82 | max_length: 8192 # Maximum response length 83 | 84 | logging: 85 | level: info # debug, info, warn, error 86 | format: json # json or text 87 | 88 | routes: 89 | - path: "/completions" 90 | handler: "completion" 91 | version: "v1" 92 | methods: ["POST"] 93 | middleware: ["auth", "ratelimit"] 94 | headers: 95 | Content-Type: "application/json" 96 | health_check: 97 | enabled: true 98 | interval: 30s 99 | timeout: 5s 100 | threshold: 3 101 | checks: 102 | api: "http" 103 | 104 | - path: "/health" 105 | handler: "health" 106 | version: "v1" 107 | methods: ["GET"] 108 | health_check: 109 | enabled: true 110 | interval: 15s 111 | timeout: 2s 112 | threshold: 2 113 | checks: 114 | system: "tcp" 115 | -------------------------------------------------------------------------------- /examples/requests.md: -------------------------------------------------------------------------------- 1 | # Hapax Request Examples 2 | 3 | This document shows how to use different types of requests with Hapax. 4 | 5 | ## Simple Completion (Default) 6 | 7 | The simplest type of request. Just provide an input text and get a completion. 8 | 9 | ```bash 10 | # Using curl 11 | curl -X POST http://localhost:8081/v1/completions \ 12 | -H "Content-Type: application/json" \ 13 | -d '{ 14 | "prompt": "What is the capital of France?" 15 | }' 16 | ``` 17 | 18 | ```json 19 | // Response 20 | { 21 | "content": "The capital of France is Paris." 22 | } 23 | ``` 24 | 25 | ## Chat Completion 26 | 27 | For chat-style interactions with message history. 
28 | 29 | ```bash 30 | # Using curl 31 | curl -X POST "http://localhost:8081/v1/completions?type=chat" \ 32 | -H "Content-Type: application/json" \ 33 | -d '{ 34 | "messages": [ 35 | {"role": "system", "content": "You are a helpful assistant."}, 36 | {"role": "user", "content": "Hi, how are you?"}, 37 | {"role": "assistant", "content": "I am doing well, thank you! How can I help you today?"}, 38 | {"role": "user", "content": "What is the weather like?"} 39 | ] 40 | }' 41 | ``` 42 | 43 | ```json 44 | // Response 45 | { 46 | "content": "I apologize, but I don't have access to real-time weather information. To get accurate weather information, I recommend checking a weather service or website for your specific location." 47 | } 48 | ``` 49 | 50 | ## Function Calling (Future) 51 | 52 | For structured function-like interactions. 53 | 54 | ```bash 55 | # Using curl 56 | curl -X POST "http://localhost:8081/v1/completions?type=function" \ 57 | -H "Content-Type: application/json" \ 58 | -d '{ 59 | "function_description": "Get the weather for a specific location", 60 | "input": "What is the weather like in Paris?" 61 | }' 62 | ``` 63 | 64 | ```json 65 | // Response 66 | { 67 | "content": "{\"function\": \"get_weather\", \"location\": \"Paris\", \"unit\": \"celsius\"}" 68 | } 69 | ``` 70 | 71 | ## Request Type Selection 72 | 73 | 1. **Query Parameter**: Add `?type=chat` or `?type=function` to the URL 74 | 2. **Default Behavior**: If no type is specified, the request is treated as a simple completion 75 | 3. 
**Request Format**: 76 | - Simple completion: Just needs `input` 77 | - Chat: Requires `messages` array with `role` and `content` 78 | - Function: Needs both `input` and `function_description` 79 | 80 | ## Response Formatting 81 | 82 | All responses are formatted according to the configuration: 83 | - JSON responses are cleaned and properly formatted 84 | - Whitespace is trimmed 85 | - Responses are truncated to the configured maximum length 86 | 87 | ## Error Handling 88 | 89 | ```json 90 | // Example error response 91 | { 92 | "error": "Invalid chat request: messages array cannot be empty", 93 | "status": 400 94 | } 95 | ``` 96 | 97 | Common error cases: 98 | 1. Missing required fields 99 | 2. Invalid JSON format 100 | 3. Empty messages array in chat requests 101 | 4. Request processing failures 102 | 5. LLM errors 103 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/teilomillet/hapax 2 | 3 | go 1.22.5 4 | 5 | toolchain go1.22.10 6 | 7 | require ( 8 | github.com/eapache/queue/v2 v2.0.0-20230407133247-75960ed334e4 9 | github.com/fsnotify/fsnotify v1.8.0 10 | github.com/go-chi/chi/v5 v5.2.0 11 | github.com/go-playground/validator/v10 v10.22.0 12 | github.com/google/uuid v1.3.0 13 | github.com/pkoukk/tiktoken-go v0.1.7 14 | github.com/prometheus/client_golang v1.20.5 15 | github.com/sony/gobreaker v1.0.0 16 | github.com/stretchr/testify v1.9.0 17 | github.com/teilomillet/gollm v0.1.1 18 | go.uber.org/zap v1.27.0 19 | golang.org/x/sync v0.10.0 20 | golang.org/x/time v0.8.0 21 | gopkg.in/yaml.v3 v3.0.1 22 | ) 23 | 24 | require ( 25 | github.com/bahlo/generic-list-go v0.2.0 // indirect 26 | github.com/beorn7/perks v1.0.1 // indirect 27 | github.com/buger/jsonparser v1.1.1 // indirect 28 | github.com/caarlos0/env/v11 v11.1.0 // indirect 29 | github.com/cespare/xxhash/v2 v2.3.0 // indirect 30 | github.com/davecgh/go-spew v1.1.1 
// indirect 31 | github.com/dlclark/regexp2 v1.10.0 // indirect 32 | github.com/gabriel-vasile/mimetype v1.4.3 // indirect 33 | github.com/go-playground/locales v0.14.1 // indirect 34 | github.com/go-playground/universal-translator v0.18.1 // indirect 35 | github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect 36 | github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect 37 | github.com/invopop/jsonschema v0.12.0 // indirect 38 | github.com/klauspost/compress v1.17.9 // indirect 39 | github.com/kylelemons/godebug v1.1.0 // indirect 40 | github.com/leodido/go-urn v1.4.0 // indirect 41 | github.com/mailru/easyjson v0.7.7 // indirect 42 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect 43 | github.com/onsi/ginkgo/v2 v2.9.5 // indirect 44 | github.com/pmezard/go-difflib v1.0.0 // indirect 45 | github.com/prometheus/client_model v0.6.1 // indirect 46 | github.com/prometheus/common v0.55.0 // indirect 47 | github.com/prometheus/procfs v0.15.1 // indirect 48 | github.com/quic-go/qpack v0.5.1 // indirect 49 | github.com/quic-go/quic-go v0.48.2 // indirect 50 | github.com/stretchr/objx v0.5.2 // indirect 51 | github.com/wk8/go-ordered-map/v2 v2.1.8 // indirect 52 | go.uber.org/mock v0.4.0 // indirect 53 | go.uber.org/multierr v1.11.0 // indirect 54 | golang.org/x/crypto v0.31.0 // indirect 55 | golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 // indirect 56 | golang.org/x/mod v0.17.0 // indirect 57 | golang.org/x/net v0.33.0 // indirect 58 | golang.org/x/sys v0.28.0 // indirect 59 | golang.org/x/text v0.21.0 // indirect 60 | golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect 61 | google.golang.org/protobuf v1.34.2 // indirect 62 | ) 63 | -------------------------------------------------------------------------------- /golangci.yml: -------------------------------------------------------------------------------- 1 | run: 2 | timeout: 5m 3 | tests: true 4 | # Include test files for analysis 5 
| build-tags: 6 | - integration 7 | 8 | linters: 9 | disable-all: true 10 | enable: 11 | - gofmt 12 | - govet 13 | - revive # Modern replacement for golint 14 | - gosimple 15 | - staticcheck 16 | - errcheck 17 | - ineffassign 18 | - unconvert 19 | - misspell 20 | - gosec 21 | - bodyclose # Checks whether HTTP response bodies are closed 22 | - gocyclo # Check function complexity 23 | - goimports # Checks imports ordering 24 | - unused # Checks for unused constants, variables, functions and types 25 | 26 | linters-settings: 27 | gocyclo: 28 | # Minimal complexity of function to report 29 | min-complexity: 15 30 | revive: 31 | rules: 32 | - name: exported 33 | arguments: 34 | - "checkPrivateReceivers" 35 | - "sayRepetitiveInsteadOf" 36 | 37 | issues: 38 | exclude-use-default: false 39 | max-issues-per-linter: 0 40 | max-same-issues: 0 41 | exclude-rules: 42 | - path: _test\.go 43 | linters: 44 | - gosec 45 | - errcheck 46 | 47 | output: 48 | format: colored-line-number 49 | print-issued-lines: true 50 | print-linter-name: true -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "os" 7 | "os/signal" 8 | "syscall" 9 | 10 | "github.com/teilomillet/hapax/errors" 11 | "github.com/teilomillet/hapax/server" 12 | "go.uber.org/zap" 13 | ) 14 | 15 | func main() { 16 | // Create logger with explicit error handling 17 | logger, err := zap.NewProduction() 18 | if err != nil { 19 | // Fail fast if logger creation fails 20 | fmt.Printf("Critical error: Failed to create logger: %v\n", err) 21 | os.Exit(1) 22 | } 23 | 24 | // Ensure logger is synced, with robust error handling 25 | defer func() { 26 | if syncErr := logger.Sync(); syncErr != nil { 27 | // Log sync failure, but don't mask the original error 28 | fmt.Printf("Warning: Failed to sync logger: %v\n", syncErr) 29 | } 30 | }() 31 | 32 | // Set 
global logger 33 | errors.SetLogger(logger) 34 | 35 | // Configuration and server setup with comprehensive error handling 36 | configPath := "config.yaml" 37 | server, err := server.NewServer(configPath, logger) 38 | if err != nil { 39 | logger.Fatal("Server initialization failed", 40 | zap.Error(err), 41 | zap.String("config_path", configPath), 42 | ) 43 | } 44 | 45 | // Graceful shutdown infrastructure 46 | ctx, cancel := context.WithCancel(context.Background()) 47 | defer cancel() 48 | 49 | // Signal handling with detailed logging 50 | sigChan := make(chan os.Signal, 1) 51 | signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM) 52 | 53 | go func() { 54 | sig := <-sigChan 55 | logger.Info("Shutdown signal received", 56 | zap.String("signal", sig.String()), 57 | zap.String("action", "initiating graceful shutdown"), 58 | ) 59 | cancel() 60 | }() 61 | 62 | // Server start with comprehensive error tracking 63 | if err := server.Start(ctx); err != nil { 64 | logger.Fatal("Server startup or runtime error", 65 | zap.Error(err), 66 | zap.String("action", "server_start_failed"), 67 | ) 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /prometheus.yml: -------------------------------------------------------------------------------- 1 | global: 2 | scrape_interval: 15s 3 | evaluation_interval: 15s 4 | 5 | scrape_configs: 6 | - job_name: 'hapax' 7 | static_configs: 8 | - targets: ['hapax:8080'] 9 | metrics_path: '/metrics' 10 | -------------------------------------------------------------------------------- /server/circuitbreaker/circuitbreaker.go: -------------------------------------------------------------------------------- 1 | // Package circuitbreaker provides an implementation of a circuit breaker pattern 2 | // to manage service calls and handle failures gracefully. 
3 | 4 | package circuitbreaker 5 | 6 | import ( 7 | "fmt" 8 | "time" 9 | 10 | "github.com/prometheus/client_golang/prometheus" 11 | "github.com/sony/gobreaker" 12 | "go.uber.org/zap" 13 | ) 14 | 15 | // Config represents the configuration settings for a CircuitBreaker instance. 16 | type Config struct { 17 | // Name is the unique identifier for the circuit breaker. 18 | Name string 19 | // MaxRequests is the maximum number of requests allowed to pass through while the circuit breaker is half-open (maps to gobreaker.Settings.MaxRequests). 20 | MaxRequests uint32 21 | // Interval is the cyclic period of the closed state after which the internal failure counts are cleared (maps to gobreaker.Settings.Interval). 22 | Interval time.Duration 23 | // Timeout is the period of the open state after which the circuit breaker transitions to half-open (maps to gobreaker.Settings.Timeout). 24 | Timeout time.Duration 25 | // FailureThreshold is the number of consecutive failures required to trip the circuit breaker. 26 | FailureThreshold uint32 27 | // TestMode indicates whether the circuit breaker is running in test mode. 28 | TestMode bool 29 | } 30 | 31 | // CircuitBreaker represents a circuit breaker instance with its configuration and state. 32 | type CircuitBreaker struct { 33 | // name is the unique identifier for the circuit breaker. 34 | name string 35 | // logger is the logger instance for logging events. 36 | logger *zap.Logger 37 | // metrics holds Prometheus metrics for the circuit breaker. 38 | metrics *metrics 39 | // breaker is the underlying gobreaker instance. 40 | breaker *gobreaker.CircuitBreaker 41 | } 42 | 43 | // metrics holds Prometheus metrics for the circuit breaker. 44 | type metrics struct { 45 | // stateGauge tracks the current state of the circuit breaker. 46 | stateGauge prometheus.Gauge 47 | // failureCount tracks the total number of failures. 48 | failureCount prometheus.Counter 49 | // tripsTotal tracks the total number of times the circuit breaker has tripped. 50 | tripsTotal prometheus.Counter 51 | } 52 | 53 | // initCircuitBreaker initializes a new CircuitBreaker instance and sets up metrics.
54 | // It returns the initialized CircuitBreaker and any error encountered during initialization. 55 | func initCircuitBreaker(config Config, logger *zap.Logger, registry *prometheus.Registry) (*CircuitBreaker, error) { 56 | // Check if the circuit breaker name is empty. 57 | if config.Name == "" { 58 | return nil, fmt.Errorf("circuit breaker name cannot be empty") 59 | } 60 | 61 | // Create a new CircuitBreaker instance. 62 | cb := &CircuitBreaker{ 63 | name: config.Name, 64 | logger: logger, 65 | } 66 | 67 | // Initialize metrics if not in test mode. 68 | if registry != nil && !config.TestMode { 69 | // Create a new metrics instance. 70 | cb.metrics = &metrics{ 71 | stateGauge: prometheus.NewGauge(prometheus.GaugeOpts{ 72 | Name: "circuit_breaker_state", 73 | Help: "Current state of the circuit breaker (0=closed, 1=half-open, 2=open)", 74 | ConstLabels: prometheus.Labels{ 75 | "name": config.Name, 76 | }, 77 | }), 78 | failureCount: prometheus.NewCounter(prometheus.CounterOpts{ 79 | Name: "circuit_breaker_failures_total", 80 | Help: "Total number of failures", 81 | ConstLabels: prometheus.Labels{ 82 | "name": config.Name, 83 | }, 84 | }), 85 | tripsTotal: prometheus.NewCounter(prometheus.CounterOpts{ 86 | Name: "circuit_breaker_trips_total", 87 | Help: "Total number of times the circuit breaker has tripped", 88 | ConstLabels: prometheus.Labels{ 89 | "name": config.Name, 90 | }, 91 | }), 92 | } 93 | 94 | // Register metrics with the Prometheus registry. 95 | registry.MustRegister(cb.metrics.stateGauge) 96 | registry.MustRegister(cb.metrics.failureCount) 97 | registry.MustRegister(cb.metrics.tripsTotal) 98 | } 99 | 100 | return cb, nil 101 | } 102 | 103 | // configureCircuitBreaker sets the configuration settings for the CircuitBreaker instance. 104 | // It configures the gobreaker settings, including the trip conditions and state change handlers. 
105 | func configureCircuitBreaker(cb *CircuitBreaker, config Config, logger *zap.Logger) { 106 | // Create a new gobreaker settings instance. 107 | settings := gobreaker.Settings{ 108 | Name: config.Name, 109 | MaxRequests: config.MaxRequests, 110 | Interval: config.Interval, 111 | Timeout: config.Timeout, 112 | 113 | // ReadyToTrip determines if the circuit breaker should trip based on consecutive failures. 114 | ReadyToTrip: func(counts gobreaker.Counts) bool { 115 | // Check if the number of consecutive failures exceeds the threshold. 116 | shouldTrip := counts.ConsecutiveFailures >= config.FailureThreshold 117 | if shouldTrip { 118 | // Log a message when the circuit breaker trips. 119 | logger.Info("Circuit breaker tripping", 120 | zap.String("name", config.Name), 121 | zap.Uint32("consecutive_failures", counts.ConsecutiveFailures), 122 | zap.Uint32("threshold", config.FailureThreshold)) 123 | } 124 | return shouldTrip 125 | }, 126 | 127 | // OnStateChange handles actions to take when the circuit breaker state changes. 128 | OnStateChange: func(name string, from, to gobreaker.State) { 129 | // Log a message when the circuit breaker state changes. 130 | logger.Info("Circuit breaker state changed", 131 | zap.String("name", name), 132 | zap.String("from", from.String()), 133 | zap.String("to", to.String())) 134 | 135 | // Update metrics based on the new state. 136 | if cb.metrics != nil { 137 | switch to { 138 | case gobreaker.StateOpen: 139 | cb.metrics.stateGauge.Set(2) 140 | cb.metrics.tripsTotal.Inc() 141 | case gobreaker.StateHalfOpen: 142 | cb.metrics.stateGauge.Set(1) 143 | case gobreaker.StateClosed: 144 | cb.metrics.stateGauge.Set(0) 145 | } 146 | } 147 | }, 148 | } 149 | 150 | // Create a new gobreaker instance with the configured settings. 151 | cb.breaker = gobreaker.NewCircuitBreaker(settings) 152 | } 153 | 154 | // NewCircuitBreaker creates a new CircuitBreaker instance and configures it with the provided settings. 
155 | // It returns the configured CircuitBreaker instance and any error that occurred during initialization. 156 | func NewCircuitBreaker(config Config, logger *zap.Logger, registry *prometheus.Registry) (*CircuitBreaker, error) { 157 | // Initialize the CircuitBreaker instance. 158 | cb, err := initCircuitBreaker(config, logger, registry) 159 | if err != nil { 160 | return nil, err 161 | } 162 | 163 | // Configure the CircuitBreaker instance with the provided settings. 164 | configureCircuitBreaker(cb, config, logger) 165 | 166 | return cb, nil 167 | } 168 | 169 | // Execute executes a function within the circuit breaker. 170 | // It returns any error that occurred during execution. 171 | func (cb *CircuitBreaker) Execute(operation func() error) error { 172 | // Execute the function within the circuit breaker. 173 | result, err := cb.breaker.Execute(func() (interface{}, error) { 174 | // Call the operation function. 175 | if err := operation(); err != nil { 176 | // Increment the failure count if the operation fails. 177 | if cb.metrics != nil { 178 | cb.metrics.failureCount.Inc() 179 | } 180 | // Log a message when the operation fails. 181 | cb.logger.Debug("Operation failed", 182 | zap.String("name", cb.name), 183 | zap.Error(err)) 184 | return nil, err 185 | } 186 | return nil, nil 187 | }) 188 | 189 | // Check if the circuit breaker is open. 190 | if err != nil { 191 | if err == gobreaker.ErrOpenState { 192 | // Log a message when the circuit breaker is open. 193 | cb.logger.Debug("Circuit breaker is open", 194 | zap.String("name", cb.name)) 195 | } 196 | return err 197 | } 198 | 199 | // Ignore the result since we don't use it. 200 | _ = result 201 | return nil 202 | } 203 | 204 | // State returns the current state of the circuit breaker. 205 | func (cb *CircuitBreaker) State() gobreaker.State { 206 | return cb.breaker.State() 207 | } 208 | 209 | // Counts returns the current counts of the circuit breaker. 
210 | func (cb *CircuitBreaker) Counts() gobreaker.Counts { 211 | return cb.breaker.Counts() 212 | } 213 | -------------------------------------------------------------------------------- /server/circuitbreaker/errors.go: -------------------------------------------------------------------------------- 1 | package circuitbreaker 2 | 3 | import "errors" 4 | 5 | var ( 6 | // ErrCircuitOpen is returned when the circuit breaker is open 7 | ErrCircuitOpen = errors.New("circuit breaker is open") 8 | ) 9 | -------------------------------------------------------------------------------- /server/handlers/integration_test.go: -------------------------------------------------------------------------------- 1 | package handlers 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "encoding/json" 7 | "net/http" 8 | "net/http/httptest" 9 | "testing" 10 | "time" 11 | 12 | "github.com/stretchr/testify/assert" 13 | "github.com/stretchr/testify/require" 14 | "github.com/teilomillet/hapax/config" 15 | "github.com/teilomillet/hapax/errors" 16 | "github.com/teilomillet/hapax/server/metrics" 17 | "github.com/teilomillet/hapax/server/middleware" 18 | "github.com/teilomillet/hapax/server/processing" 19 | "github.com/teilomillet/gollm" 20 | "github.com/teilomillet/hapax/server/mocks" 21 | "go.uber.org/zap" 22 | ) 23 | 24 | // TestCompletionHandlerIntegration tests the CompletionHandler integrated with: 25 | // - Router for request routing 26 | // - Middleware for request ID and rate limiting 27 | // - Error handling middleware 28 | // - Logging middleware 29 | func TestCompletionHandlerIntegration(t *testing.T) { 30 | // Create metrics 31 | m := metrics.NewMetrics() 32 | 33 | // Create mock LLM 34 | mockLLM := mocks.NewMockLLM(func(ctx context.Context, prompt *gollm.Prompt) (string, error) { 35 | // If context has timeout header, simulate timeout 36 | if ctx.Value(middleware.XTestTimeoutKey) != nil { 37 | // Sleep longer than the timeout 38 | time.Sleep(5 * time.Second) 39 | } 40 | return "Mock 
response", nil 41 | }) 42 | 43 | // Create logger 44 | logger := zap.NewNop() 45 | 46 | // Create processor 47 | cfg := &config.ProcessingConfig{ 48 | RequestTemplates: map[string]string{ 49 | "default": "{{.Input}}", 50 | "chat": "{{range .Messages}}{{.Role}}: {{.Content}}\n{{end}}", 51 | "function": "Function: {{.FunctionDescription}}\nInput: {{.Input}}", 52 | }, 53 | } 54 | processor, err := processing.NewProcessor(cfg, mockLLM) 55 | require.NoError(t, err) 56 | 57 | // Create handler 58 | handler := NewCompletionHandler(processor, logger) 59 | 60 | // Create middleware chain 61 | chain := middleware.RequestID( 62 | middleware.PrometheusMetrics(m)( 63 | middleware.RateLimit(m)( 64 | middleware.Timeout(5*time.Second)(handler), 65 | ), 66 | ), 67 | ) 68 | 69 | // Create test server 70 | ts := httptest.NewServer(chain) 71 | defer ts.Close() 72 | 73 | tests := []struct { 74 | name string 75 | method string 76 | path string 77 | requestBody interface{} 78 | headers map[string]string 79 | expectedCode int 80 | expectedError *errors.ErrorResponse 81 | setup func(t *testing.T, ts *httptest.Server) 82 | }{ 83 | { 84 | name: "method not allowed", 85 | method: http.MethodGet, 86 | path: "/v1/completions", 87 | expectedCode: http.StatusMethodNotAllowed, 88 | expectedError: &errors.ErrorResponse{ 89 | Type: errors.ValidationError, 90 | Message: "Method not allowed", 91 | Details: map[string]interface{}{ 92 | "method": http.MethodGet, 93 | "allowed_methods": []string{http.MethodPost}, 94 | }, 95 | }, 96 | }, 97 | { 98 | name: "missing content type", 99 | method: http.MethodPost, 100 | path: "/v1/completions", 101 | requestBody: CompletionRequest{Input: "test"}, 102 | expectedCode: http.StatusBadRequest, 103 | expectedError: &errors.ErrorResponse{ 104 | Type: errors.ValidationError, 105 | Message: "Content-Type header required", 106 | Details: map[string]interface{}{ 107 | "required_content_type": "application/json", 108 | }, 109 | }, 110 | }, 111 | { 112 | name: "rate limit 
exceeded", 113 | method: http.MethodPost, 114 | path: "/v1/completions", 115 | headers: map[string]string{"Content-Type": "application/json"}, 116 | requestBody: CompletionRequest{Input: "test"}, 117 | expectedCode: http.StatusTooManyRequests, 118 | expectedError: &errors.ErrorResponse{ 119 | Type: errors.RateLimitError, 120 | Message: "Rate limit exceeded", 121 | Details: map[string]interface{}{ 122 | "limit": 10, 123 | "window": "1m0s", 124 | }, 125 | }, 126 | setup: func(t *testing.T, ts *httptest.Server) { 127 | // Reset rate limiters before starting 128 | middleware.ResetRateLimiters() 129 | 130 | // Make 10 successful requests first 131 | for i := 0; i < 10; i++ { 132 | body, err := json.Marshal(CompletionRequest{Input: "test"}) 133 | require.NoError(t, err) 134 | req, err := http.NewRequest(http.MethodPost, ts.URL+"/v1/completions", bytes.NewReader(body)) 135 | require.NoError(t, err) 136 | req.Header.Set("Content-Type", "application/json") 137 | resp, err := http.DefaultClient.Do(req) 138 | require.NoError(t, err) 139 | require.Equal(t, http.StatusOK, resp.StatusCode) 140 | resp.Body.Close() 141 | } 142 | 143 | // The next request should fail 144 | body, err := json.Marshal(CompletionRequest{Input: "test"}) 145 | require.NoError(t, err) 146 | req, err := http.NewRequest(http.MethodPost, ts.URL+"/v1/completions", bytes.NewReader(body)) 147 | require.NoError(t, err) 148 | req.Header.Set("Content-Type", "application/json") 149 | resp, err := http.DefaultClient.Do(req) 150 | require.NoError(t, err) 151 | require.Equal(t, http.StatusTooManyRequests, resp.StatusCode) 152 | resp.Body.Close() 153 | }, 154 | }, 155 | { 156 | name: "malformed json", 157 | method: http.MethodPost, 158 | path: "/v1/completions", 159 | headers: map[string]string{"Content-Type": "application/json"}, 160 | requestBody: "{invalid json}", 161 | expectedCode: http.StatusBadRequest, 162 | expectedError: &errors.ErrorResponse{ 163 | Type: errors.ValidationError, 164 | Message: "Invalid 
completion request format", 165 | Details: map[string]interface{}{ 166 | "type": "default", 167 | }, 168 | }, 169 | }, 170 | { 171 | name: "context timeout", 172 | method: http.MethodPost, 173 | path: "/v1/completions", 174 | headers: map[string]string{ 175 | "Content-Type": "application/json", 176 | "X-Test-Timeout": "true", 177 | }, 178 | requestBody: CompletionRequest{Input: "test"}, 179 | expectedCode: http.StatusGatewayTimeout, 180 | expectedError: &errors.ErrorResponse{ 181 | Type: errors.InternalError, 182 | Message: "Request timeout", 183 | Details: map[string]interface{}{ 184 | "timeout": "5s", 185 | }, 186 | }, 187 | }, 188 | } 189 | 190 | for _, tt := range tests { 191 | t.Run(tt.name, func(t *testing.T) { 192 | // Reset rate limiters before each test 193 | middleware.ResetRateLimiters() 194 | 195 | // Run setup first if it exists 196 | if tt.setup != nil { 197 | tt.setup(t, ts) 198 | } 199 | 200 | // Create request 201 | var body []byte 202 | if str, ok := tt.requestBody.(string); ok { 203 | body = []byte(str) 204 | } else { 205 | var err error 206 | body, err = json.Marshal(tt.requestBody) 207 | require.NoError(t, err) 208 | } 209 | 210 | // Create request with context 211 | req, err := http.NewRequest(tt.method, ts.URL+tt.path, bytes.NewReader(body)) 212 | require.NoError(t, err) 213 | 214 | // Add headers 215 | for k, v := range tt.headers { 216 | req.Header.Set(k, v) 217 | } 218 | 219 | // Send request 220 | resp, err := http.DefaultClient.Do(req) 221 | require.NoError(t, err) 222 | defer resp.Body.Close() 223 | 224 | // Verify status code 225 | assert.Equal(t, tt.expectedCode, resp.StatusCode) 226 | 227 | if tt.expectedError != nil { 228 | var gotError errors.ErrorResponse 229 | err := json.NewDecoder(resp.Body).Decode(&gotError) 230 | require.NoError(t, err) 231 | 232 | assert.Equal(t, tt.expectedError.Type, gotError.Type) 233 | assert.Equal(t, tt.expectedError.Message, gotError.Message) 234 | assert.NotEmpty(t, gotError.RequestID) 235 | 236 | // 
Compare details, handling slice type differences 237 | if tt.expectedError.Details != nil { 238 | assert.Equal(t, len(tt.expectedError.Details), len(gotError.Details)) 239 | for k, v := range tt.expectedError.Details { 240 | gotV, ok := gotError.Details[k] 241 | assert.True(t, ok, "missing key %s in error details", k) 242 | 243 | // Special handling for slices 244 | if expSlice, ok := v.([]string); ok { 245 | if gotSlice, ok := gotV.([]interface{}); ok { 246 | assert.Equal(t, len(expSlice), len(gotSlice), "slice length mismatch for key %s", k) 247 | for i := range expSlice { 248 | assert.Equal(t, expSlice[i], gotSlice[i].(string)) 249 | } 250 | continue 251 | } 252 | } 253 | 254 | // Special handling for numbers from JSON 255 | if expInt, ok := v.(int); ok { 256 | if gotFloat, ok := gotV.(float64); ok { 257 | assert.Equal(t, float64(expInt), gotFloat) 258 | continue 259 | } 260 | } 261 | 262 | // Regular comparison for other values 263 | assert.Equal(t, v, gotV) 264 | } 265 | } 266 | } 267 | }) 268 | } 269 | } 270 | -------------------------------------------------------------------------------- /server/http3_0rtt_test.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "crypto/rand" 7 | "crypto/rsa" 8 | "crypto/tls" 9 | "crypto/x509" 10 | "crypto/x509/pkix" 11 | "encoding/json" 12 | "encoding/pem" 13 | "math/big" 14 | "net/http" 15 | "os" 16 | "testing" 17 | "time" 18 | 19 | "github.com/quic-go/quic-go" 20 | "github.com/quic-go/quic-go/http3" 21 | "github.com/stretchr/testify/assert" 22 | "github.com/stretchr/testify/require" 23 | "github.com/teilomillet/gollm" 24 | "github.com/teilomillet/hapax/config" 25 | "github.com/teilomillet/hapax/server/mocks" 26 | "go.uber.org/zap/zaptest" 27 | ) 28 | 29 | func generateTestCertificates(t *testing.T) (string, string) { 30 | certFile, err := os.CreateTemp("", "cert*.pem") 31 | require.NoError(t, err) 32 | keyFile, err := 
os.CreateTemp("", "key*.pem") 33 | require.NoError(t, err) 34 | 35 | // Generate self-signed certificate 36 | priv, err := rsa.GenerateKey(rand.Reader, 2048) 37 | require.NoError(t, err) 38 | 39 | template := x509.Certificate{ 40 | SerialNumber: big.NewInt(1), 41 | Subject: pkix.Name{ 42 | Organization: []string{"Test Co"}, 43 | }, 44 | NotBefore: time.Now(), 45 | NotAfter: time.Now().Add(time.Hour * 24 * 180), 46 | 47 | KeyUsage: x509.KeyUsageKeyEncipherment | x509.KeyUsageDigitalSignature, 48 | ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth}, 49 | BasicConstraintsValid: true, 50 | DNSNames: []string{"localhost"}, 51 | } 52 | 53 | derBytes, err := x509.CreateCertificate(rand.Reader, &template, &template, &priv.PublicKey, priv) 54 | require.NoError(t, err) 55 | 56 | // Write certificate 57 | err = pem.Encode(certFile, &pem.Block{Type: "CERTIFICATE", Bytes: derBytes}) 58 | require.NoError(t, err) 59 | 60 | // Write private key 61 | privBytes := x509.MarshalPKCS1PrivateKey(priv) 62 | err = pem.Encode(keyFile, &pem.Block{Type: "RSA PRIVATE KEY", Bytes: privBytes}) 63 | require.NoError(t, err) 64 | 65 | certFile.Close() 66 | keyFile.Close() 67 | 68 | return certFile.Name(), keyFile.Name() 69 | } 70 | 71 | func TestHTTP3_0RTT(t *testing.T) { 72 | // HTTP/3 (QUIC) requires specific UDP buffer sizes to function properly. 73 | // The quic-go library needs at least 7MB (7168 KB) for optimal performance. 74 | // Most CI environments have restricted UDP buffer sizes (typically 2MB max), 75 | // making it impossible to properly test HTTP/3 0-RTT functionality. 
76 | // 77 | // See: https://github.com/quic-go/quic-go/wiki/UDP-Buffer-Sizes 78 | if os.Getenv("CI") == "true" { 79 | t.Skip("Skipping HTTP/3 0-RTT test in CI environment due to UDP buffer size limitations (needs 7MB, CI typically allows only 2MB)") 80 | } 81 | 82 | // Create test certificates 83 | certFile, keyFile := generateTestCertificates(t) 84 | defer os.Remove(certFile) 85 | defer os.Remove(keyFile) 86 | 87 | // Create configuration with 0-RTT enabled 88 | cfg := &config.Config{ 89 | Server: config.ServerConfig{ 90 | Port: 8080, 91 | ReadTimeout: 30 * time.Second, 92 | WriteTimeout: 30 * time.Second, 93 | MaxHeaderBytes: 1 << 20, 94 | ShutdownTimeout: 30 * time.Second, 95 | HTTP3: &config.HTTP3Config{ 96 | Enabled: true, 97 | Port: 8443, 98 | TLSCertFile: certFile, 99 | TLSKeyFile: keyFile, 100 | IdleTimeout: 5 * time.Minute, 101 | MaxBiStreamsConcurrent: 1000, 102 | MaxUniStreamsConcurrent: 1000, 103 | MaxStreamReceiveWindow: 10 * 1024 * 1024, 104 | MaxConnectionReceiveWindow: 25 * 1024 * 1024, 105 | Enable0RTT: true, 106 | Max0RTTSize: 16 * 1024, 107 | Allow0RTTReplay: false, 108 | // Set UDP buffer size to 7MB as required by quic-go for proper operation 109 | // This value comes from quic-go's internal requirements: 110 | // https://github.com/quic-go/quic-go/wiki/UDP-Buffer-Sizes#non-bsd 111 | UDPReceiveBufferSize: 7168 * 1024, // 7MB (7168 KB) - minimum required by quic-go 112 | }, 113 | }, 114 | LLM: config.LLMConfig{ 115 | Provider: "mock", 116 | Model: "mock-model", 117 | SystemPrompt: "You are a test assistant", 118 | }, 119 | } 120 | 121 | // Create test logger 122 | logger := zaptest.NewLogger(t) 123 | 124 | // Create mock LLM 125 | mockLLM := mocks.NewMockLLM(func(ctx context.Context, prompt *gollm.Prompt) (string, error) { 126 | return "test response", nil 127 | }) 128 | 129 | // Create server with better error handling 130 | server, err := NewServerWithConfig(mocks.NewMockConfigWatcher(cfg), mockLLM, logger) 131 | require.NoError(t, err, 
"Failed to create server") 132 | require.NotNil(t, server, "Server instance should not be nil") 133 | 134 | // Start server 135 | ctx, cancel := context.WithCancel(context.Background()) 136 | defer cancel() 137 | 138 | errCh := make(chan error, 1) 139 | go func() { 140 | errCh <- server.Start(ctx) 141 | }() 142 | 143 | // Configure HTTP/3 client with longer timeouts 144 | transport := &http3.Transport{ 145 | TLSClientConfig: &tls.Config{ 146 | InsecureSkipVerify: true, 147 | }, 148 | QUICConfig: &quic.Config{ 149 | MaxIdleTimeout: 30 * time.Second, 150 | HandshakeIdleTimeout: 10 * time.Second, 151 | MaxStreamReceiveWindow: 10 * 1024 * 1024, 152 | MaxConnectionReceiveWindow: 25 * 1024 * 1024, 153 | KeepAlivePeriod: 5 * time.Second, 154 | Allow0RTT: true, 155 | }, 156 | } 157 | defer transport.Close() 158 | 159 | client := &http.Client{ 160 | Transport: transport, 161 | Timeout: 30 * time.Second, 162 | } 163 | 164 | // Wait for server to be ready 165 | require.Eventually(t, func() bool { 166 | resp, err := client.Get("https://localhost:8443/health") 167 | if err != nil { 168 | t.Logf("Server not ready: %v", err) 169 | return false 170 | } 171 | defer resp.Body.Close() 172 | return resp.StatusCode == http.StatusOK 173 | }, 10*time.Second, 100*time.Millisecond, "Server failed to start") 174 | 175 | t.Run("0-RTT Basic Functionality", func(t *testing.T) { 176 | // First request establishes connection 177 | resp, err := client.Get("https://localhost:8443/health") 178 | require.NoError(t, err) 179 | defer resp.Body.Close() 180 | assert.Equal(t, http.StatusOK, resp.StatusCode) 181 | 182 | // Second request should use 0-RTT 183 | resp, err = client.Get("https://localhost:8443/health") 184 | require.NoError(t, err) 185 | defer resp.Body.Close() 186 | assert.Equal(t, http.StatusOK, resp.StatusCode) 187 | }) 188 | 189 | t.Run("0-RTT Replay Protection with Real Data", func(t *testing.T) { 190 | // Create completion request 191 | reqBody := map[string]string{"input": "test"} 192 
| jsonData, err := json.Marshal(reqBody) 193 | require.NoError(t, err) 194 | 195 | // First request 196 | req1, err := http.NewRequest(http.MethodPost, "https://localhost:8443/v1/completions", bytes.NewBuffer(jsonData)) 197 | require.NoError(t, err) 198 | req1.Header.Set("Content-Type", "application/json") 199 | 200 | resp, err := client.Do(req1) 201 | require.NoError(t, err) 202 | defer resp.Body.Close() 203 | assert.Equal(t, http.StatusOK, resp.StatusCode) 204 | 205 | // Create a new request with the same data for replay 206 | req2, err := http.NewRequest(http.MethodPost, "https://localhost:8443/v1/completions", bytes.NewBuffer(jsonData)) 207 | require.NoError(t, err) 208 | req2.Header.Set("Content-Type", "application/json") 209 | 210 | // Immediate replay should be rejected 211 | resp, err = client.Do(req2) 212 | require.NoError(t, err) 213 | defer resp.Body.Close() 214 | assert.Equal(t, http.StatusTooEarly, resp.StatusCode) 215 | }) 216 | 217 | // Cleanup 218 | cancel() 219 | select { 220 | case err := <-errCh: 221 | assert.NoError(t, err) 222 | case <-time.After(5 * time.Second): 223 | t.Error("Server did not shut down within timeout") 224 | } 225 | } 226 | -------------------------------------------------------------------------------- /server/metrics/metrics.go: -------------------------------------------------------------------------------- 1 | package metrics 2 | 3 | import ( 4 | "net/http" 5 | 6 | "github.com/prometheus/client_golang/prometheus" 7 | "github.com/prometheus/client_golang/prometheus/collectors" 8 | "github.com/prometheus/client_golang/prometheus/promauto" 9 | "github.com/prometheus/client_golang/prometheus/promhttp" 10 | ) 11 | 12 | // Metrics encapsulates Prometheus metrics for the server. 
// Metrics encapsulates the Prometheus collectors used by the server.
// All collectors are registered against a private registry so tests can
// create isolated instances without colliding with the global default
// registry.
type Metrics struct {
	registry        *prometheus.Registry
	RequestsTotal   *prometheus.CounterVec   // requests by endpoint and status code
	RequestDuration *prometheus.HistogramVec // request latency by endpoint
	ActiveRequests  *prometheus.GaugeVec     // in-flight requests by endpoint
	ErrorsTotal     *prometheus.CounterVec   // errors by error type
	RateLimitHits   *prometheus.CounterVec   // rate-limit rejections by client
}

// NewMetrics creates a new Metrics instance with a custom registry.
//
// Besides the application collectors it registers the standard Go runtime
// and process collectors, and pre-touches a few label combinations so the
// corresponding series exist (at zero) from the first scrape.
func NewMetrics() *Metrics {
	registry := prometheus.NewRegistry()
	factory := promauto.With(registry)

	m := &Metrics{
		registry: registry,
		RequestsTotal: factory.NewCounterVec(
			prometheus.CounterOpts{
				Name: "hapax_http_requests_total",
				Help: "Total number of HTTP requests by endpoint and status",
			},
			[]string{"endpoint", "status"},
		),
		RequestDuration: factory.NewHistogramVec(
			prometheus.HistogramOpts{
				Name:    "hapax_http_request_duration_seconds",
				Help:    "Duration of HTTP requests in seconds",
				Buckets: prometheus.DefBuckets,
			},
			[]string{"endpoint"},
		),
		ActiveRequests: factory.NewGaugeVec(
			prometheus.GaugeOpts{
				Name: "hapax_http_active_requests",
				Help: "Number of currently active HTTP requests",
			},
			[]string{"endpoint"},
		),
		ErrorsTotal: factory.NewCounterVec(
			prometheus.CounterOpts{
				Name: "hapax_errors_total",
				Help: "Total number of errors by type",
			},
			[]string{"type"},
		),
		RateLimitHits: factory.NewCounterVec(
			prometheus.CounterOpts{
				Name: "hapax_rate_limit_hits_total",
				Help: "Total number of rate limit hits by client",
			},
			[]string{"client"},
		),
	}

	// Register default Go runtime and process metrics.
	registry.MustRegister(collectors.NewGoCollector())
	registry.MustRegister(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}))

	// Initialize some default metrics so dashboards show zero-valued series
	// immediately instead of "no data".
	m.RequestsTotal.WithLabelValues("/health", "200").Add(0)
	m.RequestsTotal.WithLabelValues("/metrics", "200").Add(0)
	// NOTE(review): Observe(0) records a real zero-latency sample (the
	// histogram count becomes 1), it does not merely create the series —
	// confirm that is intended.
	m.RequestDuration.WithLabelValues("/health").Observe(0)
	m.RequestDuration.WithLabelValues("/metrics").Observe(0)
	// NOTE(review): these gauge labels ("queued"/"processing") differ from
	// the URL-path labels the PrometheusMetrics middleware uses on the same
	// gauge — confirm the intended label scheme for ActiveRequests.
	m.ActiveRequests.WithLabelValues("queued").Add(0)
	m.ActiveRequests.WithLabelValues("processing").Add(0)

	return m
}

// Handler returns an http.Handler that serves this instance's private
// registry in the Prometheus exposition format.
func (m *Metrics) Handler() http.Handler {
	return promhttp.HandlerFor(m.registry, promhttp.HandlerOpts{
		EnableOpenMetrics: false, // Disable OpenMetrics format to avoid escaping=values
	})
}

// Authentication middleware validates API keys and manages authentication.
//
// Accepted credentials, checked in order:
//  1. a non-empty X-API-Key header, or
//  2. a non-empty Bearer token in the Authorization header.
//
// Requests with neither receive a structured 401 response.
//
// SECURITY NOTE(review): both paths currently accept ANY non-empty value
// (see TODOs below); this is a placeholder, not real authentication.
func Authentication(next http.Handler) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Check for API key
		apiKey := r.Header.Get("X-API-Key")
		if apiKey != "" {
			// TODO: Validate API key against configuration or database
			// For now, we'll accept any non-empty key
			next.ServeHTTP(w, r)
			return
		}

		// Check for Bearer token
		authHeader := r.Header.Get("Authorization")
		if authHeader != "" && strings.HasPrefix(authHeader, "Bearer ") {
			token := strings.TrimPrefix(authHeader, "Bearer ")
			if token != "" {
				// TODO: Validate token against configuration or database
				// For now, we'll accept any non-empty token
				next.ServeHTTP(w, r)
				return
			}
		}

		errors.ErrorWithType(w, "Missing or invalid authentication", errors.AuthenticationError, http.StatusUnauthorized)
	})
}
// contextKey is a private type for context keys defined in this package,
// preventing collisions with keys set by other packages.
type contextKey string

const (
	// RequestIDKey carries the per-request ID set by the RequestID middleware.
	RequestIDKey contextKey = "request_id"
	// XTestTimeoutKey is used by tests to inject timeout behavior.
	XTestTimeoutKey contextKey = "X-Test-Timeout"
)

// ResponseWriter wraps http.ResponseWriter to capture status code and size
// for request logging.
//
// NOTE(review): this package defines two near-identical wrappers
// (ResponseWriter here, responseWriter in the metrics middleware) —
// consider consolidating them.
type ResponseWriter struct {
	http.ResponseWriter
	status int   // last status code passed to WriteHeader; 0 if never called
	size   int64 // cumulative bytes written to the body
}

// NewResponseWriter creates a new ResponseWriter wrapping w.
func NewResponseWriter(w http.ResponseWriter) *ResponseWriter {
	return &ResponseWriter{ResponseWriter: w}
}

// WriteHeader records the status code and forwards it to the underlying
// writer. There is no guard against double WriteHeader calls; net/http
// logs a warning in that case.
func (w *ResponseWriter) WriteHeader(status int) {
	w.status = status
	w.ResponseWriter.WriteHeader(status)
}

// Write forwards the body bytes and accumulates the written size.
func (w *ResponseWriter) Write(b []byte) (int, error) {
	size, err := w.ResponseWriter.Write(b)
	w.size += int64(size)
	return size, err
}

// Status returns the recorded status code, defaulting to 200 when the
// handler wrote a body without calling WriteHeader explicitly.
func (w *ResponseWriter) Status() int {
	if w.status == 0 {
		return http.StatusOK
	}
	return w.status
}

// Size returns the number of body bytes written so far.
func (w *ResponseWriter) Size() int64 {
	return w.size
}

// Logging middleware logs request and response details using the supplied
// structured logger: one "Request started" entry before the handler runs
// and one "Request completed" entry after it returns.
//
// NOTE(review): the completion entry carries only duration/status/size, not
// the method, path, or request ID — correlating the two entries relies on
// log ordering. Consider adding shared fields; confirm before changing.
func Logging(logger *zap.Logger) func(http.Handler) http.Handler {
	return func(next http.Handler) http.Handler {
		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			start := time.Now()
			rw := NewResponseWriter(w)

			// Log request details
			logger.Info("Request started",
				zap.String("method", r.Method),
				zap.String("path", r.URL.Path),
				zap.String("remote_addr", r.RemoteAddr),
				zap.String("user_agent", r.UserAgent()),
			)

			next.ServeHTTP(rw, r)

			// Log response details
			logger.Info("Request completed",
				zap.Duration("duration", time.Since(start)),
				zap.Int("status", rw.Status()),
				zap.Int64("size", rw.Size()),
			)
		})
	}
}

// PrometheusMetrics middleware records HTTP metrics using Prometheus.
// It wraps the handler to measure request count, duration, active
// requests, and error counts, all recorded against the supplied Metrics.
//
// NOTE(review): metrics are labeled with the raw r.URL.Path, so endpoints
// with path parameters (e.g. /v1/users/123) create one time series per
// distinct path — unbounded label cardinality. Consider labeling with the
// matched route pattern instead; confirm against the router in use.
func PrometheusMetrics(m *metrics.Metrics) func(next http.Handler) http.Handler {
	return func(next http.Handler) http.Handler {
		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			// Record the start time of the request.
			start := time.Now()

			// Track in-flight requests for this path; the deferred Dec
			// guarantees the gauge returns to its prior value even if the
			// handler panics.
			m.ActiveRequests.WithLabelValues(r.URL.Path).Inc()
			defer m.ActiveRequests.WithLabelValues(r.URL.Path).Dec()

			// Wrap the writer so the status code chosen by the handler can
			// be observed after it returns. Default to 200 for handlers
			// that write a body without calling WriteHeader.
			rw := &responseWriter{ResponseWriter: w, statusCode: http.StatusOK}

			// Call the next handler in the chain.
			next.ServeHTTP(rw, r)

			// Record request count and latency.
			duration := time.Since(start).Seconds()
			status := strconv.Itoa(rw.statusCode)

			m.RequestsTotal.WithLabelValues(r.URL.Path, status).Inc()
			m.RequestDuration.WithLabelValues(r.URL.Path).Observe(duration)

			// Classify failures: 5xx as server errors, 4xx as client errors.
			if rw.statusCode >= 500 {
				m.ErrorsTotal.WithLabelValues("server_error").Inc()
			} else if rw.statusCode >= 400 {
				m.ErrorsTotal.WithLabelValues("client_error").Inc()
			}
		})
	}
}

// responseWriter wraps http.ResponseWriter to capture the status code for
// the metrics middleware. wroteHeader tracks whether WriteHeader has been
// called so Write can supply the implicit 200 exactly once.
type responseWriter struct {
	http.ResponseWriter
	statusCode  int
	wroteHeader bool
}

// WriteHeader records the status code, marks the header as written, and
// forwards the call to the wrapped writer.
func (rw *responseWriter) WriteHeader(code int) {
	rw.statusCode = code
	rw.wroteHeader = true
	rw.ResponseWriter.WriteHeader(code)
}

// Write forwards body bytes, first emitting an implicit 200 header if the
// handler never called WriteHeader (mirroring net/http's own behavior so
// the recorded status matches what the client saw).
func (rw *responseWriter) Write(b []byte) (int, error) {
	if !rw.wroteHeader {
		rw.WriteHeader(http.StatusOK)
	}
	return rw.ResponseWriter.Write(b)
}
// TestPrometheusMetrics verifies that the metrics middleware records
// request counts, restores the active-request gauge to zero, and
// classifies 5xx responses as server errors.
func TestPrometheusMetrics(t *testing.T) {
	// Create new metrics instance for testing
	m := metrics.NewMetrics()

	tests := []struct {
		name           string
		handler        http.HandlerFunc
		expectedCode   int
		expectedPath   string
		expectedStatus string
	}{
		{
			name: "success request",
			handler: func(w http.ResponseWriter, r *http.Request) {
				w.WriteHeader(http.StatusOK)
			},
			expectedCode:   http.StatusOK,
			expectedPath:   "/",
			expectedStatus: "200",
		},
		{
			name: "error request",
			handler: func(w http.ResponseWriter, r *http.Request) {
				w.WriteHeader(http.StatusInternalServerError)
			},
			expectedCode:   http.StatusInternalServerError,
			expectedPath:   "/",
			expectedStatus: "500",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Create test server wrapping the case's handler with the middleware.
			handler := middleware.PrometheusMetrics(m)(tt.handler)
			server := httptest.NewServer(handler)
			defer server.Close()

			// Make request
			resp, err := http.Get(server.URL)
			if err != nil {
				t.Fatal(err)
			}
			defer resp.Body.Close()

			// Check response code
			assert.Equal(t, tt.expectedCode, resp.StatusCode)

			// Check request metrics. NOTE(review): both subtests share one
			// Metrics instance; counts stay at 1 only because each case uses
			// a distinct status label.
			requestCount := testutil.ToFloat64(m.RequestsTotal.WithLabelValues(tt.expectedPath, tt.expectedStatus))
			assert.Equal(t, float64(1), requestCount)

			// Active requests should return to 0 after the request completes.
			activeRequests := testutil.ToFloat64(m.ActiveRequests.WithLabelValues(tt.expectedPath))
			assert.Equal(t, float64(0), activeRequests)

			// Check error metrics for 5xx responses
			if tt.expectedCode >= 500 {
				errorCount := testutil.ToFloat64(m.ErrorsTotal.WithLabelValues("server_error"))
				assert.Equal(t, float64(1), errorCount)
			}
		})
	}
}

// TestMetricsObservability systematically validates metrics tracking mechanisms.
//
// NOTE(review): this test increments requestsTotal/errorsTotal itself in
// the test body rather than asserting on counters maintained by the
// provider manager — it validates the test's own bookkeeping plus the
// manager's error propagation, not manager-side instrumentation. Confirm
// whether that is the intent.
func TestMetricsObservability(t *testing.T) {
	// Comprehensive test scenarios: one success path, one provider failure.
	testCases := []struct {
		name             string
		providerBehavior func(context.Context, *gollm.Prompt) (string, error)
		expectedMetrics  map[string]float64
		expectedError    bool
	}{
		{
			name: "Successful Provider Interaction",
			providerBehavior: func(ctx context.Context, prompt *gollm.Prompt) (string, error) {
				return "Successful response", nil
			},
			expectedMetrics: map[string]float64{
				"hapax_provider_requests_total": 1,
				"hapax_provider_errors_total":   0,
			},
			expectedError: false,
		},
		{
			name: "Provider Failure Scenario",
			providerBehavior: func(ctx context.Context, prompt *gollm.Prompt) (string, error) {
				return "", fmt.Errorf("simulated provider error")
			},
			expectedMetrics: map[string]float64{
				"hapax_provider_requests_total": 1,
				"hapax_provider_errors_total":   1,
			},
			expectedError: true,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			// Counters are created fresh per subtest so expected values are absolute.
			requestsTotal := prometheus.NewCounterVec(
				prometheus.CounterOpts{
					Name: "hapax_provider_requests_total",
					Help: "Total number of provider requests",
				},
				[]string{"provider"},
			)

			errorsTotal := prometheus.NewCounterVec(
				prometheus.CounterOpts{
					Name: "hapax_provider_errors_total",
					Help: "Total number of provider errors",
				},
				[]string{"provider"},
			)

			// Isolated registry so Gather() only sees these two collectors.
			registry := prometheus.NewRegistry()
			registry.MustRegister(requestsTotal, errorsTotal)

			// Mock provider whose Generate behavior is driven by the test case.
			mockProvider := mocks.NewMockLLMWithConfig(
				"test",
				"test-model",
				func(ctx context.Context, prompt *gollm.Prompt) (string, error) {
					// Directly use the test case's provider behavior
					return tc.providerBehavior(ctx, prompt)
				},
			)

			// Construct provider configuration
			cfg := &config.Config{
				TestMode: true,
				Providers: map[string]config.ProviderConfig{
					"test": {
						Type:  "test",
						Model: "test-model",
					},
				},
				ProviderPreference: []string{"test"},
			}

			// Initialize provider manager
			logger := zap.NewNop()
			manager, err := provider.NewManager(cfg, logger, registry)
			require.NoError(t, err)

			// Configure providers
			providers := map[string]gollm.LLM{
				"test": mockProvider,
			}
			manager.SetProviders(providers)

			// Prepare test prompt
			prompt := &gollm.Prompt{
				Messages: []gollm.PromptMessage{
					{Role: "user", Content: "Test metrics observability"},
				},
			}

			// Increment request metric before execution (see NOTE above: the
			// test maintains these counters itself).
			requestsTotal.WithLabelValues("test").Inc()

			// Execute request, capturing both the inner generation error and
			// the manager's propagated error separately.
			var executionError error
			err = manager.Execute(context.Background(), func(llm gollm.LLM) error {
				_, execErr := llm.Generate(context.Background(), prompt)

				// Track error metric for failure scenarios
				if execErr != nil {
					errorsTotal.WithLabelValues("test").Inc()
				}

				// Capture and preserve execution error
				executionError = execErr
				return execErr
			}, prompt)

			// Error expectation validation
			if tc.expectedError {
				require.Error(t, executionError, "Expected error in failure scenario")
				require.Error(t, err, "Manager execution should propagate error")
			} else {
				require.NoError(t, executionError, "No error expected in successful scenario")
				require.NoError(t, err, "Manager execution should succeed")
			}

			// Gather everything from the isolated registry and compare each
			// counter against the expected absolute value.
			mfs, err := registry.Gather()
			require.NoError(t, err)

			for _, mf := range mfs {
				for _, metric := range mf.GetMetric() {
					switch mf.GetName() {
					case "hapax_provider_requests_total":
						actualValue := metric.GetCounter().GetValue()
						assert.Equal(t,
							tc.expectedMetrics["hapax_provider_requests_total"],
							actualValue,
							"Requests total metric did not match expected value",
						)

					case "hapax_provider_errors_total":
						actualValue := metric.GetCounter().GetValue()
						assert.Equal(t,
							tc.expectedMetrics["hapax_provider_errors_total"],
							actualValue,
							"Errors total metric did not match expected value",
						)
					}
				}
			}
		})
	}
}
/server/middleware/middleware.go: -------------------------------------------------------------------------------- 1 | package middleware 2 | 3 | import ( 4 | "net/http" 5 | "time" 6 | 7 | "github.com/go-chi/chi/v5/middleware" 8 | "github.com/teilomillet/hapax/errors" 9 | ) 10 | 11 | // RequestTimer measures request processing time 12 | // It wraps the HTTP handler to calculate the duration of the request 13 | // and sets the X-Response-Time header in the response. 14 | func RequestTimer(next http.Handler) http.Handler { 15 | return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 16 | start := time.Now() // Record the start time of the request 17 | ww := middleware.NewWrapResponseWriter(w, r.ProtoMajor) // Wrap the response writer 18 | next.ServeHTTP(ww, r) // Call the next handler 19 | duration := time.Since(start) // Calculate the duration 20 | w.Header().Set("X-Response-Time", duration.String()) // Set the response header 21 | }) 22 | } 23 | 24 | // PanicRecovery recovers from panics and returns a 500 error 25 | func PanicRecovery(next http.Handler) http.Handler { 26 | return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 27 | defer func() { 28 | if err := recover(); err != nil { 29 | errors.ErrorWithType(w, "Internal server error", errors.InternalError, http.StatusInternalServerError) 30 | } 31 | }() 32 | next.ServeHTTP(w, r) 33 | }) 34 | } 35 | 36 | // CORS handles Cross-Origin Resource Sharing 37 | // It allows or denies requests from different origins based on the configuration. 
38 | func CORS(next http.Handler) http.Handler { 39 | return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 40 | // Set CORS headers to allow cross-origin requests 41 | w.Header().Set("Access-Control-Allow-Origin", "*") // Allow all origins 42 | w.Header().Set("Access-Control-Allow-Methods", "GET, POST, PUT, DELETE, OPTIONS") // Allow GET, POST, PUT, DELETE, and OPTIONS methods 43 | w.Header().Set("Access-Control-Allow-Headers", "Accept, Authorization, Content-Type, X-CSRF-Token") // Allow Accept, Authorization, Content-Type, and X-CSRF-Token headers 44 | 45 | // Handle preflight requests 46 | if r.Method == http.MethodOptions { 47 | // Respond with 204 No Content for preflight requests 48 | w.WriteHeader(http.StatusNoContent) 49 | return 50 | } 51 | 52 | // Call the next handler for non-preflight requests 53 | next.ServeHTTP(w, r) 54 | }) 55 | } 56 | -------------------------------------------------------------------------------- /server/middleware/middleware_test.go: -------------------------------------------------------------------------------- 1 | package middleware_test 2 | 3 | import ( 4 | "net/http" 5 | "net/http/httptest" 6 | "testing" 7 | "time" 8 | 9 | "github.com/prometheus/client_golang/prometheus" 10 | "github.com/stretchr/testify/assert" 11 | "github.com/teilomillet/hapax/server/metrics" 12 | "github.com/teilomillet/hapax/server/middleware" 13 | ) 14 | 15 | func TestRequestID(t *testing.T) { 16 | handler := middleware.RequestID(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 17 | // Handler should see request ID in context 18 | requestID := r.Context().Value(middleware.RequestIDKey).(string) 19 | assert.NotEmpty(t, requestID) 20 | assert.Equal(t, requestID, w.Header().Get("X-Request-ID")) 21 | })) 22 | 23 | tests := []struct { 24 | name string 25 | }{ 26 | { 27 | name: "generates new request ID", 28 | }, 29 | } 30 | 31 | for _, tt := range tests { 32 | t.Run(tt.name, func(t *testing.T) { 33 | req := 
// TestRequestID verifies that the middleware places a UUID in both the
// request context and the X-Request-ID response header.
func TestRequestID(t *testing.T) {
	handler := middleware.RequestID(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Handler should see request ID in context; the unchecked assertion
		// would fail the test loudly if the middleware did not set it.
		requestID := r.Context().Value(middleware.RequestIDKey).(string)
		assert.NotEmpty(t, requestID)
		assert.Equal(t, requestID, w.Header().Get("X-Request-ID"))
	}))

	tests := []struct {
		name string
	}{
		{
			name: "generates new request ID",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			req := httptest.NewRequest("GET", "/", nil)
			rec := httptest.NewRecorder()

			handler.ServeHTTP(rec, req)

			// Check response header
			respID := rec.Header().Get("X-Request-ID")
			assert.NotEmpty(t, respID)

			// Request ID should be a UUID
			assert.Len(t, respID, 36) // UUID v4 string length
		})
	}
}

// TestRequestTimer checks that X-Response-Time is set and covers at least
// the handler's simulated work time.
func TestRequestTimer(t *testing.T) {
	handler := middleware.RequestTimer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		time.Sleep(10 * time.Millisecond) // Simulate some work
	}))

	req := httptest.NewRequest("GET", "/", nil)
	rec := httptest.NewRecorder()

	handler.ServeHTTP(rec, req)

	respTime := rec.Header().Get("X-Response-Time")
	assert.NotEmpty(t, respTime)

	duration, err := time.ParseDuration(respTime)
	assert.NoError(t, err)
	assert.GreaterOrEqual(t, duration, 10*time.Millisecond)
}

// TestPanicRecovery confirms a panicking handler is converted into a 500.
func TestPanicRecovery(t *testing.T) {
	handler := middleware.PanicRecovery(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		panic("test panic")
	}))

	req := httptest.NewRequest("GET", "/", nil)
	rec := httptest.NewRecorder()

	handler.ServeHTTP(rec, req)

	assert.Equal(t, http.StatusInternalServerError, rec.Code)
}

// TestCORS exercises both the OPTIONS preflight short-circuit and the
// header pass-through on normal requests.
func TestCORS(t *testing.T) {
	tests := []struct {
		name            string
		method          string
		expectedStatus  int
		expectedHeaders map[string]string
	}{
		{
			name:           "preflight request",
			method:         "OPTIONS",
			expectedStatus: http.StatusNoContent,
			expectedHeaders: map[string]string{
				"Access-Control-Allow-Origin":  "*",
				"Access-Control-Allow-Methods": "GET, POST, PUT, DELETE, OPTIONS",
				"Access-Control-Allow-Headers": "Accept, Authorization, Content-Type, X-CSRF-Token",
			},
		},
		{
			name:           "normal request",
			method:         "GET",
			expectedStatus: http.StatusOK,
			expectedHeaders: map[string]string{
				"Access-Control-Allow-Origin":  "*",
				"Access-Control-Allow-Methods": "GET, POST, PUT, DELETE, OPTIONS",
				"Access-Control-Allow-Headers": "Accept, Authorization, Content-Type, X-CSRF-Token",
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			handler := middleware.CORS(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
				w.WriteHeader(http.StatusOK)
			}))

			req := httptest.NewRequest(tt.method, "/", nil)
			rr := httptest.NewRecorder()

			handler.ServeHTTP(rr, req)

			assert.Equal(t, tt.expectedStatus, rr.Code)
			for key, value := range tt.expectedHeaders {
				assert.Equal(t, value, rr.Header().Get(key))
			}
		})
	}
}

// TestAuthentication covers the three credential states: absent header,
// malformed Authorization value, and a valid Bearer token.
func TestAuthentication(t *testing.T) {
	// Create test handler
	nextHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusOK)
	})
	handler := middleware.Authentication(nextHandler)

	// Test without auth header
	req := httptest.NewRequest("GET", "/", nil)
	w := httptest.NewRecorder()
	handler.ServeHTTP(w, req)
	assert.Equal(t, http.StatusUnauthorized, w.Code)

	// Test with invalid auth header (missing "Bearer " prefix)
	req = httptest.NewRequest("GET", "/", nil)
	req.Header.Set("Authorization", "invalid")
	w = httptest.NewRecorder()
	handler.ServeHTTP(w, req)
	assert.Equal(t, http.StatusUnauthorized, w.Code)

	// Test with valid auth header
	req = httptest.NewRequest("GET", "/", nil)
	req.Header.Set("Authorization", "Bearer valid-token")
	w = httptest.NewRecorder()
	handler.ServeHTTP(w, req)
	assert.Equal(t, http.StatusOK, w.Code)
}

// TestTimeout checks both the timeout (504) and the fast-path (200) cases.
// NOTE(review): the first case really sleeps 2s, making this one of the
// slowest unit tests in the package — consider shrinking the durations.
func TestTimeout(t *testing.T) {
	// Create test handler that sleeps past the deadline.
	nextHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		time.Sleep(2 * time.Second)
		w.WriteHeader(http.StatusOK)
	})
	handler := middleware.Timeout(1 * time.Second)(nextHandler)

	// Test timeout
	req := httptest.NewRequest("GET", "/", nil)
	w := httptest.NewRecorder()
	handler.ServeHTTP(w, req)
	assert.Equal(t, http.StatusGatewayTimeout, w.Code)

	// Test success (no timeout)
	nextHandler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusOK)
	})
	handler = middleware.Timeout(1 * time.Second)(nextHandler)
	req = httptest.NewRequest("GET", "/", nil)
	w = httptest.NewRecorder()
	handler.ServeHTTP(w, req)
	assert.Equal(t, http.StatusOK, w.Code)
}

// TestRateLimit sends 11 direct requests from one IP and expects the 11th
// to be rejected (burst capacity is 10).
//
// NOTE(review): unlike TestRateLimitMetrics, this test does NOT call
// middleware.ResetRateLimiters(), so it depends on the package-level
// limiter map being fresh — confirm test-order independence.
func TestRateLimit(t *testing.T) {
	// Reset metrics registry
	prometheus.DefaultRegisterer = prometheus.NewRegistry()

	// Create metrics
	m := metrics.NewMetrics()

	// Create test handler
	nextHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusOK)
	})

	// Create middleware handler
	handler := middleware.RateLimit(m)(nextHandler)

	// Test rate limit
	for i := 0; i < 11; i++ {
		req := httptest.NewRequest("GET", "/", nil)
		req.RemoteAddr = "127.0.0.1:1234"
		w := httptest.NewRecorder()
		handler.ServeHTTP(w, req)

		if i < 10 {
			assert.Equal(t, http.StatusOK, w.Code)
		} else {
			assert.Equal(t, http.StatusTooManyRequests, w.Code)
		}
	}
}
// rateLimiters holds the rate limiters for each visitor IP address
// and ensures safe concurrent access using a read-write mutex.
//
// NOTE(review): entries are never evicted, so this map grows without
// bound as distinct client IPs are seen — consider expiring idle limiters.
type rateLimiters struct {
	// visitors is a map of IP addresses to their corresponding rate limiters.
	visitors map[string]*rate.Limiter
	// mu is a read-write mutex that protects access to the visitors map.
	mu sync.RWMutex
}

// limiters is a package-level instance shared by every RateLimit handler
// in the process.
var (
	limiters = &rateLimiters{
		visitors: make(map[string]*rate.Limiter),
	}
)

// GetOrCreate retrieves the rate limiter for the given IP address,
// creating one via create and caching it if none exists yet. The write
// lock is held for the whole operation, so concurrent callers for the
// same IP always observe a single limiter.
func (l *rateLimiters) GetOrCreate(ip string, create func() *rate.Limiter) *rate.Limiter {
	l.mu.Lock()
	defer l.mu.Unlock()

	// Check if a rate limiter already exists for the given IP address.
	limiter, exists := l.visitors[ip]
	if !exists {
		limiter = create()
		l.visitors[ip] = limiter
	}

	return limiter
}

// RateLimit creates middleware that applies a per-IP token-bucket rate
// limit to incoming requests and records rejections in the supplied
// metrics.
func RateLimit(metrics *metrics.Metrics) func(http.Handler) http.Handler {
	return func(next http.Handler) http.Handler {
		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			// Extract the client IP by stripping the trailing :port.
			// NOTE(review): for an IPv6 RemoteAddr this cuts at the LAST
			// colon; a bracketed "[::1]:8080" yields "[::1" and an unported
			// "::1" loses its final group. net.SplitHostPort would be safer —
			// confirm whether RemoteAddr can take those forms here.
			ip := r.RemoteAddr
			if idx := strings.LastIndex(ip, ":"); idx != -1 {
				ip = ip[:idx]
			}

			// rate.Every(time.Minute) refills ONE token per minute with a
			// burst capacity of 10: a client gets 10 immediate requests,
			// then one per minute — NOT a sustained 10 requests/minute as
			// the error metadata below ("limit": 10, "window": "1m0s")
			// implies. TODO confirm which policy is intended.
			limiter := limiters.GetOrCreate(ip, func() *rate.Limiter {
				return rate.NewLimiter(rate.Every(time.Minute), 10)
			})

			// Try to consume a token for this request.
			if !limiter.Allow() {
				// Rejected: record the hit, labeled by client IP.
				metrics.RateLimitHits.WithLabelValues(ip).Inc()
				var requestID string
				if id := r.Context().Value(RequestIDKey); id != nil {
					requestID = id.(string)
				}

				// Build the structured 429 response.
				errResp := errors.NewError(
					errors.RateLimitError,
					"Rate limit exceeded",
					http.StatusTooManyRequests,
					requestID,
					map[string]interface{}{
						"limit":  int64(10), // Use int64 to ensure it's not converted to float64
						"window": "1m0s",
					},
					nil,
				)

				errors.WriteError(w, errResp)
				return
			}

			// Token available: pass the request through.
			next.ServeHTTP(w, r)
		})
	}
}
101 | func ResetRateLimiters() { 102 | limiters.mu.Lock() 103 | defer limiters.mu.Unlock() 104 | limiters.visitors = make(map[string]*rate.Limiter) 105 | } 106 | -------------------------------------------------------------------------------- /server/middleware/ratelimit_test.go: -------------------------------------------------------------------------------- 1 | package middleware_test 2 | 3 | import ( 4 | "net/http" 5 | "net/http/httptest" 6 | "testing" 7 | 8 | "github.com/prometheus/client_golang/prometheus/testutil" 9 | "github.com/stretchr/testify/assert" 10 | "github.com/teilomillet/hapax/server/metrics" 11 | "github.com/teilomillet/hapax/server/middleware" 12 | ) 13 | 14 | func TestRateLimitMetrics(t *testing.T) { 15 | // Create new metrics instance for testing 16 | m := metrics.NewMetrics() 17 | 18 | // Reset rate limiters 19 | middleware.ResetRateLimiters() 20 | 21 | // Create test handler 22 | handler := middleware.RateLimit(m)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 23 | w.WriteHeader(http.StatusOK) 24 | })) 25 | 26 | // Create test server 27 | server := httptest.NewServer(handler) 28 | defer server.Close() 29 | 30 | // Make requests to trigger rate limit 31 | client := &http.Client{} 32 | testIP := "127.0.0.1" 33 | 34 | // Make 11 requests (1 more than limit) 35 | for i := 0; i < 11; i++ { 36 | req, err := http.NewRequest("GET", server.URL, nil) 37 | assert.NoError(t, err) 38 | req.RemoteAddr = testIP + ":1234" // Set test IP 39 | 40 | resp, err := client.Do(req) 41 | assert.NoError(t, err) 42 | resp.Body.Close() 43 | 44 | // Last request should be rate limited 45 | if i == 10 { 46 | assert.Equal(t, http.StatusTooManyRequests, resp.StatusCode) 47 | 48 | // Check rate limit metric 49 | rateLimitCount := testutil.ToFloat64(m.RateLimitHits.WithLabelValues(testIP)) 50 | assert.Equal(t, float64(1), rateLimitCount) 51 | } 52 | } 53 | } 54 | -------------------------------------------------------------------------------- 
/server/middleware/recovery.go: -------------------------------------------------------------------------------- 1 | // Package middleware provides various middleware functions for HTTP handlers. 2 | package middleware 3 | 4 | import ( 5 | "fmt" 6 | "net/http" 7 | "runtime/debug" 8 | 9 | "github.com/teilomillet/hapax/errors" 10 | "go.uber.org/zap" 11 | ) 12 | 13 | // Recovery middleware recovers from panics and logs the error 14 | // It takes a zap.Logger instance for logging errors. 15 | func Recovery(logger *zap.Logger) func(http.Handler) http.Handler { 16 | return func(next http.Handler) http.Handler { 17 | return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 18 | // Defer a function to recover from panics 19 | defer func() { 20 | if err := recover(); err != nil { 21 | // Capture the stack trace 22 | stack := debug.Stack() 23 | // Log the error and stack trace 24 | logger.Error("Panic recovered", 25 | zap.Any("error", err), 26 | zap.ByteString("stack", stack), 27 | ) 28 | 29 | // Retrieve the request ID from the context 30 | requestID := r.Context().Value(RequestIDKey).(string) 31 | // Write an internal server error response 32 | errors.WriteError(w, errors.NewInternalError( 33 | requestID, 34 | fmt.Errorf("internal server error: %v", err), 35 | )) 36 | } 37 | }() 38 | 39 | // Call the next handler in the chain 40 | next.ServeHTTP(w, r) 41 | }) 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /server/middleware/request_id.go: -------------------------------------------------------------------------------- 1 | // Package middleware provides various middleware functions for HTTP handlers. 2 | package middleware 3 | 4 | import ( 5 | "context" 6 | "net/http" 7 | 8 | "github.com/google/uuid" 9 | ) 10 | 11 | // RequestID middleware adds a unique request ID to the context 12 | // and sets it in the response header. 
13 | func RequestID(next http.Handler) http.Handler { 14 | return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 15 | // Generate a unique request ID using UUID. 16 | requestID := uuid.New().String() 17 | 18 | // Set the request ID in the response header for tracking. 19 | w.Header().Set("X-Request-ID", requestID) 20 | 21 | // Add the request ID to the request context for downstream handlers. 22 | ctx := context.WithValue(r.Context(), RequestIDKey, requestID) 23 | // Call the next handler with the updated context. 24 | next.ServeHTTP(w, r.WithContext(ctx)) 25 | }) 26 | } 27 | -------------------------------------------------------------------------------- /server/middleware/timeout.go: -------------------------------------------------------------------------------- 1 | // Package middleware provides various middleware functions for HTTP handlers. 2 | package middleware 3 | 4 | import ( 5 | "context" 6 | "net/http" 7 | "time" 8 | 9 | "github.com/teilomillet/hapax/errors" 10 | ) 11 | 12 | const defaultTimeout = 5 * time.Second 13 | 14 | // timeoutWriter wraps http.ResponseWriter to track if a response has been written 15 | // It uses a channel to signal when the response has been sent. 16 | type timeoutWriter struct { 17 | http.ResponseWriter 18 | written chan bool 19 | } 20 | 21 | // Write writes the data to the connection and tracks if the response has been written. 22 | func (tw *timeoutWriter) Write(b []byte) (int, error) { 23 | n, err := tw.ResponseWriter.Write(b) 24 | if n > 0 { 25 | select { 26 | case tw.written <- true: 27 | default: 28 | } 29 | } 30 | return n, err 31 | } 32 | 33 | // WriteHeader sends an HTTP response header and tracks if the response has been written. 34 | func (tw *timeoutWriter) WriteHeader(code int) { 35 | // Call the original WriteHeader method. 
36 | tw.ResponseWriter.WriteHeader(code) 37 | select { 38 | case tw.written <- true: 39 | default: 40 | } 41 | } 42 | 43 | // hasWritten checks if the response has been written. 44 | func (tw *timeoutWriter) hasWritten() bool { 45 | select { 46 | case <-tw.written: 47 | return true 48 | default: 49 | return false 50 | } 51 | } 52 | 53 | // Timeout middleware adds a timeout to the request context 54 | // It allows you to specify a duration after which the request will be aborted if not completed. 55 | // 56 | // The Timeout middleware works by creating a new context with a timeout, and using a custom 57 | // timeoutWriter to track whether a response has been written. If the request times out and 58 | // no response has been written, it sends a timeout error response. 59 | func Timeout(timeout time.Duration) func(http.Handler) http.Handler { 60 | return func(next http.Handler) http.Handler { 61 | return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 62 | // Create a context with timeout 63 | if timeout == 0 { 64 | timeout = defaultTimeout 65 | } 66 | ctx, cancel := context.WithTimeout(r.Context(), timeout) 67 | defer cancel() // Ensure cancel is called to release resources 68 | 69 | // Create a channel to signal completion 70 | done := make(chan struct{}) 71 | 72 | // Use the custom timeoutWriter to track response status. 
73 | tw := &timeoutWriter{ 74 | ResponseWriter: w, 75 | written: make(chan bool, 1), 76 | } 77 | 78 | // Process the request in a goroutine 79 | go func() { 80 | defer func() { 81 | close(done) 82 | if ctx.Err() == context.Canceled { 83 | cancel() 84 | } 85 | }() 86 | next.ServeHTTP(tw, r.WithContext(ctx)) 87 | }() 88 | 89 | // Wait for either completion or timeout 90 | select { 91 | case <-done: 92 | // Request completed normally 93 | return 94 | case <-ctx.Done(): 95 | // Request timed out 96 | if !tw.hasWritten() { 97 | // Only write error if nothing has been written yet 98 | var requestID string 99 | if id := r.Context().Value(RequestIDKey); id != nil { 100 | requestID = id.(string) 101 | } 102 | 103 | errResp := errors.NewError( 104 | errors.InternalError, 105 | "Request timeout", 106 | http.StatusGatewayTimeout, 107 | requestID, 108 | map[string]interface{}{ 109 | "timeout": timeout.String(), 110 | }, 111 | ctx.Err(), 112 | ) 113 | 114 | errors.WriteError(tw, errResp) 115 | } 116 | // Cancel the context to stop the goroutine 117 | cancel() 118 | return 119 | } 120 | }) 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /server/mock_test.go: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import ( 4 | "context" 5 | 6 | "github.com/teilomillet/gollm" 7 | "github.com/teilomillet/gollm/llm" 8 | "github.com/teilomillet/gollm/utils" 9 | ) 10 | 11 | // MockLLM implements a mock LLM for testing purposes 12 | type MockLLM struct { 13 | GenerateFunc func(context.Context, *gollm.Prompt) (string, error) 14 | DebugFunc func(string, ...interface{}) 15 | } 16 | 17 | // NewMockLLM creates a new MockLLM with optional generate function 18 | func NewMockLLM(generateFunc func(context.Context, *gollm.Prompt) (string, error)) *MockLLM { 19 | return &MockLLM{ 20 | GenerateFunc: generateFunc, 21 | } 22 | } 23 | 24 | func (m *MockLLM) Generate(ctx context.Context, prompt 
*gollm.Prompt, opts ...llm.GenerateOption) (string, error) { 25 | if m.GenerateFunc != nil { 26 | return m.GenerateFunc(ctx, prompt) 27 | } 28 | return "", nil 29 | } 30 | 31 | func (m *MockLLM) Debug(format string, args ...interface{}) { 32 | if m.DebugFunc != nil { 33 | m.DebugFunc(format, args...) 34 | } 35 | } 36 | 37 | func (m *MockLLM) GetPromptJSONSchema(opts ...gollm.SchemaOption) ([]byte, error) { 38 | return []byte(`{}`), nil 39 | } 40 | 41 | func (m *MockLLM) GetProvider() string { 42 | return "mock" 43 | } 44 | 45 | func (m *MockLLM) GetModel() string { 46 | return "mock-model" 47 | } 48 | 49 | func (m *MockLLM) UpdateLogLevel(level gollm.LogLevel) { 50 | // No-op for mock 51 | } 52 | 53 | func (m *MockLLM) GetLogLevel() gollm.LogLevel { 54 | return gollm.LogLevelOff 55 | } 56 | 57 | func (m *MockLLM) SetLogLevel(level gollm.LogLevel) { 58 | // No-op for mock 59 | } 60 | 61 | func (m *MockLLM) GetLogger() utils.Logger { 62 | return utils.NewLogger(gollm.LogLevelOff) 63 | } 64 | 65 | func (m *MockLLM) NewPrompt(text string) *gollm.Prompt { 66 | return gollm.NewPrompt(text) 67 | } 68 | 69 | func (m *MockLLM) SetEndpoint(endpoint string) { 70 | // No-op for mock 71 | } 72 | 73 | func (m *MockLLM) SetOption(key string, value interface{}) { 74 | // No-op for mock 75 | } 76 | 77 | func (m *MockLLM) SupportsJSONSchema() bool { 78 | return false 79 | } 80 | 81 | func (m *MockLLM) GenerateWithSchema(ctx context.Context, prompt *gollm.Prompt, schema interface{}, opts ...llm.GenerateOption) (string, error) { 82 | return m.Generate(ctx, prompt, opts...) 
83 | } 84 | 85 | func (m *MockLLM) SetOllamaEndpoint(endpoint string) error { 86 | // No-op for mock 87 | return nil 88 | } 89 | 90 | func (m *MockLLM) SetSystemPrompt(prompt string, cacheType llm.CacheType) { 91 | // No-op for mock 92 | } 93 | -------------------------------------------------------------------------------- /server/mocks/config_watcher.go: -------------------------------------------------------------------------------- 1 | package mocks 2 | 3 | import ( 4 | "sync/atomic" 5 | 6 | "github.com/teilomillet/hapax/config" 7 | ) 8 | 9 | // MockConfigWatcher provides a testable implementation of config.Watcher 10 | type MockConfigWatcher struct { 11 | currentConfig atomic.Value 12 | subscribers []chan *config.Config 13 | } 14 | 15 | // Verify at compile time that MockConfigWatcher implements config.Watcher 16 | var _ config.Watcher = (*MockConfigWatcher)(nil) 17 | 18 | // NewMockConfigWatcher creates a new MockConfigWatcher initialized with the provided config 19 | func NewMockConfigWatcher(cfg *config.Config) *MockConfigWatcher { 20 | mcw := &MockConfigWatcher{ 21 | subscribers: make([]chan *config.Config, 0), 22 | } 23 | mcw.currentConfig.Store(cfg) 24 | return mcw 25 | } 26 | 27 | // GetCurrentConfig implements config.Watcher 28 | func (m *MockConfigWatcher) GetCurrentConfig() *config.Config { 29 | return m.currentConfig.Load().(*config.Config) 30 | } 31 | 32 | // Subscribe implements config.Watcher 33 | func (m *MockConfigWatcher) Subscribe() <-chan *config.Config { 34 | ch := make(chan *config.Config, 1) 35 | m.subscribers = append(m.subscribers, ch) 36 | 37 | // Send current config immediately 38 | cfg := m.GetCurrentConfig() 39 | select { 40 | case ch <- cfg: 41 | default: 42 | } 43 | 44 | return ch 45 | } 46 | 47 | // Close implements config.Watcher 48 | func (m *MockConfigWatcher) Close() error { 49 | for _, ch := range m.subscribers { 50 | close(ch) 51 | } 52 | m.subscribers = nil 53 | return nil 54 | } 55 | 56 | // UpdateConfig is a test helper 
that simulates configuration changes 57 | func (m *MockConfigWatcher) UpdateConfig(cfg *config.Config) { 58 | m.currentConfig.Store(cfg) 59 | 60 | for _, ch := range m.subscribers { 61 | select { 62 | case ch <- cfg: 63 | default: 64 | // Skip if channel is blocked 65 | } 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /server/mocks/llm.go: -------------------------------------------------------------------------------- 1 | package mocks 2 | 3 | import ( 4 | "context" 5 | "time" 6 | 7 | "github.com/teilomillet/gollm" 8 | "github.com/teilomillet/gollm/llm" 9 | "github.com/teilomillet/gollm/utils" 10 | "github.com/teilomillet/hapax/server/middleware" 11 | ) 12 | 13 | // MockLLM implements a mock LLM for testing purposes. 14 | // It provides a flexible way to simulate LLM behavior in tests without making actual API calls. 15 | // 16 | // Key features: 17 | // 1. Configurable response generation through GenerateFunc 18 | // 2. Debug logging capture through DebugFunc 19 | // 3. Default implementations for all interface methods 20 | // 21 | // Example usage: 22 | // 23 | // mockLLM := NewMockLLM(func(ctx context.Context, prompt *gollm.Prompt) (string, error) { 24 | // return "mocked response", nil 25 | // }) 26 | type MockLLM struct { 27 | GenerateFunc func(context.Context, *gollm.Prompt) (string, error) 28 | DebugFunc func(string, ...interface{}) 29 | Provider string // Provider name for testing 30 | Model string // Model name for testing 31 | } 32 | 33 | // NewMockLLM creates a new MockLLM with optional generate function. 34 | // If generateFunc is nil, Generate will return empty string with no error. 
35 | func NewMockLLM(generateFunc func(context.Context, *gollm.Prompt) (string, error)) *MockLLM { 36 | return &MockLLM{ 37 | GenerateFunc: generateFunc, 38 | Provider: "mock", 39 | Model: "mock-model", 40 | } 41 | } 42 | 43 | // NewMockLLMWithConfig creates a new MockLLM with specific provider and model names 44 | func NewMockLLMWithConfig(provider, model string, generateFunc func(context.Context, *gollm.Prompt) (string, error)) *MockLLM { 45 | return &MockLLM{ 46 | GenerateFunc: generateFunc, 47 | Provider: provider, 48 | Model: model, 49 | } 50 | } 51 | 52 | // Generate implements the core LLM functionality. 53 | // It uses the provided GenerateFunc if available, otherwise returns empty string. 54 | // The opts parameter is ignored in the mock to simplify testing. 55 | func (m *MockLLM) Generate(ctx context.Context, prompt *gollm.Prompt, opts ...llm.GenerateOption) (string, error) { 56 | // Check for timeout header 57 | if ctx.Value(middleware.XTestTimeoutKey) != nil { 58 | // Sleep longer than the timeout 59 | time.Sleep(10 * time.Second) 60 | return "", context.DeadlineExceeded 61 | } 62 | 63 | if m.GenerateFunc != nil { 64 | return m.GenerateFunc(ctx, prompt) 65 | } 66 | return "", nil 67 | } 68 | 69 | // Debug captures debug messages if DebugFunc is provided. 70 | // This allows tests to verify logging behavior if needed. 71 | func (m *MockLLM) Debug(format string, args ...interface{}) { 72 | if m.DebugFunc != nil { 73 | m.DebugFunc(format, args...) 74 | } 75 | } 76 | 77 | // GetPromptJSONSchema returns a minimal valid JSON schema. 78 | // This is useful for testing schema validation without complex schemas. 
79 | func (m *MockLLM) GetPromptJSONSchema(opts ...gollm.SchemaOption) ([]byte, error) { 80 | return []byte(`{}`), nil 81 | } 82 | 83 | // GetProvider returns the mock provider name 84 | func (m *MockLLM) GetProvider() string { 85 | return m.Provider 86 | } 87 | 88 | // GetModel returns the mock model name 89 | func (m *MockLLM) GetModel() string { 90 | return m.Model 91 | } 92 | 93 | // GetLogLevel returns a default log level. 94 | // Tests can rely on this consistent behavior. 95 | func (m *MockLLM) GetLogLevel() gollm.LogLevel { 96 | return gollm.LogLevelInfo 97 | } 98 | 99 | // UpdateLogLevel is a no-op in the mock. 100 | // Real implementation would change logging behavior. 101 | func (m *MockLLM) UpdateLogLevel(level gollm.LogLevel) { 102 | // No-op for mock 103 | } 104 | 105 | // SetLogLevel is a no-op in the mock. 106 | // Real implementation would change logging behavior. 107 | func (m *MockLLM) SetLogLevel(level gollm.LogLevel) { 108 | // No-op for mock 109 | } 110 | 111 | // GetLogger returns nil as we don't need logging in tests. 112 | // Real implementation would return a logger instance. 113 | func (m *MockLLM) GetLogger() utils.Logger { 114 | return nil 115 | } 116 | 117 | // NewPrompt creates a simple prompt with user role. 118 | // This provides consistent prompt creation for tests. 119 | func (m *MockLLM) NewPrompt(text string) *gollm.Prompt { 120 | return &gollm.Prompt{ 121 | Messages: []gollm.PromptMessage{ 122 | {Role: "user", Content: text}, 123 | }, 124 | } 125 | } 126 | 127 | // SetEndpoint is a no-op in the mock. 128 | // Real implementation would configure the API endpoint. 129 | func (m *MockLLM) SetEndpoint(endpoint string) { 130 | // No-op for mock 131 | } 132 | 133 | // SetOption is a no-op in the mock. 134 | // Real implementation would configure LLM options. 135 | func (m *MockLLM) SetOption(key string, value interface{}) { 136 | // No-op for mock 137 | } 138 | 139 | // SupportsJSONSchema returns true to indicate schema support. 
140 | // This allows testing schema-related functionality. 141 | func (m *MockLLM) SupportsJSONSchema() bool { 142 | return true 143 | } 144 | 145 | // GenerateWithSchema uses the standard Generate function. 146 | // Schema validation is not performed in the mock. 147 | func (m *MockLLM) GenerateWithSchema(ctx context.Context, prompt *gollm.Prompt, schema interface{}, opts ...llm.GenerateOption) (string, error) { 148 | if m.GenerateFunc != nil { 149 | return m.GenerateFunc(ctx, prompt) 150 | } 151 | return "", nil 152 | } 153 | 154 | // SetOllamaEndpoint is a no-op in the mock. 155 | // Real implementation would configure Ollama endpoint. 156 | func (m *MockLLM) SetOllamaEndpoint(endpoint string) error { 157 | return nil 158 | } 159 | 160 | // SetSystemPrompt is a no-op in the mock. 161 | // Real implementation would set a system-level prompt. 162 | func (m *MockLLM) SetSystemPrompt(prompt string, cacheType llm.CacheType) { 163 | // No-op for mock 164 | } 165 | -------------------------------------------------------------------------------- /server/processing/processor.go: -------------------------------------------------------------------------------- 1 | // Package processing provides request processing and response formatting for LLM interactions. 2 | package processing 3 | 4 | import ( 5 | "bytes" 6 | "context" 7 | "fmt" 8 | "strings" 9 | "text/template" 10 | 11 | "github.com/teilomillet/gollm" 12 | "github.com/teilomillet/hapax/config" 13 | ) 14 | 15 | // Processor handles request processing and response formatting for LLM interactions. 16 | // It uses Go templates to transform incoming requests into LLM-compatible formats, 17 | // communicates with the LLM, and formats the responses according to configuration. 
18 | // 19 | // Key features: 20 | // - Template-based request transformation 21 | // - Configurable response formatting 22 | // - Support for both simple and chat completions 23 | // - System prompt management 24 | // 25 | // The Processor is designed to be reusable across different request types 26 | // while maintaining consistent formatting and error handling. 27 | type Processor struct { 28 | llm gollm.LLM // The LLM instance to use for generation 29 | templates map[string]*template.Template // Compiled templates for request formatting 30 | config *config.ProcessingConfig // Configuration for processing behavior 31 | defaultPrompt string // Default system prompt for all requests 32 | } 33 | 34 | // NewProcessor creates a new processor instance with the given configuration and LLM. 35 | // It validates the configuration and pre-compiles all templates for efficiency. 36 | // 37 | // Parameters: 38 | // - cfg: Processing configuration including templates and formatting options 39 | // - llm: LLM instance to use for text generation 40 | // 41 | // Returns: 42 | // - A new Processor instance and nil error if successful 43 | // - nil and error if configuration is invalid or template compilation fails 44 | // 45 | // The processor will fail fast if any templates are invalid, preventing runtime errors. 
46 | func NewProcessor(cfg *config.ProcessingConfig, llm gollm.LLM) (*Processor, error) { 47 | if cfg == nil { 48 | return nil, fmt.Errorf("processing config is required") 49 | } 50 | if llm == nil { 51 | return nil, fmt.Errorf("LLM instance is required") 52 | } 53 | 54 | // Parse all templates at initialization to fail fast on invalid templates 55 | templates := make(map[string]*template.Template) 56 | for name, tmpl := range cfg.RequestTemplates { 57 | t, err := template.New(name).Parse(tmpl) 58 | if err != nil { 59 | return nil, fmt.Errorf("failed to parse template %s: %w", name, err) 60 | } 61 | templates[name] = t 62 | } 63 | 64 | return &Processor{ 65 | llm: llm, 66 | templates: templates, 67 | config: cfg, 68 | }, nil 69 | } 70 | 71 | // ProcessRequest handles the end-to-end processing of a request: 72 | // 1. Validates the request 73 | // 2. Selects and executes the appropriate template 74 | // 3. Creates an LLM prompt with system context 75 | // 4. Sends the request to the LLM 76 | // 5. Formats the response according to configuration 77 | // 78 | // Parameters: 79 | // - ctx: Context for the request, used for cancellation and timeouts 80 | // - req: The request to process, containing type and input data 81 | // 82 | // Returns: 83 | // - Formatted response and nil error if successful 84 | // - nil and error if any step fails 85 | // 86 | // The processor will use the "default" template if no matching template 87 | // is found for the request type. 
88 | func (p *Processor) ProcessRequest(ctx context.Context, req *Request) (*Response, error) { 89 | if req == nil { 90 | return nil, fmt.Errorf("request cannot be nil") 91 | } 92 | 93 | var promptMessages []gollm.PromptMessage 94 | 95 | // Always start with system prompt if we have one 96 | if p.defaultPrompt != "" { 97 | promptMessages = append(promptMessages, gollm.PromptMessage{ 98 | Role: "system", 99 | Content: p.defaultPrompt, 100 | }) 101 | } 102 | 103 | // Now we have two clear paths - either conversation or single input 104 | if len(req.Messages) > 0 { 105 | // Add debug logging for chat requests 106 | fmt.Printf("DEBUG: Processing chat request with %d messages\n", len(req.Messages)) 107 | // For conversations, we just need to convert the messages directly 108 | for _, msg := range req.Messages { 109 | fmt.Printf("DEBUG: Adding message - Role: '%s', Content: '%s'\n", msg.Role, msg.Content) 110 | promptMessages = append(promptMessages, gollm.PromptMessage{ 111 | Role: msg.Role, 112 | Content: msg.Content, 113 | }) 114 | } 115 | } else if req.Input != "" { 116 | // Add debug logging for single input requests 117 | fmt.Printf("DEBUG: Processing single input request: '%s'\n", req.Input) 118 | // For single inputs, we still use the template system 119 | tmpl := p.templates["default"] 120 | if t, ok := p.templates[req.Type]; ok { 121 | tmpl = t 122 | } 123 | if tmpl == nil { 124 | return nil, fmt.Errorf("no template found for type: %s", req.Type) 125 | } 126 | 127 | var buf bytes.Buffer 128 | if err := tmpl.Execute(&buf, req); err != nil { 129 | return nil, fmt.Errorf("template execution failed: %w", err) 130 | } 131 | 132 | promptMessages = append(promptMessages, gollm.PromptMessage{ 133 | Role: "user", 134 | Content: buf.String(), 135 | }) 136 | } else { 137 | return nil, fmt.Errorf("request must contain either messages or input") 138 | } 139 | 140 | prompt := &gollm.Prompt{Messages: promptMessages} 141 | 142 | // Add debug logging 143 | fmt.Printf("DEBUG: 
About to send prompt to LLM: %+v\n", prompt) 144 | fmt.Printf("DEBUG: Number of messages in prompt: %d\n", len(prompt.Messages)) 145 | for i, msg := range prompt.Messages { 146 | fmt.Printf("DEBUG: Message[%d] - Role: '%s', Content: '%s'\n", i, msg.Role, msg.Content) 147 | } 148 | 149 | response, err := p.llm.Generate(ctx, prompt) 150 | if err != nil { 151 | return nil, fmt.Errorf("LLM processing failed: %w", err) 152 | } 153 | 154 | return p.formatResponse(response), nil 155 | } 156 | 157 | // formatResponse applies configured formatting options to the LLM response: 158 | // 1. Cleans JSON if enabled (removes markdown blocks, formats JSON) 159 | // 2. Trims whitespace if enabled 160 | // 3. Truncates to max length if configured 161 | // 162 | // This ensures consistent response format and size across different 163 | // LLM outputs and request types. 164 | func (p *Processor) formatResponse(content string) *Response { 165 | if p.config.ResponseFormatting.CleanJSON { 166 | content = gollm.CleanResponse(content) 167 | } 168 | if p.config.ResponseFormatting.TrimWhitespace { 169 | content = strings.TrimSpace(content) 170 | } 171 | if p.config.ResponseFormatting.MaxLength > 0 && len(content) > p.config.ResponseFormatting.MaxLength { 172 | content = content[:p.config.ResponseFormatting.MaxLength] 173 | } 174 | return &Response{Content: content} 175 | } 176 | 177 | // SetDefaultPrompt sets the system prompt to be used for all requests. 178 | // This prompt provides context and instructions to the LLM. 179 | func (p *Processor) SetDefaultPrompt(prompt string) { 180 | p.defaultPrompt = prompt 181 | } 182 | -------------------------------------------------------------------------------- /server/processing/types.go: -------------------------------------------------------------------------------- 1 | // Package processing provides request processing and response formatting for LLM interactions. 
2 | // It handles template-based request transformation, LLM communication, and response formatting. 3 | package processing 4 | 5 | // Message represents a single message in a conversation. 6 | // This follows the standard chat format used by most LLM providers, 7 | // where each message has a role (e.g., "user", "assistant", "system") 8 | // and content (the actual message text). 9 | type Message struct { 10 | Role string `json:"role"` // Role of the message sender (e.g., "user", "assistant") 11 | Content string `json:"content"` // The actual message content 12 | } 13 | 14 | // Request represents an incoming request to the LLM service. 15 | // It supports two main types of requests: 16 | // 1. Simple completion: Using the Input field with a default template 17 | // 2. Chat completion: Using the Messages field with a chat template 18 | // 19 | // The Type field determines which template is used to format the request. 20 | // This allows for flexible request handling while maintaining a consistent 21 | // interface with the LLM. 22 | type Request struct { 23 | // Type indicates the type of request (e.g., "completion", "chat", "function") 24 | Type string `json:"type"` // Type of request (e.g., "default", "chat") 25 | Input string `json:"input"` // Used for simple completion requests 26 | Messages []Message `json:"messages,omitempty"` // Used for chat completion requests 27 | // FunctionDescription is used for function-calling requests 28 | FunctionDescription string `json:"function_description,omitempty"` 29 | } 30 | 31 | // Response represents the processed output from the LLM. 32 | // It contains the formatted content after applying any configured 33 | // transformations (e.g., JSON cleaning, whitespace trimming, length limits). 
34 | // 35 | // Future extensions might include: 36 | // - Metadata about the processing (e.g., truncation info) 37 | // - Multiple response formats (e.g., text, structured data) 38 | // - Usage statistics (tokens, processing time) 39 | type Response struct { 40 | // Content is the processed response content 41 | Content string `json:"content"` // The processed response content 42 | // Error holds any error information 43 | Error string `json:"error,omitempty"` 44 | } 45 | -------------------------------------------------------------------------------- /server/provider/errors.go: -------------------------------------------------------------------------------- 1 | package provider 2 | 3 | import "errors" 4 | 5 | var ( 6 | // ErrNoHealthyProvider indicates that no healthy provider is available 7 | ErrNoHealthyProvider = errors.New("no healthy provider available") 8 | ) 9 | -------------------------------------------------------------------------------- /server/provider/execution.go: -------------------------------------------------------------------------------- 1 | package provider 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "time" 7 | 8 | "github.com/sony/gobreaker" 9 | "github.com/teilomillet/gollm" 10 | "github.com/teilomillet/hapax/server/circuitbreaker" // Added import for custom circuit breaker 11 | "go.uber.org/zap" 12 | ) 13 | 14 | // result represents the outcome of an LLM operation 15 | type result struct { 16 | err error 17 | status HealthStatus 18 | name string 19 | } 20 | 21 | // Execute coordinates provider execution with proper error handling 22 | func (m *Manager) Execute(ctx context.Context, operation func(llm gollm.LLM) error, prompt *gollm.Prompt) error { 23 | key := m.generateRequestKey(prompt) 24 | m.logger.Debug("Starting Execute", zap.String("key", key)) 25 | 26 | v, err, shared := m.group.Do(key, func() (interface{}, error) { 27 | return m.executeWithRetries(ctx, operation) 28 | }) 29 | 30 | if err != nil { 31 | m.logger.Debug("Execute 
failed", zap.Error(err)) 32 | return err 33 | } 34 | 35 | m.handleRequestMetrics(shared) 36 | return m.processResult(v.(*result)) 37 | } 38 | 39 | func (m *Manager) executeWithRetries(ctx context.Context, operation func(llm gollm.LLM) error) (*result, error) { 40 | preference := m.getProviderPreference() 41 | if len(preference) == 0 { 42 | return &result{ 43 | err: fmt.Errorf("no providers configured"), 44 | }, fmt.Errorf("no providers configured") 45 | } 46 | 47 | var lastResult *result 48 | 49 | // Try each provider in sequence 50 | for _, name := range preference { 51 | provider, breaker, status := m.getProviderResources(name) 52 | if provider == nil || breaker == nil || !status.Healthy { 53 | continue 54 | } 55 | 56 | // Try the current provider 57 | currentResult := m.executeOperation(ctx, operation, provider, breaker, status, name) 58 | lastResult = currentResult 59 | 60 | if currentResult.err == nil { 61 | // Success case - return immediately 62 | return currentResult, nil 63 | } 64 | 65 | // **Key Insight** 66 | // ================= 67 | // 68 | // The key insight nderstand the relationship between single-request behavior and cross-request state. 69 | // The circuit breaker maintains state across requests, but each individual request needs clear, predictable behavior. 70 | 71 | // **Request Flow** 72 | // =============== 73 | // 74 | // When the first request comes in: 75 | // 1. The breaker is closed (not open). 76 | // 2. We hit the else clause. 77 | // 3. We return the primary error immediately. 78 | // 4. This failure gets recorded in the circuit breaker's state. 79 | 80 | // For the second request: 81 | // 1. The primary provider fails again. 82 | // 2. This triggers the circuit breaker to open. 83 | // 3. Because the breaker is now open, we hit the first condition. 84 | // 4. The continue statement moves us to try the backup provider. 85 | // 5. All of this happens within the same request. 
86 | 87 | // **Properties Maintained** 88 | // ======================= 89 | // 90 | // This pattern maintains two important properties: 91 | // 1. **Isolation**: Each request has clear, predictable behavior. 92 | // 2. **State Evolution**: The circuit breaker accumulates state across requests. 93 | 94 | // Circuit Breaker Logic 95 | if breaker.State() == gobreaker.StateOpen { 96 | // If the circuit breaker is open, we check if we're at the last provider in the preference list. 97 | // If we are, we return the primary error immediately. 98 | if name == preference[len(preference)-1] { 99 | return currentResult, currentResult.err // This gives us the immediate failure 100 | } 101 | // Continue to the next provider if we are not at the last one. 102 | continue 103 | } else { 104 | // If the breaker is closed, we return the primary error immediately. 105 | return currentResult, currentResult.err // This gives us the immediate failure 106 | } 107 | } 108 | 109 | // Error Handling 110 | // We always maintain a valid result structure to prevent nil pointer dereference. 
111 | if lastResult == nil { 112 | return &result{ 113 | err: fmt.Errorf("no healthy provider available"), 114 | }, fmt.Errorf("no healthy provider available") 115 | } 116 | 117 | return lastResult, lastResult.err 118 | } 119 | 120 | // executeOperation handles a single operation attempt with proper resource cleanup 121 | func (m *Manager) executeOperation( 122 | ctx context.Context, 123 | operation func(llm gollm.LLM) error, 124 | provider gollm.LLM, 125 | breaker *circuitbreaker.CircuitBreaker, 126 | status HealthStatus, 127 | name string) *result { 128 | 129 | start := time.Now() 130 | 131 | err := breaker.Execute(func() error { 132 | // Always check context before executing operation 133 | if err := ctx.Err(); err != nil { 134 | return err 135 | } 136 | return operation(provider) 137 | }) 138 | 139 | duration := time.Since(start) 140 | breakerState := breaker.State() 141 | breakerCounts := breaker.Counts() 142 | 143 | if err != nil { 144 | m.logger.Debug("operation failed", 145 | zap.String("provider", name), 146 | zap.Error(err), 147 | zap.Duration("duration", duration), 148 | zap.String("breaker_state", breakerState.String()), 149 | zap.Uint32("consecutive_failures", breakerCounts.ConsecutiveFailures)) 150 | 151 | return &result{ 152 | err: err, 153 | status: HealthStatus{ 154 | Healthy: false, 155 | LastCheck: time.Now(), 156 | ErrorCount: status.ErrorCount + 1, 157 | ConsecutiveFails: int(breakerCounts.ConsecutiveFailures), 158 | Latency: duration, 159 | RequestCount: status.RequestCount + 1, 160 | }, 161 | name: name, 162 | } 163 | } 164 | 165 | return &result{ 166 | err: nil, 167 | status: HealthStatus{ 168 | Healthy: true, 169 | LastCheck: time.Now(), 170 | ErrorCount: 0, 171 | ConsecutiveFails: 0, 172 | Latency: duration, 173 | RequestCount: status.RequestCount + 1, 174 | }, 175 | name: name, 176 | } 177 | } 178 | 179 | // generateRequestKey creates a consistent key based on the prompt content and role 180 | func (m *Manager) generateRequestKey(prompt 
*gollm.Prompt) string { 181 | return fmt.Sprintf("%s-%s", prompt.Messages[0].Content, prompt.Messages[0].Role) 182 | } 183 | 184 | // getProviderPreference safely retrieves the current provider preference list 185 | func (m *Manager) getProviderPreference() []string { 186 | m.mu.RLock() 187 | defer m.mu.RUnlock() 188 | preference := make([]string, len(m.cfg.ProviderPreference)) 189 | copy(preference, m.cfg.ProviderPreference) 190 | return preference 191 | } 192 | 193 | // getProviderResources safely retrieves provider-related resources 194 | func (m *Manager) getProviderResources(name string) (gollm.LLM, *circuitbreaker.CircuitBreaker, HealthStatus) { 195 | m.mu.RLock() 196 | defer m.mu.RUnlock() 197 | 198 | provider, exists := m.providers[name] 199 | if !exists { 200 | return nil, nil, HealthStatus{} 201 | } 202 | 203 | return provider, m.breakers[name], m.GetHealthStatus(name) 204 | } 205 | 206 | // handleRequestMetrics updates metrics for deduplicated requests 207 | func (m *Manager) handleRequestMetrics(shared bool) { 208 | if shared { 209 | m.deduplicatedRequests.Inc() 210 | } 211 | } 212 | 213 | // processResult handles the final result and updates provider health status 214 | func (m *Manager) processResult(r *result) error { 215 | if r.name != "" { 216 | m.UpdateHealthStatus(r.name, r.status) 217 | } 218 | return r.err 219 | } 220 | -------------------------------------------------------------------------------- /server/provider/health.go: -------------------------------------------------------------------------------- 1 | package provider 2 | 3 | import ( 4 | "context" 5 | "time" 6 | 7 | "github.com/prometheus/client_golang/prometheus" 8 | "github.com/teilomillet/gollm" 9 | "go.uber.org/zap" 10 | ) 11 | 12 | // HealthStatus represents the current health state of a provider 13 | type HealthStatus struct { 14 | Healthy bool // Whether the provider is currently healthy 15 | LastCheck time.Time // When the last health check was performed 16 | ConsecutiveFails 
int // Number of consecutive failures 17 | Latency time.Duration // Last observed latency 18 | ErrorCount int64 // Total number of errors 19 | RequestCount int64 // Total number of requests 20 | } 21 | 22 | // startHealthChecks begins monitoring all providers 23 | func (m *Manager) startHealthChecks(ctx context.Context) { 24 | interval := time.Minute 25 | ticker := time.NewTicker(interval) 26 | defer ticker.Stop() 27 | 28 | for { 29 | select { 30 | case <-ctx.Done(): 31 | return 32 | case <-ticker.C: 33 | m.checkAllProviders() 34 | } 35 | } 36 | } 37 | 38 | // checkAllProviders performs health checks on all providers 39 | func (m *Manager) checkAllProviders() { 40 | for name, provider := range m.providers { 41 | start := time.Now() 42 | 43 | // Get the current health status 44 | var status HealthStatus 45 | if val, ok := m.healthStates.Load(name); ok { 46 | status = val.(HealthStatus) 47 | } 48 | 49 | // Perform health check 50 | err := m.healthCheck(provider) 51 | duration := time.Since(start) 52 | 53 | // Update metrics 54 | m.healthCheckDuration.Observe(duration.Seconds()) 55 | 56 | if err != nil { 57 | m.healthCheckErrors.WithLabelValues(name).Inc() 58 | status.Healthy = false 59 | status.ErrorCount++ 60 | } else { 61 | status.Healthy = true 62 | status.ErrorCount = 0 63 | } 64 | 65 | status.LastCheck = time.Now() 66 | m.UpdateHealthStatus(name, status) 67 | } 68 | } 69 | 70 | // CheckProviderHealth performs a health check on a provider 71 | func (m *Manager) CheckProviderHealth(name string, llm gollm.LLM) HealthStatus { 72 | return m.checkProviderHealth(name, llm) 73 | } 74 | 75 | // checkProviderHealth performs a health check on a provider 76 | func (m *Manager) checkProviderHealth(name string, llm gollm.LLM) HealthStatus { 77 | start := time.Now() 78 | status := HealthStatus{ 79 | LastCheck: start, 80 | Healthy: true, 81 | } 82 | 83 | // Get previous status if any 84 | if val, ok := m.healthStates.Load(name); ok { 85 | prevStatus := val.(HealthStatus) 86 | 
status.ConsecutiveFails = prevStatus.ConsecutiveFails 87 | status.ErrorCount = prevStatus.ErrorCount 88 | status.RequestCount = prevStatus.RequestCount 89 | } 90 | 91 | // Simple health check prompt 92 | prompt := &gollm.Prompt{ 93 | Messages: []gollm.PromptMessage{ 94 | {Role: "user", Content: "health check"}, 95 | }, 96 | } 97 | 98 | ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) 99 | defer cancel() 100 | 101 | _, err := llm.Generate(ctx, prompt) 102 | status.Latency = time.Since(start) 103 | m.healthCheckDuration.Observe(status.Latency.Seconds()) 104 | 105 | if err != nil { 106 | status.Healthy = false 107 | status.ConsecutiveFails++ 108 | status.ErrorCount++ 109 | m.healthCheckErrors.WithLabelValues(name).Inc() 110 | m.logger.Warn("Provider health check failed", 111 | zap.String("provider", name), 112 | zap.Error(err), 113 | zap.Duration("latency", status.Latency), 114 | ) 115 | } else { 116 | status.ConsecutiveFails = 0 117 | } 118 | 119 | status.RequestCount++ 120 | return status 121 | } 122 | 123 | // GetHealthCheckErrors returns the health check errors counter for testing 124 | func (m *Manager) GetHealthCheckErrors() *prometheus.CounterVec { 125 | return m.healthCheckErrors 126 | } 127 | 128 | // GetHealthStatus returns the health status for a provider 129 | func (m *Manager) GetHealthStatus(name string) HealthStatus { 130 | if val, ok := m.healthStates.Load(name); ok { 131 | return val.(HealthStatus) 132 | } 133 | return HealthStatus{} 134 | } 135 | 136 | // UpdateHealthStatus updates the health status for a provider 137 | func (m *Manager) UpdateHealthStatus(name string, status HealthStatus) { 138 | m.mu.Lock() 139 | defer m.mu.Unlock() 140 | 141 | // Get the current status 142 | var currentStatus HealthStatus 143 | if val, ok := m.healthStates.Load(name); ok { 144 | currentStatus = val.(HealthStatus) 145 | } 146 | 147 | // Update the status 148 | newStatus := HealthStatus{ 149 | Healthy: status.Healthy, 150 | LastCheck: 
status.LastCheck, 151 | ErrorCount: status.ErrorCount, 152 | } 153 | 154 | // If the status is becoming healthy, reset error count 155 | if status.Healthy && !currentStatus.Healthy { 156 | newStatus.ErrorCount = 0 157 | } 158 | 159 | // Store the new status 160 | m.healthStates.Store(name, newStatus) 161 | 162 | // Update metrics 163 | if status.Healthy { 164 | m.healthyProviders.WithLabelValues(name).Set(1) 165 | } else { 166 | m.healthyProviders.WithLabelValues(name).Set(0) 167 | } 168 | } 169 | 170 | func (m *Manager) healthCheck(provider gollm.LLM) error { 171 | // Simple health check prompt 172 | prompt := &gollm.Prompt{ 173 | Messages: []gollm.PromptMessage{ 174 | {Role: "user", Content: "health check"}, 175 | }, 176 | } 177 | 178 | ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) 179 | defer cancel() 180 | 181 | _, err := provider.Generate(ctx, prompt) 182 | return err 183 | } 184 | 185 | // PerformHealthCheck performs a health check on all providers 186 | func (m *Manager) PerformHealthCheck() { 187 | for name, provider := range m.providers { 188 | start := time.Now() 189 | 190 | // Get the current health status 191 | var status HealthStatus 192 | if val, ok := m.healthStates.Load(name); ok { 193 | status = val.(HealthStatus) 194 | } 195 | 196 | // Perform health check 197 | err := m.healthCheck(provider) 198 | duration := time.Since(start) 199 | 200 | // Update metrics 201 | m.healthCheckDuration.Observe(duration.Seconds()) 202 | 203 | if err != nil { 204 | m.healthCheckErrors.WithLabelValues(name).Inc() 205 | status.Healthy = false 206 | status.ErrorCount++ 207 | } else { 208 | status.Healthy = true 209 | status.ErrorCount = 0 210 | } 211 | 212 | status.LastCheck = time.Now() 213 | m.UpdateHealthStatus(name, status) 214 | } 215 | } 216 | -------------------------------------------------------------------------------- /server/provider/manager_singleflight_test.go: 
-------------------------------------------------------------------------------- 1 | package provider 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "sync" 7 | "sync/atomic" 8 | "testing" 9 | "time" 10 | 11 | "github.com/prometheus/client_golang/prometheus" 12 | "github.com/stretchr/testify/assert" 13 | "github.com/stretchr/testify/require" 14 | "github.com/teilomillet/gollm" 15 | "github.com/teilomillet/hapax/config" 16 | "github.com/teilomillet/hapax/server/mocks" 17 | "go.uber.org/zap" 18 | ) 19 | 20 | func TestManagerSingleflight(t *testing.T) { 21 | t.Parallel() 22 | 23 | tests := []struct { 24 | name string 25 | testFn func(*testing.T, *Manager) 26 | }{ 27 | { 28 | name: "Concurrent identical requests are deduplicated", 29 | testFn: func(t *testing.T, m *Manager) { 30 | var callCount atomic.Int32 31 | mock := mocks.NewMockLLMWithConfig("test", "model", func(ctx context.Context, prompt *gollm.Prompt) (string, error) { 32 | callCount.Add(1) 33 | // Small sleep to ensure concurrent requests overlap 34 | time.Sleep(10 * time.Millisecond) 35 | return "response", nil 36 | }) 37 | 38 | m.SetProviders(map[string]gollm.LLM{"test": mock}) 39 | m.UpdateHealthStatus("test", HealthStatus{ 40 | Healthy: true, 41 | LastCheck: time.Now(), 42 | ErrorCount: 0, 43 | }) 44 | 45 | // Create identical prompts 46 | prompt := &gollm.Prompt{Messages: []gollm.PromptMessage{{ 47 | Role: "user", 48 | Content: "test", 49 | }}} 50 | 51 | // Launch concurrent requests 52 | var wg sync.WaitGroup 53 | errs := make([]error, 5) 54 | for i := 0; i < 5; i++ { 55 | wg.Add(1) 56 | go func(idx int) { 57 | defer wg.Done() 58 | errs[idx] = m.Execute(context.Background(), func(llm gollm.LLM) error { 59 | _, err := llm.Generate(context.Background(), prompt) 60 | return err 61 | }, prompt) 62 | }(i) 63 | } 64 | 65 | waitWithTimeout(&wg, t, 100*time.Millisecond) 66 | 67 | // Verify results 68 | for _, err := range errs { 69 | assert.NoError(t, err) 70 | } 71 | 72 | // Should only be called once due to 
deduplication 73 | assert.Equal(t, int32(1), callCount.Load()) 74 | }, 75 | }, 76 | { 77 | name: "Different requests are not deduplicated", 78 | testFn: func(t *testing.T, m *Manager) { 79 | var callCount atomic.Int32 80 | mock := mocks.NewMockLLMWithConfig("test", "model", func(ctx context.Context, prompt *gollm.Prompt) (string, error) { 81 | callCount.Add(1) 82 | time.Sleep(10 * time.Millisecond) 83 | return "response", nil 84 | }) 85 | 86 | m.SetProviders(map[string]gollm.LLM{"test": mock}) 87 | m.UpdateHealthStatus("test", HealthStatus{ 88 | Healthy: true, 89 | LastCheck: time.Now(), 90 | ErrorCount: 0, 91 | }) 92 | 93 | var wg sync.WaitGroup 94 | for i := 0; i < 3; i++ { 95 | wg.Add(1) 96 | go func(idx int) { 97 | defer wg.Done() 98 | // Different prompts 99 | prompt := &gollm.Prompt{Messages: []gollm.PromptMessage{{ 100 | Role: "user", 101 | Content: fmt.Sprintf("test-%d", idx), 102 | }}} 103 | _ = m.Execute(context.Background(), func(llm gollm.LLM) error { 104 | _, err := llm.Generate(context.Background(), prompt) 105 | return err 106 | }, prompt) 107 | }(i) 108 | } 109 | 110 | waitWithTimeout(&wg, t, 100*time.Millisecond) 111 | assert.Equal(t, int32(3), callCount.Load()) 112 | }, 113 | }, 114 | } 115 | 116 | for _, tt := range tests { 117 | tt := tt 118 | t.Run(tt.name, func(t *testing.T) { 119 | t.Parallel() 120 | cfg := &config.Config{ 121 | TestMode: true, 122 | Providers: map[string]config.ProviderConfig{ 123 | "test": {Type: "test", Model: "model"}, 124 | }, 125 | ProviderPreference: []string{"test"}, 126 | CircuitBreaker: config.CircuitBreakerConfig{ 127 | MaxRequests: 1, 128 | Interval: 10 * time.Millisecond, 129 | Timeout: 10 * time.Millisecond, 130 | FailureThreshold: 2, 131 | TestMode: true, 132 | }, 133 | } 134 | 135 | manager, err := NewManager(cfg, zap.NewNop(), prometheus.NewRegistry()) 136 | require.NoError(t, err) 137 | tt.testFn(t, manager) 138 | }) 139 | } 140 | } 141 | 142 | // Helper function to wait for WaitGroup with timeout 143 | func 
waitWithTimeout(wg *sync.WaitGroup, t *testing.T, timeout time.Duration) { 144 | done := make(chan struct{}) 145 | go func() { 146 | wg.Wait() 147 | close(done) 148 | }() 149 | 150 | select { 151 | case <-done: 152 | // Success path - continue 153 | case <-time.After(timeout): 154 | t.Fatal("Test timed out waiting for concurrent requests") 155 | } 156 | } 157 | -------------------------------------------------------------------------------- /server/provider/metrics.go: -------------------------------------------------------------------------------- 1 | package provider 2 | 3 | import "github.com/prometheus/client_golang/prometheus" 4 | 5 | // initializeMetrics sets up Prometheus metrics 6 | func (m *Manager) initializeMetrics(registry *prometheus.Registry) { 7 | m.healthCheckDuration = prometheus.NewHistogram(prometheus.HistogramOpts{ 8 | Name: "hapax_health_check_duration_seconds", 9 | Help: "Duration of provider health checks", 10 | }) 11 | 12 | m.healthCheckErrors = prometheus.NewCounterVec(prometheus.CounterOpts{ 13 | Name: "hapax_health_check_errors_total", 14 | Help: "Number of health check errors by provider", 15 | }, []string{"provider"}) 16 | 17 | m.requestLatency = prometheus.NewHistogramVec(prometheus.HistogramOpts{ 18 | Name: "hapax_request_latency_seconds", 19 | Help: "Latency of provider requests", 20 | }, []string{"provider"}) 21 | 22 | m.deduplicatedRequests = prometheus.NewCounter(prometheus.CounterOpts{ 23 | Name: "hapax_deduplicated_requests_total", 24 | Help: "Number of deduplicated requests", 25 | }) 26 | 27 | m.healthyProviders = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 28 | Name: "hapax_healthy_providers", 29 | Help: "Number of healthy providers", 30 | }, []string{"provider"}) 31 | 32 | registry.MustRegister(m.healthCheckDuration) 33 | registry.MustRegister(m.healthCheckErrors) 34 | registry.MustRegister(m.requestLatency) 35 | registry.MustRegister(m.deduplicatedRequests) 36 | registry.MustRegister(m.healthyProviders) 37 | } 38 | 
-------------------------------------------------------------------------------- /server/provider/provider.go: -------------------------------------------------------------------------------- 1 | package provider 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "sync" 7 | "time" 8 | 9 | "github.com/prometheus/client_golang/prometheus" 10 | "github.com/sony/gobreaker" 11 | "github.com/teilomillet/gollm" 12 | "github.com/teilomillet/hapax/config" 13 | "github.com/teilomillet/hapax/server/circuitbreaker" 14 | "go.uber.org/zap" 15 | "golang.org/x/sync/singleflight" 16 | ) 17 | 18 | // Manager handles LLM provider management and selection 19 | type Manager struct { 20 | providers map[string]gollm.LLM 21 | breakers map[string]*circuitbreaker.CircuitBreaker 22 | healthStates sync.Map // map[string]HealthStatus 23 | logger *zap.Logger 24 | cfg *config.Config 25 | mu sync.RWMutex 26 | group *singleflight.Group // For deduplicating identical requests 27 | 28 | // Metrics 29 | registry *prometheus.Registry 30 | healthCheckDuration prometheus.Histogram 31 | healthCheckErrors *prometheus.CounterVec 32 | requestLatency *prometheus.HistogramVec 33 | deduplicatedRequests prometheus.Counter // New metric for tracking deduplicated requests 34 | healthyProviders *prometheus.GaugeVec 35 | } 36 | 37 | // NewManager creates a new provider manager 38 | func NewManager(cfg *config.Config, logger *zap.Logger, registry *prometheus.Registry) (*Manager, error) { 39 | m := &Manager{ 40 | providers: make(map[string]gollm.LLM), 41 | breakers: make(map[string]*circuitbreaker.CircuitBreaker), 42 | logger: logger, 43 | cfg: cfg, 44 | registry: registry, 45 | group: &singleflight.Group{}, 46 | } 47 | 48 | // Initialize metrics 49 | m.initializeMetrics(registry) 50 | 51 | // Initialize providers from both new and legacy configs 52 | if !cfg.TestMode { 53 | if err := m.initializeProviders(); err != nil { 54 | return nil, err 55 | } 56 | } 57 | 58 | // Start health checks if enabled 59 | if 
cfg.LLM.HealthCheck != nil && cfg.LLM.HealthCheck.Enabled {
		go m.startHealthChecks(context.Background())
	}

	return m, nil
}

// initializeProviders sets up LLM providers based on configuration.
func (m *Manager) initializeProviders() error {
	m.providers = make(map[string]gollm.LLM)
	m.breakers = make(map[string]*circuitbreaker.CircuitBreaker)

	for name, cfg := range m.cfg.Providers {
		provider, err := m.initializeProvider(name, cfg)
		if err != nil {
			return fmt.Errorf("failed to initialize provider %s: %w", name, err)
		}

		m.providers[name] = provider
		m.logger.Info("Created LLM",
			zap.String("provider", name),
			zap.String("model", cfg.Model),
			zap.Int("api_key_length", len(cfg.APIKey)))

		// Every provider starts out healthy until a check says otherwise.
		m.UpdateHealthStatus(name, HealthStatus{
			Healthy:    true,
			LastCheck:  time.Now(),
			ErrorCount: 0,
		})

		// Defaults for the gobreaker-backed circuit breaker.
		cbConfig := circuitbreaker.Config{
			Name:             name,
			MaxRequests:      1,               // Allow 1 request in half-open state
			Interval:         time.Minute * 2, // Cyclic period of closed state
			Timeout:          time.Minute,     // Period of open state
			FailureThreshold: 3,               // Trip after 3 failures
			TestMode:         m.cfg.CircuitBreaker.TestMode,
		}

		// Config-supplied values override the defaults above.
		if m.cfg.CircuitBreaker.Timeout > 0 {
			cbConfig.Timeout = m.cfg.CircuitBreaker.Timeout
		}
		if m.cfg.CircuitBreaker.MaxRequests > 0 {
			cbConfig.MaxRequests = m.cfg.CircuitBreaker.MaxRequests
		}

		breaker, err := circuitbreaker.NewCircuitBreaker(cbConfig, m.logger, m.registry)
		if err != nil {
			return fmt.Errorf("failed to create circuit breaker for %s: %w", name, err)
		}
		m.breakers[name] = breaker
	}

	return nil
}

// initializeProvider initializes a single LLM provider
func (m
*Manager) initializeProvider(_ string, cfg config.ProviderConfig) (gollm.LLM, error) {
	return gollm.NewLLM(
		gollm.SetProvider(cfg.Type),
		gollm.SetModel(cfg.Model),
		gollm.SetAPIKey(cfg.APIKey),
	)
}

// GetProvider returns a healthy provider or an error if none is available.
func (m *Manager) GetProvider() (gollm.LLM, error) {
	m.mu.RLock()
	defer m.mu.RUnlock()

	// Walk the preference list and hand back the first usable provider.
	for _, name := range m.cfg.ProviderPreference {
		provider, exists := m.providers[name]
		if !exists {
			continue
		}

		// Skip providers currently marked unhealthy.
		if !m.GetHealthStatus(name).Healthy {
			continue
		}

		// Skip providers whose circuit breaker is open.
		if breaker := m.breakers[name]; breaker != nil && breaker.State() == gobreaker.StateOpen {
			continue
		}

		return provider, nil
	}

	return nil, fmt.Errorf("no healthy provider available")
}

// SetProviders replaces the current providers with new ones (for testing).
func (m *Manager) SetProviders(providers map[string]gollm.LLM) {
	m.mu.Lock()
	defer m.mu.Unlock()

	// Drop all existing providers and breakers before installing the new set.
	m.providers = make(map[string]gollm.LLM)
	m.breakers = make(map[string]*circuitbreaker.CircuitBreaker)

	for name, provider := range providers {
		m.providers[name] = provider

		cbConfig := circuitbreaker.Config{
			Name:             name,
			MaxRequests:      1,
			Interval:         time.Second,
			Timeout:          m.cfg.CircuitBreaker.Timeout,
			FailureThreshold: 2,
			TestMode:         m.cfg.CircuitBreaker.TestMode,
		}

		breaker, err := circuitbreaker.NewCircuitBreaker(cbConfig, m.logger,
m.registry) 186 | if err != nil { 187 | m.logger.Error("Failed to create circuit breaker", 188 | zap.String("provider", name), 189 | zap.Error(err)) 190 | continue 191 | } 192 | 193 | m.breakers[name] = breaker 194 | 195 | // Initialize health status directly without calling UpdateHealthStatus 196 | m.healthStates.Store(name, HealthStatus{ 197 | Healthy: true, 198 | LastCheck: time.Now(), 199 | ErrorCount: 0, 200 | }) 201 | } 202 | 203 | // Create a map to track which providers have been added to the preference list 204 | added := make(map[string]bool) 205 | 206 | // Keep existing provider preference order for providers that still exist 207 | newPreference := make([]string, 0, len(providers)) 208 | for _, name := range m.cfg.ProviderPreference { 209 | if _, exists := providers[name]; exists { 210 | newPreference = append(newPreference, name) 211 | added[name] = true 212 | } 213 | } 214 | 215 | // Add any new providers that weren't in the original preference list 216 | for name := range providers { 217 | if !added[name] { 218 | newPreference = append(newPreference, name) 219 | } 220 | } 221 | 222 | m.cfg.ProviderPreference = newPreference 223 | m.logger.Debug("updated provider preference list", zap.Strings("preference", newPreference)) 224 | } 225 | -------------------------------------------------------------------------------- /server/routing/metrics.go: -------------------------------------------------------------------------------- 1 | package routing 2 | 3 | import ( 4 | "net/http" 5 | 6 | "github.com/teilomillet/hapax/server/metrics" 7 | ) 8 | 9 | // RegisterMetricsRoutes adds routes for Prometheus metrics 10 | func RegisterMetricsRoutes(mux *http.ServeMux, m *metrics.Metrics) { 11 | mux.Handle("/metrics", m.Handler()) 12 | } 13 | -------------------------------------------------------------------------------- /server/routing/metrics_test.go: -------------------------------------------------------------------------------- 1 | package routing 2 | 3 | import ( 4 
| "io" 5 | "net/http" 6 | "net/http/httptest" 7 | "testing" 8 | 9 | "github.com/stretchr/testify/assert" 10 | "github.com/teilomillet/hapax/server/metrics" 11 | ) 12 | 13 | func TestRegisterMetricsRoutes(t *testing.T) { 14 | // Create new metrics instance for testing 15 | m := metrics.NewMetrics() 16 | 17 | // Create new mux 18 | mux := http.NewServeMux() 19 | RegisterMetricsRoutes(mux, m) 20 | 21 | // Create test server 22 | server := httptest.NewServer(mux) 23 | defer server.Close() 24 | 25 | // Make a test request to increment some metrics 26 | m.RequestsTotal.WithLabelValues("/test", "200").Inc() 27 | m.ErrorsTotal.WithLabelValues("server_error").Inc() 28 | m.RateLimitHits.WithLabelValues("test_client").Inc() 29 | 30 | // Test metrics endpoint 31 | resp, err := http.Get(server.URL + "/metrics") 32 | assert.NoError(t, err) 33 | defer resp.Body.Close() 34 | 35 | // Check response code 36 | assert.Equal(t, http.StatusOK, resp.StatusCode) 37 | 38 | // Check content type 39 | contentType := resp.Header.Get("Content-Type") 40 | assert.Contains(t, contentType, "text/plain") 41 | 42 | // Read response body 43 | body, err := io.ReadAll(resp.Body) 44 | assert.NoError(t, err) 45 | 46 | // Verify response contains our metrics 47 | bodyStr := string(body) 48 | expectedMetrics := []string{ 49 | "hapax_http_requests_total", 50 | "hapax_errors_total", 51 | "hapax_rate_limit_hits_total", 52 | } 53 | 54 | for _, metric := range expectedMetrics { 55 | assert.Contains(t, bodyStr, metric, "response should contain metric '%s'", metric) 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /server/validation/middleware_test.go: -------------------------------------------------------------------------------- 1 | package validation 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "net/http" 7 | "net/http/httptest" 8 | "testing" 9 | 10 | "github.com/stretchr/testify/assert" 11 | "github.com/teilomillet/hapax/config" 12 | ) 13 | 14 | func 
TestValidateCompletion(t *testing.T) {
	// Initialize middleware with config
	cfg := &config.Config{
		LLM: config.LLMConfig{
			Model:            "gpt-4",
			MaxContextTokens: 100,
		},
	}
	err := Initialize(cfg)
	assert.NoError(t, err)

	tests := []struct {
		name            string
		contentType     string
		body            interface{}
		expectedStatus  int
		expectedError   bool
		expectedDetails map[string]string // Field name -> expected error message
		expectedCode    string            // Expected error code
		suggestion      string            // Expected suggestion message
	}{
		{
			name:        "valid request",
			contentType: "application/json",
			body: CompletionRequest{
				Messages: []Message{
					{Role: "user", Content: "Hello"},
				},
			},
			expectedStatus: http.StatusOK,
			expectedError:  false,
		},
		{
			name:        "missing required content field",
			contentType: "application/json",
			body: CompletionRequest{
				Messages: []Message{
					{Role: "user", Content: ""}, // Empty content
				},
			},
			expectedStatus: http.StatusUnprocessableEntity,
			expectedError:  true,
			expectedDetails: map[string]string{
				"messages[0].content": "field 'content' is required",
			},
			expectedCode: "required_validation_failed",
			suggestion:   "The request format is correct but the content is invalid",
		},
		// Fix: this table previously contained the "invalid role value" case
		// twice, byte for byte; the redundant copy was removed.
		{
			name:        "invalid role value",
			contentType: "application/json",
			body:
CompletionRequest{ 82 | Messages: []Message{ 83 | {Role: "invalid", Content: "Hello"}, 84 | }, 85 | }, 86 | expectedStatus: http.StatusUnprocessableEntity, 87 | expectedError: true, 88 | expectedDetails: map[string]string{ 89 | "messages[0].role": "role must be one of: user, assistant, system", 90 | }, 91 | expectedCode: "oneof_validation_failed", 92 | suggestion: "The request format is correct but the content is invalid", 93 | }, 94 | { 95 | name: "token limit exceeded", 96 | contentType: "application/json", 97 | body: CompletionRequest{ 98 | Messages: []Message{ 99 | {Role: "user", Content: string(make([]byte, 1000))}, // Large content 100 | }, 101 | }, 102 | expectedStatus: http.StatusUnprocessableEntity, 103 | expectedError: true, 104 | expectedDetails: map[string]string{ 105 | "messages": "token limit exceeded", 106 | }, 107 | expectedCode: "token_limit_exceeded", 108 | suggestion: "The request format is correct but the content is invalid", 109 | }, 110 | } 111 | 112 | for _, tt := range tests { 113 | t.Run(tt.name, func(t *testing.T) { 114 | // Create request body 115 | var bodyBytes []byte 116 | var err error 117 | 118 | switch v := tt.body.(type) { 119 | case string: 120 | bodyBytes = []byte(v) 121 | default: 122 | bodyBytes, err = json.Marshal(tt.body) 123 | assert.NoError(t, err) 124 | } 125 | 126 | // Create request with a test request ID 127 | req := httptest.NewRequest(http.MethodPost, "/v1/completions", bytes.NewBuffer(bodyBytes)) 128 | req.Header.Set("X-Request-ID", "test-request-id") 129 | if tt.contentType != "" { 130 | req.Header.Set("Content-Type", tt.contentType) 131 | } 132 | 133 | // Create response recorder 134 | w := httptest.NewRecorder() 135 | 136 | // Create test handler 137 | handler := ValidateCompletion(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 138 | w.WriteHeader(http.StatusOK) 139 | })) 140 | 141 | // Handle request 142 | handler.ServeHTTP(w, req) 143 | 144 | // Assert response status code 145 | assert.Equal(t, 
tt.expectedStatus, w.Code) 146 | 147 | if tt.expectedError { 148 | var errorResp APIError 149 | err := json.Unmarshal(w.Body.Bytes(), &errorResp) 150 | assert.NoError(t, err, "Failed to unmarshal error response") 151 | 152 | // Verify error structure 153 | assert.Equal(t, "validation_error", errorResp.Type) 154 | assert.Equal(t, "test-request-id", errorResp.RequestID) 155 | assert.Equal(t, tt.expectedStatus, errorResp.Code) 156 | 157 | if tt.suggestion != "" { 158 | assert.Equal(t, tt.suggestion, errorResp.Suggestion) 159 | } 160 | 161 | // Verify error details 162 | if tt.expectedDetails != nil { 163 | assert.Len(t, errorResp.Details, len(tt.expectedDetails)) 164 | 165 | // Create a map of field to error message from the response 166 | actualDetails := make(map[string]string) 167 | for _, detail := range errorResp.Details { 168 | actualDetails[detail.Field] = detail.Message 169 | } 170 | 171 | // Compare expected and actual details 172 | for field, expectedMsg := range tt.expectedDetails { 173 | actualMsg, exists := actualDetails[field] 174 | assert.True(t, exists, "Expected error for field %s not found", field) 175 | assert.Equal(t, expectedMsg, actualMsg, 176 | "Error message mismatch for field %s", field) 177 | } 178 | } 179 | 180 | // Verify error code if specified 181 | if tt.expectedCode != "" { 182 | hasExpectedCode := false 183 | for _, detail := range errorResp.Details { 184 | if detail.Code == tt.expectedCode { 185 | hasExpectedCode = true 186 | break 187 | } 188 | } 189 | assert.True(t, hasExpectedCode, 190 | "Expected error code %s not found", tt.expectedCode) 191 | } 192 | } 193 | }) 194 | } 195 | } 196 | -------------------------------------------------------------------------------- /server/validation/schema.go: -------------------------------------------------------------------------------- 1 | package validation 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/pkoukk/tiktoken-go" 7 | ) 8 | 9 | // Tokenizer defines the interface for token counting 10 
| type Tokenizer interface { 11 | Encode(text string, allowedSpecial, disallowedSpecial []string) []int 12 | Decode(tokens []int) string 13 | CountTokens(text string) int 14 | } 15 | 16 | // tiktokenWrapper wraps tiktoken to implement our Tokenizer interface 17 | type tiktokenWrapper struct { 18 | *tiktoken.Tiktoken 19 | } 20 | 21 | func (t *tiktokenWrapper) CountTokens(text string) int { 22 | tokens := t.Encode(text, nil, nil) 23 | return len(tokens) 24 | } 25 | 26 | // TokenCounter handles token counting for messages using tiktoken 27 | type TokenCounter struct { 28 | encoding Tokenizer 29 | } 30 | 31 | // NewTokenCounter creates a new token counter for the specified model 32 | func NewTokenCounter(model string) (*TokenCounter, error) { 33 | encoding, err := tiktoken.EncodingForModel(model) 34 | if err != nil { 35 | return nil, fmt.Errorf("failed to get encoding for model %s: %v", model, err) 36 | } 37 | return &TokenCounter{encoding: &tiktokenWrapper{encoding}}, nil 38 | } 39 | 40 | // CountTokens counts the total number of tokens in a message 41 | func (tc *TokenCounter) CountTokens(msg Message) int { 42 | return tc.encoding.CountTokens(msg.Content) 43 | } 44 | 45 | // CountRequestTokens counts the total number of tokens in a completion request 46 | func (tc *TokenCounter) CountRequestTokens(req CompletionRequest) int { 47 | total := 0 48 | for _, msg := range req.Messages { 49 | total += tc.CountTokens(msg) 50 | } 51 | return total 52 | } 53 | 54 | // ValidateTokens checks if the request's token count is within limits 55 | func (tc *TokenCounter) ValidateTokens(req CompletionRequest, maxContextTokens int) error { 56 | totalTokens := tc.CountRequestTokens(req) 57 | if req.Options != nil && req.Options.MaxTokens > 0 { 58 | totalTokens += req.Options.MaxTokens 59 | } 60 | 61 | if totalTokens > maxContextTokens { 62 | return fmt.Errorf("total tokens (%d) exceeds max context length (%d)", totalTokens, maxContextTokens) 63 | } 64 | 65 | return nil 66 | } 67 | 68 | 
// ValidateOptions performs comprehensive validation of request options 69 | func ValidateOptions(opts *Options) error { 70 | if opts == nil { 71 | return nil 72 | } 73 | 74 | var errs []error 75 | 76 | // Validate generation parameters 77 | if opts.Temperature < 0 || opts.Temperature > 1 { 78 | errs = append(errs, fmt.Errorf("temperature must be between 0 and 1")) 79 | } 80 | if opts.TopP <= 0 || opts.TopP > 1 { 81 | errs = append(errs, fmt.Errorf("top_p must be between 0 and 1")) 82 | } 83 | if opts.FrequencyPenalty < -2 || opts.FrequencyPenalty > 2 { 84 | errs = append(errs, fmt.Errorf("frequency_penalty must be between -2 and 2")) 85 | } 86 | if opts.PresencePenalty < -2 || opts.PresencePenalty > 2 { 87 | errs = append(errs, fmt.Errorf("presence_penalty must be between -2 and 2")) 88 | } 89 | 90 | // Validate cache options 91 | if opts.Cache != nil { 92 | if err := validateCacheOptions(opts.Cache); err != nil { 93 | errs = append(errs, err) 94 | } 95 | } 96 | 97 | // Validate retry options 98 | if opts.Retry != nil { 99 | if err := validateRetryOptions(opts.Retry); err != nil { 100 | errs = append(errs, err) 101 | } 102 | } 103 | 104 | if len(errs) > 0 { 105 | return fmt.Errorf("validation errors: %v", errs) 106 | } 107 | 108 | return nil 109 | } 110 | 111 | // validateCacheOptions validates cache-specific configuration 112 | func validateCacheOptions(cache *CacheOptions) error { 113 | if !cache.Enable { 114 | return nil 115 | } 116 | 117 | var errs []error 118 | 119 | switch cache.Type { 120 | case "memory": 121 | if cache.MaxSize <= 0 { 122 | errs = append(errs, fmt.Errorf("max_size must be greater than 0 for memory cache")) 123 | } 124 | case "redis": 125 | if cache.Redis == nil { 126 | errs = append(errs, fmt.Errorf("redis configuration required when cache type is 'redis'")) 127 | } 128 | case "file": 129 | if cache.Dir == "" { 130 | errs = append(errs, fmt.Errorf("directory path required when cache type is 'file'")) 131 | } 132 | default: 133 | errs = 
append(errs, fmt.Errorf("invalid cache type: must be one of [memory, redis, file]")) 134 | } 135 | 136 | if cache.TTL <= 0 { 137 | errs = append(errs, fmt.Errorf("cache TTL must be greater than 0")) 138 | } 139 | 140 | if len(errs) > 0 { 141 | return fmt.Errorf("cache validation errors: %v", errs) 142 | } 143 | 144 | return nil 145 | } 146 | 147 | // validateRetryOptions validates retry-specific configuration 148 | func validateRetryOptions(retry *RetryOptions) error { 149 | var errs []error 150 | 151 | if retry.MaxRetries <= 0 { 152 | errs = append(errs, fmt.Errorf("max_retries must be greater than 0")) 153 | } 154 | if retry.InitialDelay <= 0 { 155 | errs = append(errs, fmt.Errorf("initial_delay must be greater than 0")) 156 | } 157 | if retry.MaxDelay <= retry.InitialDelay { 158 | errs = append(errs, fmt.Errorf("max_delay must be greater than initial_delay")) 159 | } 160 | if retry.Multiplier <= 1 { 161 | errs = append(errs, fmt.Errorf("multiplier must be greater than 1")) 162 | } 163 | 164 | validErrors := map[string]bool{ 165 | "rate_limit": true, 166 | "timeout": true, 167 | "server_error": true, 168 | } 169 | 170 | for _, errType := range retry.RetryableErrors { 171 | if !validErrors[errType] { 172 | errs = append(errs, fmt.Errorf("invalid retry error type: %s", errType)) 173 | } 174 | } 175 | 176 | if len(errs) > 0 { 177 | return fmt.Errorf("retry validation errors: %v", errs) 178 | } 179 | 180 | return nil 181 | } 182 | --------------------------------------------------------------------------------