├── .gitmodules ├── LICENSE ├── public-age-keys.txt ├── {{cookiecutter.project_slug}} ├── public-age-keys.txt ├── scripts │ ├── simplify-public-keys.sh │ ├── secops_config.sh │ ├── git_clean_local_branches.sh │ ├── decrypt_secrets.sh │ └── encrypt_secrets.sh ├── services │ ├── proxy │ │ └── rules │ │ │ ├── middlewares-chains.yml │ │ │ ├── tls-opts.yml │ │ │ └── middlewares.yml │ ├── model-server │ │ └── README.md │ ├── keykloak │ │ └── Dockerfile │ └── llm-router │ │ ├── litellm_config.yml │ │ └── google_vertexai.json.enc ├── .github │ └── ISSUE_TEMPLATE │ │ └── user-story.md ├── yamllintconfig.yaml ├── documentation │ └── secops │ │ └── add-key.md ├── Makefile ├── pyproject.toml ├── .env.enc ├── generate-env.sh ├── .gitignore └── docker-compose.yml ├── img └── llm-in-a-box-icon.png ├── scripts ├── simplify-public-keys.sh ├── handle-dev-secrets.sh ├── secops_config.sh ├── decrypt_secrets.sh └── encrypt_secrets.sh ├── docs ├── setup-advanced-k3s-fluxcd.md ├── paper.bib ├── LOCALDNS.md ├── cilogon-integration.md ├── paper.md ├── DIAGRAM.md ├── THOUGHTS.md └── QUICKSTART.md ├── services ├── proxy │ └── rules │ │ ├── middlewares-chains.yml │ │ ├── tls-opts.yml │ │ └── middlewares.yml ├── keykloak │ └── Dockerfile ├── model-server │ └── README.md └── llm-router │ ├── litellm_config.yml │ └── google_vertexai.json.enc ├── .github ├── workflows │ └── render-pdf.yml └── ISSUE_TEMPLATE │ └── user-story.md ├── cookiecutter.json ├── yamllintconfig.yaml ├── .env_template ├── pyproject.toml ├── Makefile ├── .gitignore └── README.md /.gitmodules: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache 2.0 -------------------------------------------------------------------------------- /public-age-keys.txt: 
-------------------------------------------------------------------------------- 1 | # Georg 2 | age1ph7watxp99nsl8ejs3snrf2jykwxc9j3va00z5xrywttyms3af9q4chaa0 -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/public-age-keys.txt: -------------------------------------------------------------------------------- 1 | # Georg 2 | age1ph7watxp99nsl8ejs3snrf2jykwxc9j3va00z5xrywttyms3af9q4chaa0 -------------------------------------------------------------------------------- /img/llm-in-a-box-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/complexity-science-hub/llm-in-a-box-template/HEAD/img/llm-in-a-box-icon.png -------------------------------------------------------------------------------- /scripts/simplify-public-keys.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | grep -v '^\#' public-age-keys.txt | tr -d '[:space:]' | tr '\n' ',' | sed 's/,$//' -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/scripts/simplify-public-keys.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | grep -v '^\#' public-age-keys.txt | tr -d '[:space:]' | tr '\n' ',' | sed 's/,$//' -------------------------------------------------------------------------------- /scripts/handle-dev-secrets.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cp key.txt rendered-template/llm_in_a_box/ 4 | cd rendered-template/llm_in_a_box 5 | make secrets-decrypt 6 | cd ../../ -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/scripts/secops_config.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Define files to encrypt/decrypt 4 | 
FILES_TO_ENCRYPT=".env services/llm-router/google_vertexai.json" 5 | -------------------------------------------------------------------------------- /scripts/secops_config.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Define files to encrypt/decrypt 4 | FILES_TO_ENCRYPT="{{cookiecutter.project_slug}}/.env {{cookiecutter.project_slug}}/services/llm-router/google_vertexai.json" 5 | -------------------------------------------------------------------------------- /docs/setup-advanced-k3s-fluxcd.md: -------------------------------------------------------------------------------- 1 | # Advanced setup with k3s and fluxcd 2 | 3 | 4 | This is a more advanced setup. 5 | We use [fluxcd](https://fluxcd.io/) and [k3s](https://k3s.io/). 6 | 7 | > TODO eventuall add the instructions here 8 | 9 | ## Setup of fluxcd 10 | 11 | ## setup of k3s -------------------------------------------------------------------------------- /scripts/decrypt_secrets.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source ./scripts/secops_config.sh 4 | 5 | export SOPS_AGE_KEY_FILE=key.txt 6 | 7 | for file in ${FILES_TO_ENCRYPT}; do 8 | echo "Decrypting: $file" 9 | sops --decrypt --input-type binary --output-type binary "$file.enc" > "$file" 10 | done 11 | -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/scripts/git_clean_local_branches.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | git branch | grep -v "^\*" | grep -v master | grep -v main | xargs git branch -d 4 | git fetch -p ; git branch -r | awk '{print $1}' | egrep -v -f /dev/fd/0 <(git branch -vv | grep origin) | awk '{print $1}' | xargs git branch -d -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/scripts/decrypt_secrets.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source ./scripts/secops_config.sh 4 | 5 | export SOPS_AGE_KEY_FILE=key.txt 6 | 7 | for file in ${FILES_TO_ENCRYPT}; do 8 | echo "Decrypting: $file" 9 | sops --decrypt --input-type binary --output-type binary "$file.enc" > "$file" 10 | done 11 | -------------------------------------------------------------------------------- /scripts/encrypt_secrets.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source ./scripts/secops_config.sh 4 | 5 | SOPS_AGE_RECIPIENTS=$(./scripts/simplify-public-keys.sh) 6 | echo "recipient: ${SOPS_AGE_RECIPIENTS}" 7 | 8 | for file in ${FILES_TO_ENCRYPT}; do 9 | echo "Encrypting: $file" 10 | sops --encrypt --age ${SOPS_AGE_RECIPIENTS} --input-type binary --output-type binary "$file" > "$file.enc" 11 | done 12 | -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/scripts/encrypt_secrets.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source ./scripts/secops_config.sh 4 | 5 | SOPS_AGE_RECIPIENTS=$(./scripts/simplify-public-keys.sh) 6 | echo "recipient: ${SOPS_AGE_RECIPIENTS}" 7 | 8 | for file in ${FILES_TO_ENCRYPT}; do 9 | echo "Encrypting: $file" 10 | sops --encrypt --age ${SOPS_AGE_RECIPIENTS} --input-type binary --output-type binary "$file" > "$file.enc" 11 | done 12 | -------------------------------------------------------------------------------- /services/proxy/rules/middlewares-chains.yml: -------------------------------------------------------------------------------- 1 | http: 2 | middlewares: 3 | chain-no-auth: 4 | chain: 5 | middlewares: 6 | - middlewares-rate-limit 7 | - middlewares-secure-headers 8 | - middlewares-compress 9 | 10 | chain-basic-auth: 11 | chain: 12 | middlewares: 13 | - middlewares-rate-limit 14 | #- middlewares-https-redirectscheme 15 
| - middlewares-secure-headers 16 | - middlewares-basic-auth 17 | - middlewares-compress 18 | -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/services/proxy/rules/middlewares-chains.yml: -------------------------------------------------------------------------------- 1 | http: 2 | middlewares: 3 | chain-no-auth: 4 | chain: 5 | middlewares: 6 | - middlewares-rate-limit 7 | - middlewares-secure-headers 8 | - middlewares-compress 9 | 10 | chain-basic-auth: 11 | chain: 12 | middlewares: 13 | - middlewares-rate-limit 14 | #- middlewares-https-redirectscheme 15 | - middlewares-secure-headers 16 | - middlewares-basic-auth 17 | - middlewares-compress 18 | -------------------------------------------------------------------------------- /.github/workflows/render-pdf.yml: -------------------------------------------------------------------------------- 1 | name: Generate PDF from Markdown 2 | on: 3 | push: 4 | branches: 5 | - main 6 | jobs: 7 | paper: 8 | runs-on: ubuntu-latest 9 | name: Paper Draft 10 | steps: 11 | - name: Checkout repository 12 | uses: actions/checkout@v4 13 | - name: TeX and PDF 14 | uses: docker://openjournals/paperdraft:latest 15 | with: 16 | journal: joss 17 | args: '-k ./docs/paper.md' 18 | - name: Upload PDF to Artifacts 19 | uses: actions/upload-artifact@v4 20 | with: 21 | name: generated-paper 22 | path: ./docs/paper.pdf -------------------------------------------------------------------------------- /services/proxy/rules/tls-opts.yml: -------------------------------------------------------------------------------- 1 | tls: 2 | options: 3 | tls-opts: 4 | minVersion: VersionTLS12 5 | cipherSuites: 6 | - TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 7 | - TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 8 | - TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 9 | - TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 10 | - TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305 11 | - TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305 12 | - 
TLS_AES_128_GCM_SHA256 13 | - TLS_AES_256_GCM_SHA384 14 | - TLS_CHACHA20_POLY1305_SHA256 15 | - TLS_FALLBACK_SCSV # Client is doing version fallback. See RFC 7507 16 | curvePreferences: 17 | - CurveP521 18 | - CurveP384 19 | sniStrict: true 20 | -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/services/proxy/rules/tls-opts.yml: -------------------------------------------------------------------------------- 1 | tls: 2 | options: 3 | tls-opts: 4 | minVersion: VersionTLS12 5 | cipherSuites: 6 | - TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 7 | - TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 8 | - TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 9 | - TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 10 | - TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305 11 | - TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305 12 | - TLS_AES_128_GCM_SHA256 13 | - TLS_AES_256_GCM_SHA384 14 | - TLS_CHACHA20_POLY1305_SHA256 15 | - TLS_FALLBACK_SCSV # Client is doing version fallback. See RFC 7507 16 | curvePreferences: 17 | - CurveP521 18 | - CurveP384 19 | sniStrict: true 20 | -------------------------------------------------------------------------------- /cookiecutter.json: -------------------------------------------------------------------------------- 1 | { 2 | "project_name": "LLM in a box", 3 | "organization": "myorg", 4 | "project_slug": "{{ cookiecutter.project_name.lower().replace(' ', '_') }}", 5 | "project_slug_pixi": "{{ cookiecutter.project_name.lower().replace(' ', '-') }}", 6 | "author": "Your name", 7 | "author_email": "Your@email.com", 8 | "cloudflare_api_key": "<>", 9 | "cloudflare_email": "<>", 10 | "root_domain": "<>", 11 | "time_zone": "Europe/Vienna", 12 | "state_path": "{{ '.' 
| abspath }}/{{ cookiecutter.project_slug }}-z_state", 13 | 14 | "_extensions": [ 15 | "jinja2_ospath.extensions.OSPathExtension" 16 | ], 17 | "_copy_without_render": [ 18 | "services/model-server/vllm" 19 | ] 20 | } -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/user-story.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: user story 3 | about: Describe this issue template's purpose here. 4 | title: "[user story] ..." 5 | labels: user story 6 | assignees: '' 7 | 8 | --- 9 | 10 | --- 11 | 12 | As a ..., I want to ..., so I can ... 13 | 14 | # Acceptance criteria (given when then) 15 | 16 | - [ ] This is something that can be verified to show that this user story is satisfied. 17 | 18 | # details 19 | 20 | ## assumptions 21 | ## dependencies 22 | ## related documents 23 | ## notes 24 | 25 | # Sprint Ready Checklist 26 | 1. - [ ] Acceptance criteria defined 27 | 2. - [ ] Team understands acceptance criteria 28 | 3. - [ ] Team has defined solution / steps to satisfy acceptance criteria 29 | 4. - [ ] Acceptance criteria is verifiable / testable 30 | 5. 
- [ ] External / 3rd Party dependencies identified 31 | -------------------------------------------------------------------------------- /yamllintconfig.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | extends: default 4 | 5 | rules: 6 | braces: 7 | level: warning 8 | max-spaces-inside: 1 9 | brackets: 10 | level: warning 11 | max-spaces-inside: 1 12 | colons: 13 | level: warning 14 | commas: 15 | level: warning 16 | comments: disable 17 | comments-indentation: disable 18 | document-start: disable 19 | empty-lines: 20 | level: warning 21 | hyphens: 22 | level: warning 23 | indentation: 24 | level: warning 25 | indent-sequences: consistent 26 | line-length: 27 | max: 196 28 | level: warning 29 | allow-non-breakable-inline-mappings: true 30 | truthy: disable 31 | 32 | ignore: | 33 | **/node_modules/** 34 | **/vendor/** 35 | **/dist/** 36 | **/build/** 37 | .pixi/envs/ 38 | **/.pixi/envs/** 39 | **/dbt_packages/** 40 | **.pixi/solve-group-envs/** -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/.github/ISSUE_TEMPLATE/user-story.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: user story 3 | about: Describe this issue template's purpose here. 4 | title: "[user story] ..." 5 | labels: user story 6 | assignees: '' 7 | 8 | --- 9 | 10 | --- 11 | 12 | As a ..., I want to ..., so I can ... 13 | 14 | # Acceptance criteria (given when then) 15 | 16 | - [ ] This is something that can be verified to show that this user story is satisfied. 17 | 18 | # details 19 | 20 | ## assumptions 21 | ## dependencies 22 | ## related documents 23 | ## notes 24 | 25 | # Sprint Ready Checklist 26 | 1. - [ ] Acceptance criteria defined 27 | 2. - [ ] Team understands acceptance criteria 28 | 3. - [ ] Team has defined solution / steps to satisfy acceptance criteria 29 | 4. - [ ] Acceptance criteria is verifiable / testable 30 | 5. 
- [ ] External / 3rd Party dependencies identified 31 | -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/yamllintconfig.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | extends: default 4 | 5 | rules: 6 | braces: 7 | level: warning 8 | max-spaces-inside: 1 9 | brackets: 10 | level: warning 11 | max-spaces-inside: 1 12 | colons: 13 | level: warning 14 | commas: 15 | level: warning 16 | comments: disable 17 | comments-indentation: disable 18 | document-start: disable 19 | empty-lines: 20 | level: warning 21 | hyphens: 22 | level: warning 23 | indentation: 24 | level: warning 25 | indent-sequences: consistent 26 | line-length: 27 | max: 196 28 | level: warning 29 | allow-non-breakable-inline-mappings: true 30 | truthy: disable 31 | 32 | ignore: | 33 | **/node_modules/** 34 | **/vendor/** 35 | **/dist/** 36 | **/build/** 37 | .pixi/envs/ 38 | **/.pixi/envs/** 39 | **/dbt_packages/** 40 | **.pixi/solve-group-envs/** -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/documentation/secops/add-key.md: -------------------------------------------------------------------------------- 1 | # Add key for age encryption 2 | 3 | example how to create a public/private pair for age based encryption 4 | 5 | ## creating the keys for the new user 6 | 7 | The new user executes: 8 | 9 | ```bash 10 | age-keygen -o key.txt 11 | ``` 12 | 13 | The new user creates a pull request to add his/her public key to the file. 14 | 15 | > Ensure that the key is added to the list of [public age keys](public-keys.txt). 16 | > Keys are comma separated - just add a new line and describe the owner/person for that key. 17 | 18 | > Ensure all files to be encrypted are listed in [secops_config.sh][scripts/secops_config.sh] 19 | 20 | Do not forget to re-encrypt all the secrets for this new user. 
21 | 22 | 23 | The `key.txt` file should be placed at the root of the repository. 24 | 25 | 26 | ## encrypting/decrypting 27 | 28 | To en-/decrypt: 29 | 30 | ```bash 31 | pixi run secrets-encrypt 32 | pixi run secrets-decrypt 33 | ``` -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/services/model-server/README.md: -------------------------------------------------------------------------------- 1 | # VLLM 2 | 3 | Ensure the desired version of VLMM is selected here. 4 | 5 | ## initial setup 6 | 7 | ``` 8 | git clone --branch v0.9.2 --depth 1 https://github.com/vllm-project/vllm.git services/model-server/vllm 9 | ``` 10 | 11 | 12 | ## gpu vs cpu 13 | 14 | ``` 15 | docker run --runtime nvidia --gpus all \ 16 | -v ~/.cache/huggingface:/root/.cache/huggingface \ 17 | --env "HUGGING_FACE_HUB_TOKEN=${HUGGING_FACE_HUB_TOKEN}" \ 18 | -p 8000:8000 \ 19 | --ipc=host \ 20 | vllm/vllm-openai:latest \ 21 | --model mistralai/Mistral-7B-v0.1 22 | 23 | docker run --privileged \ 24 | -v ~/.cache/huggingface:/root/.cache/huggingface \ 25 | --env "HUGGING_FACE_HUB_TOKEN=${HUGGING_FACE_HUB_TOKEN}" \ 26 | -p 8001:8000 \ 27 | --ipc=host \ 28 | public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.9.2 \ 29 | --model mistralai/Mistral-7B-v0.1 30 | 31 | curl http://localhost:8000/v1/models 32 | curl http://localhost:8000/v1/completions \ 33 | -H "Content-Type: application/json" \ 34 | -d '{ 35 | "model": "mistralai/Mistral-7B-v0.1", 36 | "prompt": "San Francisco is a", 37 | "max_tokens": 7, 38 | "temperature": 0 39 | }' 40 | 41 | curl http://localhost:8000/v1/completions \ 42 | -H "Content-Type: application/json" \ 43 | -d '{ 44 | "model": "microsoft/Phi-4-mini-instruct", 45 | "prompt": "San Francisco is a", 46 | "max_tokens": 7, 47 | "temperature": 0 48 | }' 49 | 50 | curl http://localhost:8000/v1/completions \ 51 | -H "Content-Type: application/json" \ 52 | -d '{ 53 | "model": "meta-llama/Llama-3.2-1B-Instruct", 54 | "prompt": "San 
Francisco is a", 55 | "max_tokens": 7, 56 | "temperature": 0 57 | }' 58 | ``` -------------------------------------------------------------------------------- /.env_template: -------------------------------------------------------------------------------- 1 | # Domain configuration 2 | ROOT_DOMAIN=project.docker # see LOCALDNS.md for more information 3 | CLOUDFLARE_IPS=173.245.48.0/20,103.21.244.0/22,103.22.200.0/22,103.31.4.0/22,141.101.64.0/18,108.162.192.0/18,190.93.240.0/20,188.114.96.0/20,197.234.240.0/22,198.41.128.0/17,162.158.0.0/15,104.16.0.0/13,104.24.0.0/14,172.64.0.0/13,131.0.72.0/22 4 | LOCAL_IPS=127.0.0.1/32,10.0.0.0/8,192.168.0.0/16,172.16.0.0/12 5 | TZ=UTC # default to UTC (Universal Time Coordinated) and let auto-detect happen with ./generate-env.sh 6 | 7 | # LLM Router Database (LiteLLM) 8 | LLM_ROUTER_DB=litellm 9 | LLM_ROUTER_DB_USER=litellm 10 | # Generate with: openssl rand -hex 32 11 | LLM_ROUTER_DB_PASSWORD=CHANGEME_GENERATE_HEX32 12 | 13 | # LiteLLM Configuration 14 | # Generate with: openssl rand -hex 32 15 | LITELLM_MASTER_KEY=CHANGEME_GENERATE_HEX32 16 | # Generate with: openssl rand -hex 32 17 | LITELLM_SALT_KEY=CHANGEME_GENERATE_HEX32 18 | LITELLM_UI_USERNAME=admin 19 | LITELLM_UI_PASSWORD=CHANGEME_SECURE_PASSWORD 20 | 21 | # API Keys for Model Providers 22 | # Get from: https://platform.openai.com/api-keys 23 | ROUTER_OPENAI_API_KEY=sk-CHANGEME_YOUR_OPENAI_KEY 24 | 25 | # Get from: https://console.anthropic.com/settings/keys 26 | ROUTER_ANTHROPIC_API_KEY=sk-CHANGEME_YOUR_ANTHROPIC_KEY 27 | 28 | # Chat UI Database (OpenWebUI) 29 | CHAT_UI_DB=openwebui 30 | CHAT_UI_DB_USER=openwebui 31 | # Generate with: openssl rand -hex 32 32 | CHAT_UI_DB_PASSWORD=CHANGEME_GENERATE_HEX32 33 | # Generate with: openssl rand -hex 32 34 | CHAT_UI_SECRET_KEY=CHANGEME_GENERATE_HEX32 35 | 36 | # Vector Database (Qdrant) 37 | # Generate with: openssl rand -hex 32 38 | QDRANT__SERVICE__API_KEY=CHANGEME_GENERATE_HEX32 39 | 40 | # Hugging Face API Key 41 | # Get 
from: https://huggingface.co/settings/tokens 42 | HUGGING_FACE_HUB_TOKEN=hf_CHANGEME_YOUR_HUGGING_FACE_API_KEY 43 | -------------------------------------------------------------------------------- /services/proxy/rules/middlewares.yml: -------------------------------------------------------------------------------- 1 | http: 2 | middlewares: 3 | 4 | middlewares-rate-limit: 5 | rateLimit: 6 | average: 100 7 | burst: 50 8 | 9 | middlewares-https-redirectscheme: 10 | redirectScheme: 11 | scheme: https 12 | permanent: true 13 | 14 | middlewares-secure-headers: 15 | headers: 16 | accessControlAllowMethods: 17 | - GET 18 | - OPTIONS 19 | - PUT 20 | accessControlMaxAge: 100 21 | hostsProxyHeaders: 22 | - "X-Forwarded-Host" 23 | stsSeconds: 63072000 24 | stsIncludeSubdomains: true 25 | stsPreload: true 26 | forceSTSHeader: true 27 | 28 | # TODO: Enable for templated repo 29 | customFrameOptionsValue: "allow-from https:{{env "ROOT_DOMAIN"}}" 30 | contentTypeNosniff: true 31 | browserXssFilter: true 32 | # sslForceHost: true # add sslHost to all of the services 33 | # sslHost: sslHost: "{{env "ROOT_DOMAIN"}}" 34 | referrerPolicy: "same-origin" 35 | permissionsPolicy: "camera=(), microphone=(), geolocation=(), payment=(), usb=(), vr=()" 36 | customResponseHeaders: 37 | X-Robots-Tag: "none,noarchive,nosnippet,notranslate,noimageindex," 38 | server: "" 39 | # https://community.traefik.io/t/how-to-make-websockets-work-with-traefik-2-0-setting-up-rancher/1732 40 | # X-Forwarded-Proto: "https" 41 | 42 | middlewares-compress: 43 | compress: {} 44 | 45 | # https://stackoverflow.com/questions/49717670/how-to-config-upload-body-size-restriction-in-traefik 46 | middlewares-buffering: 47 | buffering: 48 | maxResponseBodyBytes: 2000000 49 | maxRequestBodyBytes: 10485760 50 | memRequestBodyBytes: 2097152 51 | memResponseBodyBytes: 2097152 52 | retryExpression: "IsNetworkError() && Attempts() <= 2" 53 | -------------------------------------------------------------------------------- 
/{{cookiecutter.project_slug}}/services/proxy/rules/middlewares.yml: -------------------------------------------------------------------------------- 1 | http: 2 | middlewares: 3 | 4 | middlewares-rate-limit: 5 | rateLimit: 6 | average: 100 7 | burst: 500 8 | 9 | middlewares-https-redirectscheme: 10 | redirectScheme: 11 | scheme: https 12 | permanent: true 13 | 14 | middlewares-secure-headers: 15 | headers: 16 | accessControlAllowMethods: 17 | - GET 18 | - OPTIONS 19 | - PUT 20 | accessControlMaxAge: 100 21 | hostsProxyHeaders: 22 | - "X-Forwarded-Host" 23 | stsSeconds: 63072000 24 | stsIncludeSubdomains: true 25 | stsPreload: true 26 | forceSTSHeader: true 27 | 28 | # TODO: Enable for templated repo 29 | customFrameOptionsValue: {% raw %}"allow-from https:{{env "ROOT_DOMAIN"}}"{% endraw %} 30 | contentTypeNosniff: true 31 | browserXssFilter: true 32 | # sslForceHost: true # add sslHost to all of the services 33 | # sslHost: {% raw %}sslHost: "{{env "ROOT_DOMAIN"}}"{% endraw %} 34 | referrerPolicy: "same-origin" 35 | permissionsPolicy: "camera=(), microphone=(), geolocation=(), payment=(), usb=(), vr=()" 36 | customResponseHeaders: 37 | X-Robots-Tag: "none,noarchive,nosnippet,notranslate,noimageindex," 38 | server: "" 39 | # https://community.traefik.io/t/how-to-make-websockets-work-with-traefik-2-0-setting-up-rancher/1732 40 | # X-Forwarded-Proto: "https" 41 | 42 | middlewares-compress: 43 | compress: {} 44 | 45 | # https://stackoverflow.com/questions/49717670/how-to-config-upload-body-size-restriction-in-traefik 46 | middlewares-buffering: 47 | buffering: 48 | maxResponseBodyBytes: 2000000 49 | maxRequestBodyBytes: 10485760 50 | memRequestBodyBytes: 2097152 51 | memResponseBodyBytes: 2097152 52 | retryExpression: "IsNetworkError() && Attempts() <= 2" 53 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = 
"llm-in-a-box-template" 3 | version = "1.0.0" 4 | description = "LLM in a box template" 5 | authors = [{ name = "Georg Heiler", email = "heiler@csh.ac.at" }] 6 | requires-python = ">= 3.13,< 3.14" 7 | 8 | [tool.pixi.project] 9 | platforms = ["linux-64", "osx-arm64", "win-64", "linux-aarch64"] 10 | channels = ["conda-forge"] 11 | 12 | [build-system] 13 | build-backend = "hatchling.build" 14 | requires = ["hatchling"] 15 | 16 | [tool.pixi.environments] 17 | secops = { features = ["secops"], solve-group = "secops" } 18 | ci = { features = ["ci-basics"], solve-group = "ci" } 19 | template = { features = ["template"], solve-group = "template" } 20 | 21 | [tool.pixi.feature.ci-basics.dependencies] 22 | yamllint = ">=1.35.1,<2" 23 | taplo = ">=0.9.3,<0.11" 24 | 25 | [tool.pixi.feature.template.dependencies] 26 | cruft = "~=2.16.0" 27 | cookiecutter = "~=2.6.0" 28 | 29 | [tool.pixi.feature.template.pypi-dependencies] 30 | jinja2-ospath = ">=0.3.0,<0.4.0" 31 | 32 | [tool.pixi.feature.secops.dependencies] 33 | go-sops = "~=3.9.4" 34 | age = "~=1.2.1" 35 | 36 | 37 | [tool.pixi.tasks.fmt] 38 | cmd = "pixi run -e ci yamllint -c yamllintconfig.yaml . && taplo fmt" 39 | description = "Format yaml & toml files" 40 | env = { RUST_LOG = "warn" } 41 | 42 | [tool.pixi.tasks.tpl-init-cruft] 43 | cmd = "pixi run -e template cruft create https://github.com/complexity-science-hub/llm-in-a-box-template.git" 44 | description = "Initialize template" 45 | 46 | 47 | [tool.pixi.tasks.render-dev] 48 | cmd = "pixi run --frozen -e template cruft create . 
--no-input --overwrite-if-exists --output-dir rendered-template && ./scripts/handle-dev-secrets.sh" 49 | description = "locally render a development instance of the template" 50 | 51 | [tool.pixi.tasks.start-template] 52 | cmd = "pixi run --frozen -e template docker compose --profile llminabox --profile ollama-cpu --profile docling-cpu --profile vectordb-cpu up" 53 | cwd = "rendered-template/llm_in_a_box" 54 | description = "spin up the template" 55 | 56 | [tool.pixi.tasks.secrets-encrypt] 57 | cmd = "pixi run -e secops ./scripts/encrypt_secrets.sh" 58 | description = "encrypt secrets with SOPS and AGE" 59 | 60 | [tool.pixi.tasks.secrets-decrypt] 61 | cmd = "pixi run -e secops ./scripts/decrypt_secrets.sh" 62 | description = "decrypt secrets with SOPS and AGE" 63 | -------------------------------------------------------------------------------- /docs/paper.bib: -------------------------------------------------------------------------------- 1 | @misc{ollama, 2 | author = {He, Jeffrey and Paull, Michael and others}, 3 | title = {Ollama}, 4 | year = {2023}, 5 | publisher = {GitHub}, 6 | journal = {GitHub repository}, 7 | howpublished = {\url{https://github.com/ollama/ollama}} 8 | } 9 | 10 | @misc{litellm, 11 | author = {K K, Ishaan and others}, 12 | title = {LiteLLM}, 13 | year = {2023}, 14 | publisher = {GitHub}, 15 | journal = {GitHub repository}, 16 | howpublished = {\url{https://github.com/BerriAI/litellm}} 17 | } 18 | 19 | @misc{openwebui, 20 | author = {Tym, Timothy and others}, 21 | title = {Open-WebUI}, 22 | year = {2023}, 23 | publisher = {GitHub}, 24 | journal = {GitHub repository}, 25 | howpublished = {\url{https://github.com/open-webui/open-webui}} 26 | } 27 | 28 | @article{hettrick2013uk, 29 | title={UK Research Software Engineer Survey 2013}, 30 | author={Hettrick, Simon and others}, 31 | year={2013}, 32 | publisher={Software Sustainability Institute}, 33 | doi={10.5281/zenodo.14809}, 34 | url={https://doi.org/10.5281/zenodo.14809} 35 | } 36 | 
@misc{graham_mcps_2025, 37 | type = {Substack newsletter}, 38 | title = {{MCPs}, {Gatekeepers}, and the {Future} of {AI}}, 39 | url = {https://iamcharliegraham.substack.com/p/mcps-gatekeepers-and-the-future-of}, 40 | abstract = {MCPs—Model Context Protocols—are set to transform AI from passive chatbots into powerful, action-taking agents. But the real story isn’t what MCPs enable—it’s who controls them.}, 41 | urldate = {2025-04-24}, 42 | journal = {In The AIrena}, 43 | author = {Graham, Charlie}, 44 | month = apr, 45 | year = {2025}, 46 | file = {Snapshot:/Users/geoheil/Zotero/storage/N4YFTQQU/mcps-gatekeepers-and-the-future-of.html:text/html}, 47 | } 48 | @misc{auer2024doclingtechnicalreport, 49 | title={Docling Technical Report}, 50 | author={Christoph Auer and Maksym Lysak and Ahmed Nassar and Michele Dolfi and Nikolaos Livathinos and Panos Vagenas and Cesar Berrospi Ramis and Matteo Omenetti and Fabian Lindlbauer and Kasper Dinkla and Lokesh Mishra and Yusik Kim and Shubham Gupta and Rafael Teixeira de Lima and Valery Weber and Lucas Morin and Ingmar Meijer and Viktor Kuropiatnyk and Peter W. J. Staar}, 51 | year={2024}, 52 | eprint={2408.09869}, 53 | archivePrefix={arXiv}, 54 | primaryClass={cs.CL}, 55 | url={https://arxiv.org/abs/2408.09869}, 56 | } -------------------------------------------------------------------------------- /services/keykloak/Dockerfile: -------------------------------------------------------------------------------- 1 | # ############################################################################### 2 | # # Dockerfile – Keycloak 26.2 with PostgreSQL build + SCIM plug-in (auto-fetch) 3 | # ############################################################################### 4 | 5 | # ######################## 0️⃣ Parameters you might tweak ###################### 6 | # # Pass SCIM_VERSION at build time if you want a different tag, e.g. 7 | # # docker build --build-arg SCIM_VERSION=2.2.2 -t my/keycloak:26.2-scim . 
8 | # ARG SCIM_VERSION=2.2.1 9 | # # The download URL template (GitHub project: Captain-P-Goldfish/scim-for-keycloak) 10 | # ARG SCIM_URL_TEMPLATE="https://github.com/Captain-P-Goldfish/scim-for-keycloak/releases/download/v${SCIM_VERSION}/scim-for-keycloak-kc-26-${SCIM_VERSION}-free.jar" 11 | 12 | # ######################## 1️⃣ Fetch stage – download the JAR ################## 13 | # FROM alpine:3.19 AS fetcher 14 | # ARG SCIM_VERSION 15 | # ARG SCIM_URL_TEMPLATE 16 | # RUN apk add --no-cache curl ca-certificates && \ 17 | # curl -L --fail -o /scim-plugin.jar "${SCIM_URL_TEMPLATE}" 18 | 19 | # ######################## 2️⃣ Build/augmentation stage ####################### 20 | # FROM quay.io/keycloak/keycloak:26.2 AS builder 21 | 22 | # # ---- Build-time config (becomes immutable under --optimized) --------------- 23 | # ENV KC_DB=postgres \ 24 | # KC_HEALTH_ENABLED=true \ 25 | # KC_METRICS_ENABLED=true \ 26 | # KC_FEATURES=token-exchange,admin-fine-grained-authz 27 | 28 | # # Copy downloaded SCIM plug-in JAR into providers directory 29 | # COPY --from=fetcher /scim-plugin.jar /opt/keycloak/providers/ 30 | 31 | # # Run Quarkus build once and strip temp files 32 | # RUN /opt/keycloak/bin/kc.sh build --optimized && \ 33 | # rm -rf /opt/keycloak/standalone/tmp/* 34 | 35 | # ######################## 3️⃣ Runtime stage – slim, immutable ################ 36 | # FROM quay.io/keycloak/keycloak:26.2 AS runtime 37 | # LABEL org.opencontainers.image.title="Keycloak 26 optimized w/ SCIM" \ 38 | # org.opencontainers.image.description="Optimised Keycloak with SCIM-for-Keycloak plug-in auto-fetched at build time." 
\ 39 | # maintainer="you@example.com" 40 | 41 | # COPY --from=builder /opt/keycloak/ /opt/keycloak/ 42 | # USER 1000 43 | # EXPOSE 8080 9000 44 | # ENTRYPOINT ["/opt/keycloak/bin/kc.sh", "start", "--optimized"] 45 | # ############################################################################### 46 | -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/services/keykloak/Dockerfile: -------------------------------------------------------------------------------- 1 | # ############################################################################### 2 | # # Dockerfile – Keycloak 26.2 with PostgreSQL build + SCIM plug-in (auto-fetch) 3 | # ############################################################################### 4 | 5 | # ######################## 0️⃣ Parameters you might tweak ###################### 6 | # # Pass SCIM_VERSION at build time if you want a different tag, e.g. 7 | # # docker build --build-arg SCIM_VERSION=2.2.2 -t my/keycloak:26.2-scim . 
8 | # ARG SCIM_VERSION=2.2.1 9 | # # The download URL template (GitHub project: Captain-P-Goldfish/scim-for-keycloak) 10 | # ARG SCIM_URL_TEMPLATE="https://github.com/Captain-P-Goldfish/scim-for-keycloak/releases/download/v${SCIM_VERSION}/scim-for-keycloak-kc-26-${SCIM_VERSION}-free.jar" 11 | 12 | # ######################## 1️⃣ Fetch stage – download the JAR ################## 13 | # FROM alpine:3.19 AS fetcher 14 | # ARG SCIM_VERSION 15 | # ARG SCIM_URL_TEMPLATE 16 | # RUN apk add --no-cache curl ca-certificates && \ 17 | # curl -L --fail -o /scim-plugin.jar "${SCIM_URL_TEMPLATE}" 18 | 19 | # ######################## 2️⃣ Build/augmentation stage ####################### 20 | # FROM quay.io/keycloak/keycloak:26.2 AS builder 21 | 22 | # # ---- Build-time config (becomes immutable under --optimized) --------------- 23 | # ENV KC_DB=postgres \ 24 | # KC_HEALTH_ENABLED=true \ 25 | # KC_METRICS_ENABLED=true \ 26 | # KC_FEATURES=token-exchange,admin-fine-grained-authz 27 | 28 | # # Copy downloaded SCIM plug-in JAR into providers directory 29 | # COPY --from=fetcher /scim-plugin.jar /opt/keycloak/providers/ 30 | 31 | # # Run Quarkus build once and strip temp files 32 | # RUN /opt/keycloak/bin/kc.sh build --optimized && \ 33 | # rm -rf /opt/keycloak/standalone/tmp/* 34 | 35 | # ######################## 3️⃣ Runtime stage – slim, immutable ################ 36 | # FROM quay.io/keycloak/keycloak:26.2 AS runtime 37 | # LABEL org.opencontainers.image.title="Keycloak 26 optimized w/ SCIM" \ 38 | # org.opencontainers.image.description="Optimised Keycloak with SCIM-for-Keycloak plug-in auto-fetched at build time." 
\ 39 | # maintainer="you@example.com" 40 | 41 | # COPY --from=builder /opt/keycloak/ /opt/keycloak/ 42 | # USER 1000 43 | # EXPOSE 8080 9000 44 | # ENTRYPOINT ["/opt/keycloak/bin/kc.sh", "start", "--optimized"] 45 | # ############################################################################### 46 | -------------------------------------------------------------------------------- /services/model-server/README.md: -------------------------------------------------------------------------------- 1 | # VLLM 2 | 3 | Ensure the desired version of VLMM is selected here. 4 | 5 | ## initial setup 6 | 7 | ``` 8 | git clone --branch v0.9.1 --depth 1 https://github.com/vllm-project/vllm.git 9 | 10 | # alternatively 11 | git submodule add https://github.com/vllm-project/vllm.git vllm 12 | cd vllm 13 | git checkout v0.9.1 14 | ``` 15 | 16 | ## updating a new version of vllm 17 | 18 | Ensure the right version is set in docker compose: `SETUPTOOLS_SCM_PRETEND_VERSION_FOR_VLLM: "0.9.1"` 19 | 20 | ```bash 21 | # 1. Navigate into the submodule directory 22 | cd services/model-server/vllm 23 | 24 | # 2. Add the original vLLM repo as a new remote called "upstream" 25 | git remote add upstream https://github.com/vllm-project/vllm.git 26 | 27 | # 3. Verify that it's set up correctly 28 | git remote -v 29 | 30 | git fetch upstream 31 | git fetch upstream --tags 32 | 33 | # ensure we are on our hotfixed branch 34 | # ba28a8452b4e278b7da4e7a1eb1bc5a334a755ca 35 | git checkout template-git-fix 36 | 37 | # git rebase upstream/main 38 | # This is the key command. Read it as: 39 | # "Rebase ONTO v0.9.2, all commits that are on my current branch (template-git-fix) 40 | # since it diverged from v0.9.1." 
41 | git rebase --onto v0.9.2 v0.9.1 template-git-fix 42 | 43 | git push --force-with-lease origin template-git-fix 44 | 45 | git add services/model-server/vllm 46 | git commit -m "chore: Update vllm submodule to v0.9.2 from upstream" 47 | git push 48 | ``` 49 | 50 | ## gpu vs cpu 51 | 52 | ``` 53 | docker run --runtime nvidia --gpus all \ 54 | -v ~/.cache/huggingface:/root/.cache/huggingface \ 55 | --env "HUGGING_FACE_HUB_TOKEN=hf_<YOUR_HF_TOKEN>" \ 56 | -p 8000:8000 \ 57 | --ipc=host \ 58 | vllm/vllm-openai:latest \ 59 | --model mistralai/Mistral-7B-v0.1 60 | 61 | docker run --privileged \ 62 | -v ~/.cache/huggingface:/root/.cache/huggingface \ 63 | --env "HUGGING_FACE_HUB_TOKEN=hf_<YOUR_HF_TOKEN>" \ 64 | -p 8000:8000 \ 65 | --ipc=host \ 66 | public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.9.1 \ 67 | --model mistralai/Mistral-7B-v0.1 68 | 69 | 70 | 71 | 72 | 73 | 74 | curl http://localhost:8000/v1/models 75 | curl http://localhost:8000/v1/completions \ 76 | -H "Content-Type: application/json" \ 77 | -d '{ 78 | "model": "mistralai/Mistral-7B-v0.1", 79 | "prompt": "San Francisco is a", 80 | "max_tokens": 7, 81 | "temperature": 0 82 | }' 83 | 84 | ``` -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: secrets-encrypt 2 | ## encrypt secrets with SOPS and AGE 3 | secrets-encrypt: 4 | pixi run secrets-encrypt 5 | 6 | 7 | .PHONY: secrets-decrypt 8 | ## decrypt secrets with SOPS and AGE 9 | secrets-decrypt: 10 | pixi run secrets-decrypt 11 | 12 | 13 | ################################################################################# 14 | # PROJECT RULES # 15 | ################################################################################# 16 | ################################################################################# 17 | # Self Documenting Commands # 18 | 
################################################################################# 19 | .DEFAULT_GOAL := help 20 | # Inspired by 21 | # sed script explained: 22 | # /^##/: 23 | # * save line in hold space 24 | # * purge line 25 | # * Loop: 26 | # * append newline + line to hold space 27 | # * go to next line 28 | # * if line starts with doc comment, strip comment character off and loop 29 | # * remove target prerequisites 30 | # * append hold space (+ newline) to line 31 | # * replace newline plus comments by `---` 32 | # * print line 33 | # Separate expressions are necessary because labels cannot be delimited by 34 | # semicolon; see 35 | tasks: 36 | pixi task 37 | 38 | .PHONY: help 39 | help: 40 | @echo "$$(tput bold)Available rules:$$(tput sgr0)" 41 | @echo 42 | @sed -n -e "/^## / { \ 43 | h; \ 44 | s/.*//; \ 45 | :doc" \ 46 | -e "H; \ 47 | n; \ 48 | s/^## //; \ 49 | t doc" \ 50 | -e "s/:.*//; \ 51 | G; \ 52 | s/\\n## /---/; \ 53 | s/\\n/ /g; \ 54 | p; \ 55 | }" ${MAKEFILE_LIST} \ 56 | | LC_ALL='C' sort --ignore-case \ 57 | | awk -F '---' \ 58 | -v ncol=$$(tput cols) \ 59 | -v indent=19 \ 60 | -v col_on="$$(tput setaf 6)" \ 61 | -v col_off="$$(tput sgr0)" \ 62 | '{ \ 63 | printf "%s%*s%s ", col_on, -indent, $$1, col_off; \ 64 | n = split($$2, words, " "); \ 65 | line_length = ncol - indent; \ 66 | for (i = 1; i <= n; i++) { \ 67 | line_length -= length(words[i]) + 1; \ 68 | if (line_length <= 0) { \ 69 | line_length = ncol - indent - length(words[i]) - 1; \ 70 | printf "\n%*s ", -indent, " "; \ 71 | } \ 72 | printf "%s ", words[i]; \ 73 | } \ 74 | printf "\n"; \ 75 | }' \ 76 | | more $(shell test $(shell uname) = Darwin && echo '--no-init --raw-control-chars') 77 | -------------------------------------------------------------------------------- /docs/LOCALDNS.md: -------------------------------------------------------------------------------- 1 | # Local DNS 2 | 3 | ## Allowable root domains for local development environments 4 | 5 | When working with local 
development environments on your laptop, you have flexibility in choosing root domains that are primarily used for internal access and testing purposes. 6 | 7 | Here are some common practices and considerations: 8 | 9 | ### 0. Using a proper DNS with redirects 10 | 11 | - http://*.llminabox.geoheil.com will redirect to 127.0.0.1 12 | - 13 | ### 1. Using `.localhost` 14 | 15 | The `.localhost` Top-Level Domain (TLD) is specifically reserved for loopback purposes, making it an excellent choice for local development. 16 | It's statically defined in host DNS implementations to point to the loopback IP address (127.0.0.1) and is ideal when you need to access a service running directly on your machine without relying on external DNS resolution. 17 | 18 | ### 2. Using custom TLDs for local development 19 | 20 | Many developers use made-up TLDs, such as `.docker` or other custom extensions, to organize their local development domains. For example, a Docker container named "project" might be accessible through `project.docker`. 21 | 22 | It's important to be mindful of potential conflicts, especially when using TLDs that are eventually registered publicly, like the case of the `.dev` TLD which was later acquired by Google and is now a valid registrable domain. 23 | 24 | For these cases, you will need to configure your local DNS settings, such as the `/etc/hosts` file, or use a local DNS server like dnsmasq to resolve your chosen custom domains to the appropriate IP addresses (usually 127.0.0.1 or a Docker container's IP). 25 | 26 | ### 3. Using `.home.arpa` for home networks 27 | 28 | The Internet Engineering Task Force (IETF) approved the .home.arpa TLD specifically for home network use. 29 | 30 | This TLD is suitable when configuring domains within your local home network, such as when assigning names to devices through your router's DHCP server. 31 | 32 | ### 4. 
Using subdomains of a registered domain 33 | 34 | A more robust approach, especially for complex local development setups or those with future public-facing applications, involves using subdomains of a domain you already own. 35 | 36 | For instance, if you own `example.com`, you could use `corp.example.com` for your internal development environment or `jellyfin.example.com` for a media server. 37 | 38 | This strategy helps prevent potential conflicts with publicly registered domains and allows for smoother transitions if your local projects are eventually deployed to production environments. 39 | 40 | -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: tpl-update 2 | ## Update template project 3 | tpl-update: 4 | pixi run tpl-update 5 | 6 | 7 | .PHONY: cleanup-state 8 | ## cleanup state 9 | cleanup-state: 10 | pixi run cleanup-state 11 | 12 | 13 | .PHONY: fmt 14 | ## basic auto formatting 15 | fmt: 16 | pixi run -e ci-basics fmt 17 | 18 | 19 | .PHONY: fmt-unsafe 20 | ## enhanced auto formatting 21 | fmt-unsafe: 22 | pixi run -e ci-basics fmt-unsafe 23 | 24 | 25 | .PHONY: lint 26 | ## Ruff based flake8 style linting plus type checking via pyright 27 | lint: 28 | pixi run -e ci-validation lint 29 | 30 | 31 | .PHONY: test 32 | ## Execute tests with coverage 33 | test: 34 | pixi run -e ci-validation test 35 | 36 | 37 | 38 | .PHONY: secrets-encrypt 39 | ## encrypt secrets with SOPS and AGE 40 | secrets-encrypt: 41 | pixi run secrets-encrypt 42 | 43 | 44 | .PHONY: secrets-decrypt 45 | ## encrypt secrets with SOPS and AGE 46 | secrets-decrypt: 47 | pixi run secrets-decrypt 48 | 49 | 50 | ## cleanup local non used branches 51 | clean-local-branches: 52 | pixi run clean-local-branches 53 | 54 | 55 | ################################################################################# 56 | # PROJECT RULES # 57 | 
################################################################################# 58 | ################################################################################# 59 | # Self Documenting Commands # 60 | ################################################################################# 61 | .DEFAULT_GOAL := help 62 | # Inspired by 63 | # sed script explained: 64 | # /^##/: 65 | # * save line in hold space 66 | # * purge line 67 | # * Loop: 68 | # * append newline + line to hold space 69 | # * go to next line 70 | # * if line starts with doc comment, strip comment character off and loop 71 | # * remove target prerequisites 72 | # * append hold space (+ newline) to line 73 | # * replace newline plus comments by `---` 74 | # * print line 75 | # Separate expressions are necessary because labels cannot be delimited by 76 | # semicolon; see 77 | tasks: 78 | pixi task 79 | 80 | .PHONY: help 81 | help: 82 | @echo "$$(tput bold)Available rules:$$(tput sgr0)" 83 | @echo 84 | @sed -n -e "/^## / { \ 85 | h; \ 86 | s/.*//; \ 87 | :doc" \ 88 | -e "H; \ 89 | n; \ 90 | s/^## //; \ 91 | t doc" \ 92 | -e "s/:.*//; \ 93 | G; \ 94 | s/\\n## /---/; \ 95 | s/\\n/ /g; \ 96 | p; \ 97 | }" ${MAKEFILE_LIST} \ 98 | | LC_ALL='C' sort --ignore-case \ 99 | | awk -F '---' \ 100 | -v ncol=$$(tput cols) \ 101 | -v indent=19 \ 102 | -v col_on="$$(tput setaf 6)" \ 103 | -v col_off="$$(tput sgr0)" \ 104 | '{ \ 105 | printf "%s%*s%s ", col_on, -indent, $$1, col_off; \ 106 | n = split($$2, words, " "); \ 107 | line_length = ncol - indent; \ 108 | for (i = 1; i <= n; i++) { \ 109 | line_length -= length(words[i]) + 1; \ 110 | if (line_length <= 0) { \ 111 | line_length = ncol - indent - length(words[i]) - 1; \ 112 | printf "\n%*s ", -indent, " "; \ 113 | } \ 114 | printf "%s ", words[i]; \ 115 | } \ 116 | printf "\n"; \ 117 | }' \ 118 | | more $(shell test $(shell uname) = Darwin && echo '--no-init --raw-control-chars') 119 | 
-------------------------------------------------------------------------------- /services/llm-router/litellm_config.yml: -------------------------------------------------------------------------------- 1 | general_settings: 2 | master_key: "os.environ/LITELLM_MASTER_KEY" 3 | block_robots: true 4 | #alerting: ["slack"] # Setup slack alerting - get alerts on LLM exceptions, Budget Alerts, Slow LLM Responses 5 | proxy_batch_write_at: 60 # Batch write spend updates every 60s 6 | database_connection_pool_limit: 10 # limit the number of database connections to = MAX Number of DB Connections/Number of instances of litellm proxy (Around 10-20 is good number) 7 | 8 | # OPTIONAL Best Practices 9 | #disable_spend_logs: True # turn off writing each transaction to the db. We recommend doing this is you don't need to see Usage on the LiteLLM UI and are tracking metrics via Prometheus 10 | disable_error_logs: True # turn off writing LLM Exceptions to DB 11 | #allow_requests_on_db_unavailable: True # Only USE when running LiteLLM on your VPC. Allow requests to still be processed even if the DB is unavailable. We recommend doing this if you're running LiteLLM on VPC that cannot be accessed from the public internet. 12 | 13 | litellm_settings: 14 | request_timeout: 600 # raise Timeout error if call takes longer than 600 seconds. 
Default value is 6000seconds if not set 15 | #set_verbose: False # Switch off Debug Logging, ensure your logs do not have any debugging on 16 | #json_logs: true # Get debug logs in json format 17 | model_list: 18 | - model_name: gpt-4.1-2025-04-14 19 | litellm_params: 20 | model: openai/gpt-4.1-2025-04-14 21 | api_key: os.environ/OPENAI_API_KEY 22 | - model_name: gpt-4o-2024-11-20 23 | litellm_params: 24 | model: openai/gpt-4o-2024-11-20 25 | api_key: os.environ/OPENAI_API_KEY 26 | - model_name: gpt-4o-mini-2024-07-18 27 | litellm_params: 28 | model: openai/gpt-4o-mini-2024-07-18 29 | api_key: os.environ/OPENAI_API_KEY 30 | - model_name: claude-sonnet-4-20250514 31 | litellm_params: 32 | model: claude-sonnet-4-20250514 33 | api_key: "os.environ/ANTHROPIC_API_KEY" 34 | - model_name: claude-3-7-sonnet-20250219 35 | litellm_params: 36 | model: claude-3-7-sonnet-20250219 37 | api_key: "os.environ/ANTHROPIC_API_KEY" 38 | - model_name: gemini-2.5-flash-preview-05-20 39 | litellm_params: 40 | model: vertex_ai/gemini-2.5-flash-preview-05-20 41 | vertex_project: "ascii-450914" 42 | vertex_location: "us-central1" 43 | vertex_credentials: "/secrets/google_vertexai.json" 44 | - model_name: gemini-2.5-pro-preview-06-05 45 | litellm_params: 46 | model: vertex_ai/gemini-2.5-pro-preview-06-05 47 | vertex_project: "ascii-450914" 48 | vertex_location: "global" 49 | vertex_credentials: "/secrets/google_vertexai.json" 50 | - model_name: gemini-2.5-pro-preview-05-06 51 | litellm_params: 52 | model: vertex_ai/gemini-2.5-pro-preview-05-06 53 | vertex_project: "ascii-450914" 54 | vertex_location: "us-central1" 55 | vertex_credentials: "/secrets/google_vertexai.json" 56 | - model_name: gemini-2.5-flash-preview-04-17 57 | litellm_params: 58 | model: vertex_ai/gemini-2.5-flash-preview-04-17 59 | vertex_project: "ascii-450914" 60 | vertex_location: "us-central1" 61 | vertex_credentials: "/secrets/google_vertexai.json" 62 | - model_name: gemini-2.0-flash-preview-image-generation 63 | 
litellm_params: 64 | model: vertex_ai/gemini-2.0-flash-preview-image-generation 65 | vertex_project: "ascii-450914" 66 | vertex_location: "us-central1" 67 | vertex_credentials: "/secrets/google_vertexai.json" 68 | - model_name: gemini-2.0-flash-lite-001 69 | litellm_params: 70 | model: vertex_ai/gemini-2.0-flash-lite-001 71 | vertex_project: "ascii-450914" 72 | vertex_location: "us-central1" 73 | vertex_credentials: "/secrets/google_vertexai.json" 74 | - model_name: "gemma3:4b" 75 | litellm_params: 76 | model: "ollama_chat/gemma3:4b" 77 | api_base: "http://ollama:11434" 78 | -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/services/llm-router/litellm_config.yml: -------------------------------------------------------------------------------- 1 | general_settings: 2 | master_key: "os.environ/LITELLM_MASTER_KEY" 3 | block_robots: true 4 | #alerting: ["slack"] # Setup slack alerting - get alerts on LLM exceptions, Budget Alerts, Slow LLM Responses 5 | proxy_batch_write_at: 60 # Batch write spend updates every 60s 6 | database_connection_pool_limit: 10 # limit the number of database connections to = MAX Number of DB Connections/Number of instances of litellm proxy (Around 10-20 is good number) 7 | 8 | # OPTIONAL Best Practices 9 | #disable_spend_logs: True # turn off writing each transaction to the db. We recommend doing this is you don't need to see Usage on the LiteLLM UI and are tracking metrics via Prometheus 10 | disable_error_logs: True # turn off writing LLM Exceptions to DB 11 | #allow_requests_on_db_unavailable: True # Only USE when running LiteLLM on your VPC. Allow requests to still be processed even if the DB is unavailable. We recommend doing this if you're running LiteLLM on VPC that cannot be accessed from the public internet. 12 | 13 | litellm_settings: 14 | request_timeout: 600 # raise Timeout error if call takes longer than 600 seconds. 
Default value is 6000seconds if not set 15 | #set_verbose: False # Switch off Debug Logging, ensure your logs do not have any debugging on 16 | #json_logs: true # Get debug logs in json format 17 | model_list: 18 | - model_name: gpt-4.1-2025-04-14 19 | litellm_params: 20 | model: openai/gpt-4.1-2025-04-14 21 | api_key: os.environ/OPENAI_API_KEY 22 | - model_name: gpt-4o-2024-11-20 23 | litellm_params: 24 | model: openai/gpt-4o-2024-11-20 25 | api_key: os.environ/OPENAI_API_KEY 26 | - model_name: gpt-4o-mini-2024-07-18 27 | litellm_params: 28 | model: openai/gpt-4o-mini-2024-07-18 29 | api_key: os.environ/OPENAI_API_KEY 30 | - model_name: claude-sonnet-4-20250514 31 | litellm_params: 32 | model: claude-sonnet-4-20250514 33 | api_key: "os.environ/ANTHROPIC_API_KEY" 34 | - model_name: claude-3-7-sonnet-20250219 35 | litellm_params: 36 | model: claude-3-7-sonnet-20250219 37 | api_key: "os.environ/ANTHROPIC_API_KEY" 38 | - model_name: gemini-2.5-flash-preview-05-20 39 | litellm_params: 40 | model: vertex_ai/gemini-2.5-flash-preview-05-20 41 | vertex_project: "ascii-450914" 42 | vertex_location: "us-central1" 43 | vertex_credentials: "/secrets/google_vertexai.json" 44 | - model_name: gemini-2.5-pro-preview-06-05 45 | litellm_params: 46 | model: vertex_ai/gemini-2.5-pro-preview-06-05 47 | vertex_project: "ascii-450914" 48 | vertex_location: "global" 49 | vertex_credentials: "/secrets/google_vertexai.json" 50 | - model_name: gemini-2.5-pro-preview-05-06 51 | litellm_params: 52 | model: vertex_ai/gemini-2.5-pro-preview-05-06 53 | vertex_project: "ascii-450914" 54 | vertex_location: "us-central1" 55 | vertex_credentials: "/secrets/google_vertexai.json" 56 | - model_name: gemini-2.5-flash-preview-04-17 57 | litellm_params: 58 | model: vertex_ai/gemini-2.5-flash-preview-04-17 59 | vertex_project: "ascii-450914" 60 | vertex_location: "us-central1" 61 | vertex_credentials: "/secrets/google_vertexai.json" 62 | - model_name: gemini-2.0-flash-preview-image-generation 63 | 
litellm_params: 64 | model: vertex_ai/gemini-2.0-flash-preview-image-generation 65 | vertex_project: "ascii-450914" 66 | vertex_location: "us-central1" 67 | vertex_credentials: "/secrets/google_vertexai.json" 68 | - model_name: gemini-2.0-flash-lite-001 69 | litellm_params: 70 | model: vertex_ai/gemini-2.0-flash-lite-001 71 | vertex_project: "ascii-450914" 72 | vertex_location: "us-central1" 73 | vertex_credentials: "/secrets/google_vertexai.json" 74 | - model_name: "gemma3:4b" 75 | litellm_params: 76 | model: "ollama_chat/gemma3:4b" 77 | api_base: "http://ollama:11434" 78 | -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/services/llm-router/google_vertexai.json.enc: -------------------------------------------------------------------------------- 1 | { 2 | "data": "ENC[AES256_GCM,data:q16oWolGXvGw7mrtpkt+icsKDu7ItXqFs67RktCmzqpFTMuRNshxIPv9A2Ej6JcqZylm8ep1Uvtd3FZBcp9L5bjzFX/145LazAdF3A93shotBDYyBo84HtgKN1x1tCuWIAcj4lZwANz7yqwnGqkQC25v4XDwErhbH63t+QfJeTXW4If75yeG7vJgteJWBYbINdYmjXRREpsUB1CiK/ITYoijSQko3/hj6kS1554NKUAEISdAeXSjbnzvrAvXfqFXWhRVpQ77hB3F7aq8JynDr27JBlTlJ5wiXbrWg9bMqKZbPUB8iKNj77jnnAa3shSA/Kv1OI2QLtTnihD+aEyzhYw/QZMB4kttFoEFlaI07aSok/1R6zmKC9dBWhY7ZEfo9WVljAnubqaZSevMUuGi3Bc1CUFEP478ledfNrPSo24w7gKJPR6LWt/KIT+6L3RiSGOyD0wv9lSQC/e9L5958RKaBQXvBu2qmiFPdheYTNyBHYvwXobzn+ZR4mfxh055Vb7JptbayYKVi3KQQBnvd4ubFtqCehi9PViXgum4TlUElL/gr3eKJKETL5t4g0ISQ65k6MWlxFGbKeZEZTvcIl9H0XCT4sTtTamPiRzmV3+nMsVJaInZxkceM2IIk6C6R3mnl2nE5+JimPJG/7yNW+beT+WFV7A/lOgHs2Ve9Ib4gNsHsePP4F6D6Mm2oJV/orQw4IP8dGKsy09ljbH/2aYt2gG4KpHCMDvuQU6zH1A1iOkomUtmd5ktHTDZTR2hCzGMR25EWQwb0pcNTP8hx9je4rteMvlVm3umaAWkM81CoN+zGd2fQ1UeTVXPpZK95gWsN6pMkSnQiAe4oPRjTF4dNmOaeJWzbhMn9Yro+WdxYvuA7bqzI4IA5Yac4A9/2z2kFullbp92yKQXORp4PHed4zb8PAqHpMTlLLSoc98K6xQBOQEn8wjTc3kg64ZRcGHfVuHv+3urbFYUVYsWOU0YmN9tm5bjEhMB/SueybwqNo9wT0DWf8ehkLg7vbVmkS5sc0ZRclHH7VpsTYGKZy0we5CS5S75YHEI3prUkwDM81Bg8MeQDCtReBQ/PVF8it1X8dQH7+n0nYFAMg70cqcJ5hXmqIBhBpZSPtKFtvnVK59xl/UX
bY22t+cGF5XxIOV467RUr3lkFA/o5L51zrT3XHanY7x8XHm0tk4STaUyDULzTb8EaNKVZACiRWOmVDcc72Vs0/5YfZCRaX0oo15T6vSufidN5Ze228F7Z+loI3ZtFIGK/kdhYPgjuwIOQzUEXoluO+AGV9qzqlflAso2JIanATfGdlB3h0wRqFlDKLUg/7kQenBmaOqd1WAQlMGlG1OWQ61TtG+0Q/gjPDpFS2qz8niuWycVul44EX+0LtA1daBsscH1i0e4JiptzOBWOACQ2xvx6/mefJdC/t97cUlelOLmOROJpIgTP9cDAB5M2uV2GURddQAItQMIfeAqKyPcSFnwCHpdAq9u9Yrm/Djz5DYC25+DKv4Crrr9XumtFlSozjBXO7BESFRPPaBXMkfs+VL/1d/BYe2XrJUl5Lqls1S0To0Jx6y0K8mjnCp43YGY7PyzvkJgJWK73FoPAmPza1cHrDZi4ciORq1Bi4dpPj+VqcJP8Mrw1SUPoIT1Cm9R4J5o1p/FRQhIcX56NrNtGtmDkltJyHF9rrZgXSJldvEcfGeK480BK5xgeU2lRTGhvIaG8jmJF7h6U2pyp4EVzDrXlTa/xzcHQZUF2Nw4pMjdmb2h7gugh9UklzB90Q++gmi3JyvEPkuM6oQ5kWZ3WJ7B+fmhCFVrsKzMK/6G2oIjqArdlJLmhZ9bWspKF2vTlwvcK4s9tJhnrKFw00AFNDYhjh9oGrGWJUMLeFO+vtXUjHSjH0a+MvCGmr6xTwJczA/g1nagIUzi1+YGtMYCDcsFHOO648SpMCuHySbOcPHeW4oub8jUlanZM8QzDrA0jI4+krxTCx5Ia4XV2FMPEA13z8TDNZu/ZGBQptNNa7aEUUcFW2ghq65lyVy3e/g7WkDX+iGs1YGDVkexJ7aZTA5k7NqKqqwR9b8YBHeEhrxgoMRFH5D73kncnuOXSzH4/bIAPzTrL90bOWvC/t7zAueE5qlo/+0/U6cyJNwnCv7X6H/TzIbMT177hZ82YVye+R0075iYzhTWclSbFWho4lMLxhPmJ2iOoObk2qvEEX16mYKaL4Ig3EuUf0dSHEDNAiTpXTjftkhZ7Tle6+W4+o3luEy7VVhIzWuUFmB9oRxcktHBU6s1bnhvu/7Flef+ob1pvV+yCPYAq0PEzxDn/70gWLNBqQc4SRAWEqrs9S1O1TY4yHozjuDF0E/SLUPjCcgCokpOl1F17OlRpI65ywMOYGfgLkH4cGWDRzd5/aJPFUEhBwfdr1kfrACBqphaHbgm7UEvROBaL66frG/OfWDdcpsZo0Y6O9NgMRrlM66d0Nuo95Q7JfluhP0dFTqGiWZJYM2Rzrnory5lG/3Cf5eydumho0rucdKEKAzl6I0sd4VEN63CuKbxECJaj/mqtuM/ZTriKKoLzbBzBcpM7uPPJUAi3NpNC2pWLAbC7lCALrZWos4xW5VVMzH4+pC40M0M7HgdNZRZnh+NXLclJELUkPIR3jLd9wfJnbeYF2CJUT1sQEizW7ssMkP9Vel5BxEBLQqtLCbxUVSYwn2N9YXl/3eZAtX37ECH6mlHrGsmmBYCjChlus0ApobvO4Wlw1zFlhE77wLax2q89+wGq6nI4dUrz4PYWfy2/tY2XWlVu8xMBBbwDPIOt3Z8KZhZeTZYHxGDDDNHn6qGeOP3o1LZejyfhKJ143zLsRAwoCtXt+ffeNPczlsng3LyJYD9seQwdwI/8o8Y2d/AOlVQaFU5mawNQmLViHc85KDZ29GWjuDrhMlJiDZIiKFRlSGGNCiqciT+7RqdbDH8b0s0yff7OYmbwQlXGpTFEY8HezxGDyVpbCduMpNsj5RDab06ExbJcYOtRcisM4xQptlkiylaFaRIISpO25Z+Idv3epfIi6e4Ldasm24LKo/eaVyXtkKh34SbPLq0R2IIVPE5i1GTKx0O/7c/iiXf+0QWPgu0X6mGIQ+yoyAlU6qgrFOrbpqTfd1qpqC2JFT7
UaRE,iv:JLLnRY3bfY5bzvNPAQOPyTor8VqVC31q8sunXDkT5WU=,tag:+HuK1F1a/2DE2unBHxvCzA==,type:str]", 3 | "sops": { 4 | "kms": null, 5 | "gcp_kms": null, 6 | "azure_kv": null, 7 | "hc_vault": null, 8 | "age": [ 9 | { 10 | "recipient": "age1ph7watxp99nsl8ejs3snrf2jykwxc9j3va00z5xrywttyms3af9q4chaa0", 11 | "enc": "-----BEGIN AGE ENCRYPTED FILE-----\nYWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBRS213ZmVQMlFadjlBQjkz\nUW9uZkJxNmY0WmlCdkM0MHJnblhlTGxYTGpBCkxOdDk5aE5Od2FRUGVQa1hUeGFP\ncWxZSm1PVkxjS0M5di85T0owR0RVTTAKLS0tIE1zekw1NnNHZm5sMVlCMWFzVjFJ\nMFRwMFlpaXVLYTQ3UVVzcTdFbEFLUlEKkZcDIrYgquiFo6VX0h8571Ko7F8Np6Qm\nfoOOiMAX9DJrF2GphiYdM8YVQMCQxZ+RFvz+WWmA0j1krs2xeXmjJA==\n-----END AGE ENCRYPTED FILE-----\n" 12 | } 13 | ], 14 | "lastmodified": "2025-07-20T18:20:20Z", 15 | "mac": "ENC[AES256_GCM,data:ZbkbXuIgkvmY53KE0eLYPpftLxd05ykmgmHU8TNpVXDS1RwhG2Gd5wstmdb1y6CiZ7on5cW3wG+vaG9N23SpX0LlUUz01m1DAnRPMThY93/25C6DSUWRTEMLgL6knMrzVygeeQuk2EFiDZYV1SZgarillIZb4Swq77yTDxJDLZg=,iv:AtaINBAw351hXGFQC6CaZ6d9QyqxK4dlZZWxLWkwrE8=,tag:u85Qe0WqeJgmrbeaP5ZJJQ==,type:str]", 16 | "pgp": null, 17 | "unencrypted_suffix": "_unencrypted", 18 | "version": "3.9.4" 19 | } 20 | } -------------------------------------------------------------------------------- /services/llm-router/google_vertexai.json.enc: -------------------------------------------------------------------------------- 1 | { 2 | "data": 
"ENC[AES256_GCM,data:MasLFwKDcxjF1XHQfkMckAlhRLn8IiEJgPGVjCb8tYp83tsZ/xUXpU6h8J2NCpcm1tZmJJZZ+wSwB0reCGPqzR69SmA5044buMBPknewPA6hebInzgCoorcEFjdYQfQChRIVM1oYQezE0MuZg45bsHH2f7BfrvE+DeN4J1iRWG2B8CNa8VA0wHZ0IBgIE6+Zx+aVrYyc/ZVe9Dz5F9/NcCA/YvNJ1ebmKY7BA1lzXP5BVm8nevVy6v/d7BCUvE+UQa5GIhHLTQUaLWb61kcb6VKxkdiQqZCMSKnjLIHvDa2WKjqeidL2XEv1e87qED3wmU44mX8g+Q2S0RwVa0nCmm1v28xb9kJfcjWS1SUA+BkPOnHTRyfVI3qy9l2QzkBeppkXrfSjhDHhfK2HMppGmg30hGznPYqY9pPai/E21k0Ls8dqX/3cEk796c8Nh0C3T2266elaPjhAzEOCmRWD+NmQ45FReONMCszQtkQ9sbGc8u9JEEqXSWz9HENLtbyXtg3jp04pxau0tSyJeMpDpcSEVWHTwWhcqLYn0tyPTm7hpUZ9U8apvqYfgQZQ3/JjHcpSlwcY4nFG3pWgbaZh7TfP4wSncjuLfFqrkdKIw5Ym7pSd61A6z1dY/PeHWv/qB15Q/4kcf132I4DIlopQAjht9T3+MLMjvoXBhk5RbGt+5HZ5XTDbNoMT+/DUO6FgIqjje18jQd6Ld2s1mIZ8vl179H8kNfyz6M2mVXYCNIS9/J4AXV3bQjbk9a7kHU2RpPfNi6yBvMsmb4nPCuDSmreTyevHDwSsd3MEI4aRCCySurgqwdT8iPd5+ERcGZ2OccKaFRrkOTjsnADLf0xpl24BEBvqt8mKrnp2kI9rzBdoSXr4ayUsHRIw/DecYqrCTl5mripacYkMIILv/vfJUPgr138Un8Ativqeesl+6dlvekh3wwDuhFptJ0a618Ds4N0koYoSNhKyZJZuDRBUX1++bNdL58KW5ykelHzGxW8UmYbXAAVNTmYB1NTP5DK18GP1aS1yM8ngIGW2cYinOa4McHXwNX2YAErRvwOzJrOlXHHUgo9frHLJNouiZq4KK+ntln8y/6NFUwztRTsEQEAuYgZX/YK2Ahfmxld6jlLX5HE9FpFUkHzhSxYcj2nfoU2GJXBzsSOf6KbYlXmZ7HTswTCkHt2l37vbJMA1He0O+hg4RhLK46YIMODakCbtKfUHh7D5j5rZrKNNXyLTNBD8zSUUlAgpr6HtEOgFvabFGTsRa/BdrZ/zjPB3hoe2bqe6m5EsYIGQVIpSy3DZv4tEAaPmYFyivZMlrn+ofUYkP9FSIvfT+vlWp/4/3/Fdh2zBGYGmxBUGGSAYwTmhLxbU/M/hOG1+iY72lMQ7rtmAZympJuqhNr/gDkX4roweJ1tFYKAoVRLJTHkSppwV8n73amqWrFd6LuzOcJp6/NOOY4bvOY3TjpOJQcirTad7XWoUzI+CqDOIQt8NI4giMPA8ZzKUjkqZPqOX03eTXYbWvmqBrxSbcFawHCGT58PWQZ48RxpIVMkP6cmUlsZzrgURzNyScSkcseQe5Ir+6kbgjrO8yARv9LawpY05ccq5I4P4B+FBbO06wVPW83hDm9Emg0+3nt+yRQtojHORar+TJBL9qOE9WU+B6CEam5s1Lm6tbRk7M/te/r7zrYssdRihTN6TOTA32MHunR4KB5IsQjCX5hJywJ6GF3q5d4I8pecF3cDt4P60NJCo5Z7bo+2yzCYDUw8IYnemyLmV6FYkQmoYafWVHXVIXHodX1Olq6KiwCXOGwDXUgHk+LSMGEvrBB+/iuTRazw3CcCOABwjCctnJhExbyA14RS4Voc9bLLbn0v1SXYjEEGnoxvKtaYHQj0h1kO/cgt7ooJBUaUva6FltIY/Y6B9lsPq/fr0Auklf5JmIAECau1e5PopIkQ6IcA8bIrnG/VTHqtv578g6NVEPQYcSl6P8Ch
vtx7E9FBq4qhcUGy2cY4+pSVRXAoaH2b/FnIVZKN+3sbc/VWLomozwGyaL0ZqvVAQxFATr13YicTB4c1C9magG2PBSYk/ZvS657eVhirKmK5SAWzQT6OBbPeqL8aMttdTc+C4Go1/JI8MG79XHOYw0FlOVzImmpW8w3kgFYt+x/xGE8MGFsp1mbH+ELk6K8jkZQkWjYQKMicgrn6FwrCDPc2GNjqoozegBmzRfytVsJaphUsZJLpR2jMyYbvbHGvnj8/YEmsgKsaT4Y4EWy9E5ZAIA4UNeApiJVlPUmKCOlCiMeoBGP4m67ts7PmaU5odS1N2dfDK2ITcdmFE1HWcCgaEyRglnBPKqPlFVgRpb9TOdv8aEomXLz6KiIKy1nzgT5ODYPhgeXm5rgqb/qJZxMlN4rhJbWdq57jSR8LA6ywjrKUKlaohg702w/a1Sx/cUluGwkKfGtrgXWO4yii/1f0DQEFDzP4BktpY/rqIsjZLB1jlI/jNoI2VKUt89h5Q5nN1CtZNkMIkxKvjSwHOcgjjchOpdzNGEvtZsTmZXpRohFuaAzi3x4Sw59BItfg6HrQUCZvmKC4aIruKlozSbhIU4jflNXc0+cnW8sTwjU6X1qGUssFKSZYjRdIEb9U5V81WtTZSbRFme4X+XkSm7o5gEy9a93+WTDBhS+SrpT9V89RahYTg3hL+o8GefIJtDLZ1jH/opUZd2H4ufYC/jrcTS8zaZmU8tueqiskV+s+SkDauRorpBWTD0aT/DHx0ru41IStXj7ihiA07GZU1nO2iU2736FL5yTAEcwvnhc+mRTadaYHmJ7RcjGE9uawOW8645swGQ1GoUyThNn2V6wS0vVQyUuUP5z+gKQZbZjPtM30DY9UzbdGbPyCdUxNn3tXmqT9LHss0HKCW5PlU36ENmVI7Q/NWdtoeXPHmmgPIHrQq8oByYOUGbK7C8LL4H4BC5RPcHTHsMQGaDLjyglZTYWYaxsX/1E5dI+pheRqUkrd2lSd+EuFCbZwP2NkaQF1R57Tt890vTeJTuTIajlE8/BLlwsLGFlZBYOU5f+GrfPalCnCVo+6DVvujx2vJ3M36ovwzh7i8jkD8TWRnYfKZ5EkhemMoI4N4YOQWmF4ZPFW6AAZp0ufkgySKYye0zRz/rjwFJtvEDkhDi2iZmw7nA4/yKvVuMJgsbOYbqkC1uVA=,iv:MLMMHGYUJZXo2hbe9q6D3eaEFUbxBphz96BvEKIX3nQ=,tag:mUmLEsch/NqdTk97gQiYVg==,type:str]", 3 | "sops": { 4 | "kms": null, 5 | "gcp_kms": null, 6 | "azure_kv": null, 7 | "hc_vault": null, 8 | "age": [ 9 | { 10 | "recipient": "age1ph7watxp99nsl8ejs3snrf2jykwxc9j3va00z5xrywttyms3af9q4chaa0", 11 | "enc": "-----BEGIN AGE ENCRYPTED FILE-----\nYWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBYaURHK0xlVUNWOWkyczRU\nSVN4cXBRczhrOExUNkI2L3NrU3NwRXBFbm5vCmZHdW1kMWJ0MnFGWGg3NXhwQ2FL\nM0kyTzlZaGs3eUhVVlR2enRxV0R4aTQKLS0tIFJXUFpkeC9NRFB3R2llbHFrTVl2\nd0VzdFd3cCthSkdPbmFGZFZaa0xjWWsKODX6RdDTvFrdpkVPd9V6kuV7xdVGrYyI\nMTFoF384flU75BXq5pnXvhsYRmf1JR4CH/nPLxp8HmJ7txuYS/MERg==\n-----END AGE ENCRYPTED FILE-----\n" 12 | } 13 | ], 14 | "lastmodified": "2025-06-27T10:59:13Z", 15 | "mac": 
"ENC[AES256_GCM,data:ENKGRaba6XIK+6mw7gti75JGUsSnQwfhvahQxZF78pNDc9sxmXEQqKgfDOVDpJ+w4qoM2J3ancDKuajTTJE4SvzFigKv7tzryaz0AAB0VjkO4BME0uFWmjqwfZIMq+5jTjnILF7xLNJL8l2BZyCWZAplnPUGow1yoHvdTzrI5QE=,iv:UeCUCzurP/eIRxCM8UNsc/cjz19M4/3s7RRfJ/1iKuw=,tag:vT8QbCPjsVXUf6OM17Db/Q==,type:str]", 16 | "pgp": null, 17 | "unencrypted_suffix": "_unencrypted", 18 | "version": "3.9.4" 19 | } 20 | } -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "{{ cookiecutter.project_slug }}" 3 | version = "1.0.0" 4 | description = "{{ cookiecutter.project_name }}" 5 | authors = [{ name = "{{ cookiecutter.author }}", email = "{{ cookiecutter.author_email }}" }] 6 | requires-python = ">=3.13,<3.14" 7 | 8 | [tool.pixi.project] 9 | platforms = ["linux-64", "osx-arm64", "win-64", "linux-aarch64"] 10 | channels = ["conda-forge"] 11 | 12 | [build-system] 13 | build-backend = "hatchling.build" 14 | requires = ["hatchling"] 15 | 16 | [tool.pixi.environments] 17 | template = { features = ["template"], solve-group = "template" } 18 | ci-validation = { features = [ 19 | "ci-basics", 20 | 21 | ], solve-group = "default" } 22 | secops = { features = ["secops"], solve-group = "secops" } 23 | dev = { features = [ 24 | "ci-basics", 25 | ], solve-group = "default" } 26 | 27 | [tool.pixi.feature.ci-basics.dependencies] 28 | yamllint = ">=1.35.1,<2" 29 | taplo = ">=0.9.3,<0.10" 30 | pytest = ">=8.3.4,<9" 31 | pytest-mock = ">=3.14.0,<4" 32 | pytest-cov = "~=6.0.0" 33 | ruff = ">=0.9.4,<1" 34 | pyright = "~=1.1.393" 35 | git = "~=2.47.1" 36 | 37 | [tool.pixi.feature.ci-basics.pypi-dependencies] 38 | moto = "~=5.0.28" 39 | nbqa = "~=1.9.1" 40 | 41 | 42 | [tool.pixi.feature.template.dependencies] 43 | cruft = "~=2.16.0" 44 | 45 | [tool.pixi.feature.template.pypi-dependencies] 46 | jinja2-ospath = ">=0.3.0,<0.4.0" 47 | 48 | 
[tool.pixi.feature.secops.dependencies] 49 | go-sops = "~=3.9.4" 50 | age = "~=1.2.1" 51 | 52 | [tool.pixi.feature.secops.pypi-dependencies] 53 | 54 | [tool.pixi.tasks] 55 | 56 | [tool.pixi.tasks.fmt] 57 | cmd = "pixi run -e ci-basics ruff format ./src && ruff check --fix ./src && nbqa 'ruff format' src/* && yamllint -c yamllintconfig.yaml . && taplo fmt" 58 | description = "Format python files" 59 | env = { RUST_LOG = "warn" } 60 | 61 | [tool.pixi.tasks.fmt-unsafe] 62 | cmd = "pixi run -e ci-basics nbqa 'ruff format' src/* && ruff format ./src && nbqa ruff --fix --unsafe-fixes ./src/* && ruff check --fix --unsafe-fixes ./src && yamllint -c yamllintconfig.yaml . && taplo fmt" 63 | description = "Format python files - apply automatic ruff unsafe fixes" 64 | 65 | [tool.pixi.tasks.lint] 66 | cmd = "ruff check ./src && yamllint -c yamllintconfig.yaml . && taplo check && pyright" 67 | description = "Validate formatting and type check python files" 68 | 69 | [tool.pixi.tasks.test] 70 | cmd = "pytest --ignore=src/{{ cookiecutter.project_slug_pixi }}/code_location_{{ cookiecutter.project_slug_pixi }}_dbt/dbt_packages src" 71 | description = "Validate formatting and type check python files" 72 | 73 | [tool.pixi.tasks.tpl-update] 74 | cmd = "pixi run -e template cruft update" 75 | description = "Update from template" 76 | 77 | [tool.pixi.tasks.cleanup-state] 78 | cmd = "rm -rf {{ cookiecutter.state_path }}" 79 | description = "clean state directory" 80 | 81 | [tool.pixi.tasks.secrets-encrypt] 82 | cmd = "pixi run -e secops ./scripts/encrypt_secrets.sh" 83 | description = "encrypt secrets with SOPS and AGE" 84 | 85 | [tool.pixi.tasks.secrets-decrypt] 86 | cmd = "pixi run -e secops ./scripts/decrypt_secrets.sh" 87 | description = "decrypt secrets with SOPS and AGE" 88 | 89 | [tool.pixi.tasks.clean-local-branches] 90 | cmd = "pixi run -e ci-basics ./scripts/git_clean_local_branches.sh" 91 | description = "cleanup local non used branches" 92 | 93 | [tool.ruff] 94 | exclude = [ 
95 | ".git", 96 | "__pycache__", 97 | "docs/source/conf.py", 98 | "old", 99 | "build", 100 | "dist", 101 | ".pixi", 102 | "src/{{ cookiecutter.project_slug }}/code_location_{{ cookiecutter.project_slug }}_dbt/dbt_packages", 103 | "*.ipynb", 104 | ] 105 | 106 | line-length = 88 107 | 108 | [tool.ruff.lint] 109 | ignore = ["E501"] 110 | select = ["F", "E", "W", "C", "B", "I"] 111 | 112 | [tool.ruff.lint.mccabe] 113 | max-complexity = 5 114 | 115 | [tool.pyright] 116 | include = [ 117 | "src/code_location_{{ cookiecutter.project_slug }}", 118 | ] 119 | exclude = [ 120 | "src/code_location_{{ cookiecutter.project_slug }}/build", 121 | ] 122 | pythonVersion = "3.13" 123 | venvPath = ".pixi/envs" 124 | venv = "ci-validation" 125 | extraPaths = [ 126 | "src/code_location_{{ cookiecutter.project_slug }}", 127 | "src/code_location_foo", 128 | "src/shared_library", 129 | ] 130 | 131 | reportMissingTypeStubs = false 132 | reportImportCycles = "error" 133 | useLibraryCodeForTypes = true 134 | typeCheckingMode = "standard" 135 | 136 | 137 | reportArgumentType = "warning" 138 | reportCallIssue = "warning" 139 | reportOptionalMemberAccess = "warning" 140 | reportOptionalSubscript = "warning" 141 | 142 | [tool.taplo] 143 | exclude = [] -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/.env.enc: -------------------------------------------------------------------------------- 1 | { 2 | "data": 
"ENC[AES256_GCM,data:fshJya0TEVfaYQvp2XIPpl9hJlbGxpdcyIQBceITg4BC0kcOGh3akHjkvKjIWqfs7TNDeOVKs6oegicVcC5Lii11tF6rWG3CiohdvePKLQhYW4dT8A/C1AUrwWhig/pJjvImB1iEQIVQ5uB5Hgn/4z3YpsPrKMbGFEdlOVpa5+jdoPHnC4ALCoIV8Hh7wv/vMmI+Ayl33dcpcpjWADDSDNjHFDRyCdgTqkWSgaMTw9QMKL/fVgiG+scxGgUBXQkl86Fs5e/ARWstADmbhcmoOtRgYRWwOgSI9sUjQwZON0veP7X8HvXEvaoLhJBLi6rY3507keQlTBea6xz6GkGIXUSR+BwlKj3xaaACjwNnTb6UstZcR7YGeoLPEjEt5d2zA4bJ3gxEX8ttVNh4Df8xJzgKA361gWr6blXNo2fT8r4MSWmHSZem8Jvfh8nzvNjzv2RWpVEkCgiHZXSmxDYSM+WN56vyVVPd+UCD92/fgF2mjLGmrjuJBvPinVltsTEXjnhs7/Eb2YzdGrKJBz+/h+eSF94L2J0ET1NLrKd74VduF0mwmBlxGcZ/IEwCwvYW285jbpNGFE6KDEgoArRMnw0amM68ZdkFZGJm+KwFEXihYqZ6ZTxyTmLTNMqe5C1sLlrVLkGehNOHhjetMtO5Z3VtHIQIZ7s3L9C4dFd9d2VMkdGVoeydyhy4OcnbPy75HkhTA5WwaZ7nD5rJIenVlFaH8Yudj4431eAYm20r45YaWiqsOSc3onfGBEoO0hi5JJSE9JH9NNB49xq1ThDQfjc25sw9nAL/9QFKCeLht0XB988AjH5aAWayshV0DS7HT9MQt+p1kn8GntEeFt2ItFCmNToOmN8czH8dqgcB/p5Ys26IOy98frdaSZLvJCnI8pRmpYluLhYICdvGlQZxEKZtFeqN0dExfHyH3dCc/7iGy80CYLvgT3/aQq+6qMgX3SfwrRsdkm2CTV0Y9jz/OezAbTlrMgSSsV9OhwrwtmIdLJRuZtDaQKOnt4flFGAR1Qhh5ePhyPCgupZoo+JrMTfXONW4eOA4gXeTNpFxt1I/aurmx0ZfHyYkTsEVZwYp1X0kCF+OLcS3UerwayrU0x1ajNuoUZ4UJw66Ea3hL9qD7bXErfTVzXME7kuvmSJZn8sgnGCuk+cBmbmhjG6hSnjx99YJxVLAd+8Kg9DBovTcv3AQfiazvhnsYks0pkj476oZKixEK/mcGt1CmqSFZ7fmMJrbEOAEHvBD5d6ZsEHOVW6bUw+t5uaPiDzQTVeZZ1mGsi3/RBHiKgXcZ2jhMgzYFppkr7Wm+RkLxr0YAei2zAuoqZRe9YloozA2Q7riQ4ZOEwqa8otOwfOI/bct/ungic9VHozj4yGjDPo5myKKjyg6KyUzFhcXSVbpOF860mZSDXIkRF1WenTzVRaC2lVb2e1KiKqFMROMPxeeopuTD7R0ck3p50WXJDzEQHBC35eUQaXCN/6oGtKs0KEDsHBGc2VVT1oEkF4E/34GwzbBrzAJ4Z/XvHb3nV9PI3bcSF19WLENcuYED2R9K0rEi9zHQU2RVR2yrs9CypWYSRzL2scOyE+jMr54vwdzT7zVCIY6d34OnCPkHCuME44MSVk9+zvgmWTVhiL7Bu5PSSpmf/tdgbv5sIZTTNqU1OqEtIUqwXc/n7hq41HdCM9FXtp1C1Xg7I6QWSg/EYxYHtXLV730FEi7+lWT5XwvlplZt6UeMqAUI943LwILb8M9nONGMVBXzKO2r/Tgicfxmg5VlyCAOq2DqSXO416jqZmHMqdifN56ieiRr7aawtUaUUElqjsvFA4EfIUL3MQqmborhlbiAMzh/sH/aITXp0XN/PwltfKMauuwpT3/jnNjE01rzyPeUv6Pk4up5nfsxEFOZuQPq8f8OpMTm+FPlio1GFDqL0nHg8/pU63fc1aujWYORG+xWLvQjPOdetbv5l4QEEIB5Z8glcBPSAp
ApEOR7yYDIm2HLhirFbAVthHN0KvxKvg/NC9R7vsiE4vOT0Khhv0am1SFa+MnVOfSrFnhraOtjKAgUW2U4n5/KokgfGPm7MLFwdGiCCoSxWXouRPZ1wmrZO/kgxVnMnBHRd9aA1HltIWrsLtZslv0zR17Jtxsrk4nRwpk5Z6qT5vyMrray0xhb8jNwInUm+0BiMM3L2x2qZM4ZSuhK0gt7sR4LjdwTVsof7MB2AFSH6B7NtLJoq9aUmLyMh6tmGzeFdAJolpEWyznH4vkrGeWCdCxE3ewX3IxM0NHX9QrcRNoTcjZz49WBD1me0BkJaTQLOaH4sv75spaotryVQajrmxzbHFve8ijVg19H0hmNFmMulavEJH5MMcrAJ4E6ohMMHqdPWxrFrEIhF4EAIyAHuCfjmU26o1GOxGF2/BFj0BbaCq0YTGePRq4gFn0RRnGrtMWgqrbeOi1l1RVdzJOecou9mMUQIn8X6X0s2ffQR2s5re6SRTleeKa8ZPHcHLORlOqKupQw+VCfSCo9sQTeSbdG2PqwLpM7+SKkmkIGxigCHuVJuR18kiizlBiqFnVY4pf1LEgEdOcDArWw69xC5Dt92otIlsGceMkR8Tw8+MjDl+T/VW7p06S8TJJURnieqfLfcmjg8V/3DP792wWtEdIUPFa/ky5Wqp4OJwD+UIjCxAcimody1aTThvuUhMge1QDsp5mtpxAacnL295bKZwObIOzGforK8zc+MgONoZhUblbLn7qspTB5zd8yEzkit9eIkLDOzFZG1vX9OJXCrbP2411oZp8gzJ2KFnAG2Wq/jhwofA06torI28DgP6NoJhG4rArGaLAtEu0D2TZHXbfpcTcAGCNCKHoujI7XpDBcrNE3T561du5AMfINuacFh5PF49p2Hba/jpfR0PMtIyE8/IExBlMzTn9k/2TLolfWR6jeIfQ+xjI2SpHpKh4dbWHq/T5bZFNuAvPl8s+Fpq3+7FuzLWs665XrEU/rFlHdiMmwfahQVWaz8p6CUrgRkR/2xB78Hj0AslrVTpET0txpuwtPrnl5A8fVtOeQl2pqE3bLaDRBu+pNgZRtJhcn96BSYFZrz5LcOHHDzk3w/KgAPcu/QmkaBP1Ui5UCMOuwQP+lNKf05qVLIiq7uNLCDtuUB+cf2AglQB5iv6KLyyjStw7LV6jmQI+p4UXDPMUjLxymeYtW8upC7cywTMa6/Tb1TITPswMV46w/vEgJo0JPyME901oUdLKBZorm9ru4MyOt9hJowKNg05jAuWavNzZrNuL8sa83BrGGiyvU8cgdTX6xYPV1i0vuBDYSzlgCz/O1cTdGn2ev9w2O0//8EqsEwGFzx292lHRXqs7NU7ikUMrzgE5HSxs+cmYMy4RCrZgTsMCJIWbEkVdckUR3n/9BQ2Ererz0nhXrK3WGcz5GWDfsWlc6gJwId4/IjZZ7yeoseswf1viRrA9a56a5L/rsO5i13z3FTI/217vrQMGUCnAU8we/NFTS5QU5Q8MdPRhLvImzWGCPFJWeUICNYp+Y+in3i0X0JET0GxF/oxK+8tQKk+oLbmGgBLJObuP0F4IcO8Vw298DQTNUZU4POAXAr3NQnCD1I6cSrtErdoqtBj0wbgT0AMZwXeq4UWYGnx1n797rVq1yjSFIzqNMkqsZUnhRrfyqT3VIkFKPUdpbZjY1UKYUbY=,iv:vmMSzrWHa09eKQELkpQmWJAn6IdEACaaz1avi+Bm+78=,tag:YZf7lm3gK9SkM6EAXIgljg==,type:str]", 3 | "sops": { 4 | "kms": null, 5 | "gcp_kms": null, 6 | "azure_kv": null, 7 | "hc_vault": null, 8 | "age": [ 9 | { 10 | "recipient": "age1ph7watxp99nsl8ejs3snrf2jykwxc9j3va00z5xrywttyms3af9q4chaa0", 11 | 
"enc": "-----BEGIN AGE ENCRYPTED FILE-----\nYWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBQUC9mSUU2ckdIaHpoaURr\nYVRFNExqQ3JFYUtjMlB5LzFJdjdLSE9qOGhVCldxeVlRMW9NR2QwTGtMSGl5ZjlB\nUUFQWDQxUFdjRXJpdDJFTG52RElxK28KLS0tIE01YUptN0tObklJM1VrMDFYUkl1\nUUpJbzNVRzY1RzhUc3BvTGUrN3orVW8KBPwIDpzBJztR3FeCD5G83zL/skUMLWaL\nkYIZO4VE0XImokfHp23wYQ+xZXsOYCZ/PRaj4WehF4qWU0vhWVJdYg==\n-----END AGE ENCRYPTED FILE-----\n" 12 | } 13 | ], 14 | "lastmodified": "2025-07-16T11:44:07Z", 15 | "mac": "ENC[AES256_GCM,data:nvefZh1h+jsveDF/zv0Nrpf1NR3qpSMHubWnwIAfYweVIul+TVYx9cucb/zDvWYv8fqWkCDUoEznFXprO3M/guLcZFClipTmBxhqEwnWfy7M3HIY7oN+FfV6cCid0I1CmoVCJ4RA8ZEteZR9tl+B7WC3n1ChK/2l7m7ovnXOIE0=,iv:dWnBunipiF9ju67qLKEDU6h7IG/zGMtF8pCMRyfH3lY=,tag:PgQJas5uSAKyI9SI0o/SJg==,type:str]", 16 | "pgp": null, 17 | "unencrypted_suffix": "_unencrypted", 18 | "version": "3.9.4" 19 | } 20 | } -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/generate-env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Generate .env file from .env_template with auto-generated secure values 4 | 5 | if [ -f .env ]; then 6 | echo "⚠️ .env file already exists!" 7 | read -p "Do you want to overwrite it? (y/N): " -n 1 -r 8 | echo 9 | if [[ ! $REPLY =~ ^[Yy]$ ]]; then 10 | echo "Exiting without changes." 11 | exit 1 12 | fi 13 | fi 14 | 15 | echo "🔧 Generating .env file from .env_template..." 
16 | 17 | # Function to generate random hex string 18 | generate_hex() { 19 | openssl rand -hex 32 20 | } 21 | 22 | # Function to generate simple password 23 | generate_password() { 24 | # Generate a pronounceable password for easier initial setup 25 | echo "$(openssl rand -base64 12 | tr -d "=+/" | cut -c1-8)$(shuf -i 1000-9999 -n 1)" 26 | } 27 | 28 | # Start with template 29 | cat > .env << 'EOF' 30 | # Domain configuration 31 | ROOT_DOMAIN=llminabox.geoheil.com 32 | # ROOT_DOMAIN=project.docker # see LOCALDNS.md for more information 33 | CLOUDFLARE_IPS=173.245.48.0/20,103.21.244.0/22,103.22.200.0/22,103.31.4.0/22,141.101.64.0/18,108.162.192.0/18,190.93.240.0/20,188.114.96.0/20,197.234.240.0/22,198.41.128.0/17,162.158.0.0/15,104.16.0.0/13,104.24.0.0/14,172.64.0.0/13,131.0.72.0/22 34 | LOCAL_IPS=127.0.0.1/32,10.0.0.0/8,192.168.0.0/16,172.16.0.0/12 35 | TZ=UTC 36 | EOF 37 | 38 | # Add database configuration with standard naming 39 | cat >> .env << 'EOF' 40 | 41 | # LLM Router Database (LiteLLM) 42 | LLM_ROUTER_DB=litellm 43 | LLM_ROUTER_DB_USER=litellm 44 | EOF 45 | echo "LLM_ROUTER_DB_PASSWORD=$(generate_hex)" >> .env 46 | 47 | # Add LiteLLM configuration 48 | cat >> .env << 'EOF' 49 | 50 | # LiteLLM Configuration 51 | EOF 52 | echo "LITELLM_MASTER_KEY=$(generate_hex)" >> .env 53 | echo "LITELLM_SALT_KEY=$(generate_hex)" >> .env 54 | echo "LITELLM_UI_USERNAME=admin" >> .env 55 | echo "LITELLM_UI_PASSWORD=$(generate_password)" >> .env 56 | 57 | # Add API keys - check environment first 58 | cat >> .env << 'EOF' 59 | 60 | # API Keys for Model Providers 61 | EOF 62 | 63 | # Check for OpenAI key in environment 64 | if [ -n "$OPENAI_API_KEY" ]; then 65 | echo "ROUTER_OPENAI_API_KEY=$OPENAI_API_KEY" >> .env 66 | echo " ✅ Found OpenAI API key in environment" 67 | else 68 | echo "# Get from: https://platform.openai.com/api-keys" >> .env 69 | echo "ROUTER_OPENAI_API_KEY=sk-CHANGEME_YOUR_OPENAI_KEY" >> .env 70 | fi 71 | 72 | # Check for Anthropic key in environment 73 | if [ 
-n "$ANTHROPIC_API_KEY" ]; then 74 | echo "ROUTER_ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY" >> .env 75 | echo " ✅ Found Anthropic API key in environment" 76 | else 77 | echo "" >> .env 78 | echo "# Get from: https://console.anthropic.com/settings/keys" >> .env 79 | echo "ROUTER_ANTHROPIC_API_KEY=sk-CHANGEME_YOUR_ANTHROPIC_KEY" >> .env 80 | fi 81 | 82 | # Check for Hugging Face key in environment 83 | if [ -n "$HUGGING_FACE_HUB_TOKEN" ]; then 84 | echo "" >> .env 85 | echo "HUGGING_FACE_HUB_TOKEN=$HUGGING_FACE_HUB_TOKEN" >> .env 86 | echo " ✅ Found Hugging Face API key in environment" 87 | else 88 | echo "" >> .env 89 | echo "# Hugging Face API Key" >> .env 90 | echo "# Get from: https://huggingface.co/settings/tokens" >> .env 91 | echo "HUGGING_FACE_HUB_TOKEN=hf_CHANGEME_YOUR_HUGGING_FACE_API_KEY" >> .env 92 | fi 93 | 94 | # Add Chat UI database configuration 95 | cat >> .env << 'EOF' 96 | 97 | # Chat UI Database (OpenWebUI) 98 | CHAT_UI_DB=openwebui 99 | CHAT_UI_DB_USER=openwebui 100 | EOF 101 | echo "CHAT_UI_DB_PASSWORD=$(generate_hex)" >> .env 102 | echo "CHAT_UI_SECRET_KEY=$(generate_hex)" >> .env 103 | 104 | # Add Qdrant configuration 105 | cat >> .env << 'EOF' 106 | 107 | # Vector Database (Qdrant) 108 | EOF 109 | echo "QDRANT__SERVICE__API_KEY=$(generate_hex)" >> .env 110 | 111 | echo "" 112 | echo "✅ .env file generated successfully!" 
113 | echo "" 114 | echo "📋 Configuration summary:" 115 | echo " - Timezone: UTC" 116 | echo " - Domain: project.docker" 117 | 118 | # Check which API keys still need to be added 119 | missing_keys="" 120 | if [ -z "$OPENAI_API_KEY" ] && grep -q "CHANGEME_YOUR_OPENAI_KEY" .env; then 121 | missing_keys="$missing_keys\n - ROUTER_OPENAI_API_KEY: Get from https://platform.openai.com/api-keys" 122 | fi 123 | if [ -z "$ANTHROPIC_API_KEY" ] && grep -q "CHANGEME_YOUR_ANTHROPIC_KEY" .env; then 124 | missing_keys="$missing_keys\n - ROUTER_ANTHROPIC_API_KEY: Get from https://console.anthropic.com/settings/keys" 125 | fi 126 | if [ -z "$HUGGING_FACE_HUB_TOKEN" ] && grep -q "CHANGEME_YOUR_HUGGING_FACE_API_KEY" .env; then 127 | missing_keys="$missing_keys\n - HUGGING_FACE_HUB_TOKEN: Get from https://huggingface.co/settings/tokens" 128 | fi 129 | 130 | if [ -n "$missing_keys" ]; then 131 | echo "" 132 | echo "⚠️ IMPORTANT: You still need to add your API keys:" 133 | echo -e "$missing_keys" 134 | fi 135 | 136 | echo "" 137 | echo "📝 The generated passwords are for development use. For production, use stronger passwords!" -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.toptal.com/developers/gitignore/api/visualstudiocode,python,git 2 | # Edit at https://www.toptal.com/developers/gitignore?templates=visualstudiocode,python,git 3 | 4 | ### Git ### 5 | # Created by git for backups. 
To disable backups in Git: 6 | # $ git config --global mergetool.keepBackup false 7 | *.orig 8 | 9 | # Created by git when using merge tools for conflicts 10 | *.BACKUP.* 11 | *.BASE.* 12 | *.LOCAL.* 13 | *.REMOTE.* 14 | *_BACKUP_*.txt 15 | *_BASE_*.txt 16 | *_LOCAL_*.txt 17 | *_REMOTE_*.txt 18 | 19 | ### Python ### 20 | # Byte-compiled / optimized / DLL files 21 | __pycache__/ 22 | *.py[cod] 23 | *$py.class 24 | 25 | # C extensions 26 | *.so 27 | 28 | # Distribution / packaging 29 | .Python 30 | build/ 31 | develop-eggs/ 32 | dist/ 33 | downloads/ 34 | eggs/ 35 | .eggs/ 36 | lib/ 37 | lib64/ 38 | parts/ 39 | sdist/ 40 | var/ 41 | wheels/ 42 | share/python-wheels/ 43 | *.egg-info/ 44 | .installed.cfg 45 | *.egg 46 | MANIFEST 47 | 48 | # PyInstaller 49 | # Usually these files are written by a python script from a template 50 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 51 | *.manifest 52 | *.spec 53 | 54 | # Installer logs 55 | pip-log.txt 56 | pip-delete-this-directory.txt 57 | 58 | # Unit test / coverage reports 59 | htmlcov/ 60 | .tox/ 61 | .nox/ 62 | .coverage 63 | .coverage.* 64 | .cache 65 | nosetests.xml 66 | coverage.xml 67 | *.cover 68 | *.py,cover 69 | .hypothesis/ 70 | .pytest_cache/ 71 | cover/ 72 | 73 | # Translations 74 | *.mo 75 | *.pot 76 | 77 | # Django stuff: 78 | *.log 79 | local_settings.py 80 | db.sqlite3 81 | db.sqlite3-journal 82 | 83 | # Flask stuff: 84 | instance/ 85 | .webassets-cache 86 | 87 | # Scrapy stuff: 88 | .scrapy 89 | 90 | # Sphinx documentation 91 | docs/_build/ 92 | 93 | # PyBuilder 94 | .pybuilder/ 95 | target/ 96 | 97 | # Jupyter Notebook 98 | .ipynb_checkpoints 99 | 100 | # IPython 101 | profile_default/ 102 | ipython_config.py 103 | 104 | # pyenv 105 | # For a library or package, you might want to ignore these files since the code is 106 | # intended to run in multiple environments; otherwise, check them in: 107 | # .python-version 108 | 109 | # pipenv 110 | # According to pypa/pipenv#598, 
it is recommended to include Pipfile.lock in version control. 111 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 112 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 113 | # install all needed dependencies. 114 | #Pipfile.lock 115 | 116 | # poetry 117 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 118 | # This is especially recommended for binary packages to ensure reproducibility, and is more 119 | # commonly ignored for libraries. 120 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 121 | #poetry.lock 122 | 123 | # pdm 124 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 125 | #pdm.lock 126 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 127 | # in version control. 128 | # https://pdm.fming.dev/#use-with-ide 129 | .pdm.toml 130 | 131 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 132 | __pypackages__/ 133 | 134 | # Celery stuff 135 | celerybeat-schedule 136 | celerybeat.pid 137 | 138 | # SageMath parsed files 139 | *.sage.py 140 | 141 | # Environments 142 | .env 143 | .venv 144 | env/ 145 | venv/ 146 | ENV/ 147 | env.bak/ 148 | venv.bak/ 149 | 150 | # Spyder project settings 151 | .spyderproject 152 | .spyproject 153 | 154 | # Rope project settings 155 | .ropeproject 156 | 157 | # mkdocs documentation 158 | /site 159 | 160 | # mypy 161 | .mypy_cache/ 162 | .dmypy.json 163 | dmypy.json 164 | 165 | # Pyre type checker 166 | .pyre/ 167 | 168 | # pytype static type analyzer 169 | .pytype/ 170 | 171 | # Cython debug symbols 172 | cython_debug/ 173 | 174 | # PyCharm 175 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 176 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 177 | # and can be added to the global gitignore or merged into this file. For a more nuclear 178 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
179 | #.idea/ 180 | 181 | ### Python Patch ### 182 | # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration 183 | poetry.toml 184 | 185 | # ruff 186 | .ruff_cache/ 187 | 188 | # LSP config files 189 | pyrightconfig.json 190 | 191 | ### VisualStudioCode ### 192 | .vscode/* 193 | !.vscode/settings.json 194 | !.vscode/tasks.json 195 | !.vscode/launch.json 196 | !.vscode/extensions.json 197 | !.vscode/*.code-snippets 198 | 199 | # Local History for Visual Studio Code 200 | .history/ 201 | 202 | # Built Visual Studio Code Extensions 203 | *.vsix 204 | 205 | ### VisualStudioCode Patch ### 206 | # Ignore all local history of files 207 | .history 208 | .ionide 209 | 210 | # End of https://www.toptal.com/developers/gitignore/api/visualstudiocode,python,git 211 | 212 | 213 | {{ cookiecutter.project_slug }}/.env 214 | {{ cookiecutter.project_slug }}/pyproject.toml 215 | {{ cookiecutter.project_slug }}/key.txt 216 | {{ cookiecutter.project_slug }}/pyproject.toml 217 | {{ cookiecutter.project_slug }}/services/llm-router/google_vertexai.json 218 | rendered-template/ 219 | key.txt 220 | {{cookiecutter.project_slug}}/services/llm-router/google_vertexai.json 221 | .idea/ 222 | llm_in_a_box/ 223 | -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/.gitignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.toptal.com/developers/gitignore/api/visualstudiocode,python,git 2 | # Edit at https://www.toptal.com/developers/gitignore?templates=visualstudiocode,python,git 3 | 4 | ### Git ### 5 | # Created by git for backups. 
To disable backups in Git: 6 | # $ git config --global mergetool.keepBackup false 7 | *.orig 8 | 9 | # Created by git when using merge tools for conflicts 10 | *.BACKUP.* 11 | *.BASE.* 12 | *.LOCAL.* 13 | *.REMOTE.* 14 | *_BACKUP_*.txt 15 | *_BASE_*.txt 16 | *_LOCAL_*.txt 17 | *_REMOTE_*.txt 18 | 19 | ### Python ### 20 | # Byte-compiled / optimized / DLL files 21 | __pycache__/ 22 | *.py[cod] 23 | *$py.class 24 | 25 | # C extensions 26 | *.so 27 | 28 | # Distribution / packaging 29 | .Python 30 | build/ 31 | develop-eggs/ 32 | dist/ 33 | downloads/ 34 | eggs/ 35 | .eggs/ 36 | lib/ 37 | lib64/ 38 | parts/ 39 | sdist/ 40 | var/ 41 | wheels/ 42 | share/python-wheels/ 43 | *.egg-info/ 44 | .installed.cfg 45 | *.egg 46 | MANIFEST 47 | 48 | # PyInstaller 49 | # Usually these files are written by a python script from a template 50 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 51 | *.manifest 52 | *.spec 53 | 54 | # Installer logs 55 | pip-log.txt 56 | pip-delete-this-directory.txt 57 | 58 | # Unit test / coverage reports 59 | htmlcov/ 60 | .tox/ 61 | .nox/ 62 | .coverage 63 | .coverage.* 64 | .cache 65 | nosetests.xml 66 | coverage.xml 67 | *.cover 68 | *.py,cover 69 | .hypothesis/ 70 | .pytest_cache/ 71 | cover/ 72 | 73 | # Translations 74 | *.mo 75 | *.pot 76 | 77 | # Django stuff: 78 | *.log 79 | local_settings.py 80 | db.sqlite3 81 | db.sqlite3-journal 82 | 83 | # Flask stuff: 84 | instance/ 85 | .webassets-cache 86 | 87 | # Scrapy stuff: 88 | .scrapy 89 | 90 | # Sphinx documentation 91 | docs/_build/ 92 | 93 | # PyBuilder 94 | .pybuilder/ 95 | target/ 96 | 97 | # Jupyter Notebook 98 | .ipynb_checkpoints 99 | 100 | # IPython 101 | profile_default/ 102 | ipython_config.py 103 | 104 | # pyenv 105 | # For a library or package, you might want to ignore these files since the code is 106 | # intended to run in multiple environments; otherwise, check them in: 107 | # .python-version 108 | 109 | # pipenv 110 | # According to pypa/pipenv#598, 
it is recommended to include Pipfile.lock in version control. 111 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 112 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 113 | # install all needed dependencies. 114 | #Pipfile.lock 115 | 116 | # poetry 117 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 118 | # This is especially recommended for binary packages to ensure reproducibility, and is more 119 | # commonly ignored for libraries. 120 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 121 | #poetry.lock 122 | 123 | # pdm 124 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 125 | #pdm.lock 126 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 127 | # in version control. 128 | # https://pdm.fming.dev/#use-with-ide 129 | .pdm.toml 130 | 131 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 132 | __pypackages__/ 133 | 134 | # Celery stuff 135 | celerybeat-schedule 136 | celerybeat.pid 137 | 138 | # SageMath parsed files 139 | *.sage.py 140 | 141 | # Environments 142 | .env 143 | .venv 144 | env/ 145 | venv/ 146 | ENV/ 147 | env.bak/ 148 | venv.bak/ 149 | 150 | # Spyder project settings 151 | .spyderproject 152 | .spyproject 153 | 154 | # Rope project settings 155 | .ropeproject 156 | 157 | # mkdocs documentation 158 | /site 159 | 160 | # mypy 161 | .mypy_cache/ 162 | .dmypy.json 163 | dmypy.json 164 | 165 | # Pyre type checker 166 | .pyre/ 167 | 168 | # pytype static type analyzer 169 | .pytype/ 170 | 171 | # Cython debug symbols 172 | cython_debug/ 173 | 174 | # PyCharm 175 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 176 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 177 | # and can be added to the global gitignore or merged into this file. For a more nuclear 178 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
179 | #.idea/ 180 | 181 | ### Python Patch ### 182 | # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration 183 | poetry.toml 184 | 185 | # ruff 186 | .ruff_cache/ 187 | 188 | # LSP config files 189 | pyrightconfig.json 190 | 191 | ### VisualStudioCode ### 192 | .vscode/* 193 | !.vscode/settings.json 194 | !.vscode/tasks.json 195 | !.vscode/launch.json 196 | !.vscode/extensions.json 197 | !.vscode/*.code-snippets 198 | 199 | # Local History for Visual Studio Code 200 | .history/ 201 | 202 | # Built Visual Studio Code Extensions 203 | *.vsix 204 | 205 | ### VisualStudioCode Patch ### 206 | # Ignore all local history of files 207 | .history 208 | .ionide 209 | 210 | # End of https://www.toptal.com/developers/gitignore/api/visualstudiocode,python,git 211 | 212 | 213 | {{ cookiecutter.project_slug }}/.env 214 | {{ cookiecutter.project_slug }}/pyproject.toml 215 | {{ cookiecutter.project_slug }}/key.txt 216 | {{ cookiecutter.project_slug }}/pyproject.toml 217 | {{ cookiecutter.project_slug }}/services/llm-router/google_vertexai.json 218 | rendered-template/ 219 | key.txt 220 | {{cookiecutter.project_slug}}/services/llm-router/google_vertexai.json 221 | .idea/ 222 | -------------------------------------------------------------------------------- /docs/cilogon-integration.md: -------------------------------------------------------------------------------- 1 | # OIDC Integration Using CILogon 2 | 3 | ## Introduction 4 | 5 | [CILogon](https://www.cilogon.org) provides a standards-compliant OpenID 6 | Connect (OAuth 2.0) interface to federated authentication for 7 | cyberinfrastructure (CI). CILogon's federated identity management enables 8 | researchers to use their home organization credentials to access 9 | applications, rather than requiring yet another username and password to 10 | log on. 
11 | 12 | CILogon is operated by the 13 | [National Center for Supercomputing Applications (NCSA)](https://www.ncsa.illinois.edu/) 14 | at the [University of Illinois at Urbana-Champaign](https://illinois.edu/). 15 | 16 | ## Prequisites 17 | 18 | You should have already successfully deployed and configured the 19 | chat service (Open WebUI) using the standard login form authentication. 20 | This guide only details additional configuration needed for OIDC integration 21 | using CILogon and does not address overall configuration issues. 22 | 23 | The [OIDC protocol](https://openid.net/specs/openid-connect-core-1_0.html) 24 | requires web applications to be served using HTTPS. Your service should 25 | already be 26 | [configured to use HTTPS](link-to-traefix-documentation). 27 | The only exception to this requirement is during exploration or development 28 | when `http://localhost` or `http://127.0.0.1` may be used. 29 | 30 | ## Request Your CILogon Client ID and Secret 31 | 32 | CILogon subscribers may log into their CILogon Registry service and use 33 | the self-service interface to request a client ID and secret. 34 | 35 | Basic Authentication service tier (free) users should request a client 36 | following the instructions below and wait for a notice of approval. 37 | 38 | 1. Browse to the 39 | [CILogon OpenID Connect (OIDC) Client Registration](https://cilogon.org/oauth2/register) 40 | form. 41 | 42 | 1. Complete the form fields for Client Name, Contact Email, and Home URL. 43 | 44 | 1. For the Callback URLs field enter `https:///oauth/oidc/callback` 45 | and repalce `` with the hostname or service name for your 46 | deployment. 47 | 48 | 1. For Scopes tick the boxes for email, openid, and profile. 49 | 50 | 1. Click `Register Client`. 51 | 52 | 1. Record the client ID and secret. You must safely escrow the client secret 53 | since CILogon does not store it and only stores a computed hash of the 54 | secret. 55 | 56 | 1. 
Wait for an email indicating your client has been approved. You cannot 57 | successfully test your configuration until the client has been approved. 58 | 59 | ## Configuration 60 | 61 | The OAuth or OIDC integration for Open WebUI may be completely configured 62 | using environment variables. For additional details beyond those below 63 | see the following Open WebUI documentation: 64 | 65 | - [Environment Variable Configuration](https://docs.openwebui.com/getting-started/env-configuration) 66 | - [SSO(OAuth, OIDC, Trusted Header)](https://docs.openwebui.com/features/auth/sso/) 67 | - [Troubleshooting OAUTH/SSO Issues](https://docs.openwebui.com/troubleshooting/sso/) 68 | 69 | 70 | Edit the `llm_chat_ui.environment` section of the `docker-compose.yml` file 71 | as follows: 72 | 73 | 1. 74 | ``` 75 | ENABLE_OAUTH_PERSISTENT_CONFIG: "False" 76 | ``` 77 | 78 | This forces the OAuth configuration to always be read from environment 79 | variables on every restart. 80 | 81 | 1. 82 | ``` 83 | ENABLE_SIGNUP: "True" 84 | ``` 85 | 86 | Enable user account creation generally. See also `ENABLE_OAUTH_SIGNUP`. 87 | 88 | 1. 89 | ``` 90 | ENABLE_OAUTH_SIGNUP: "True" 91 | ``` 92 | 93 | Enable user account creation when authenticating using OAuth. 94 | 95 | 1. 96 | ``` 97 | WEBUI_URL: "https://" 98 | ``` 99 | 100 | Replace `` with the hostname from which your service will 101 | be served. Open WebUI uses this configuration to construct the appropriate 102 | return URI used during the OAuth or OIDC authentication flow. 103 | 104 | 1. 105 | ``` 106 | OAUTH_CLIENT_ID: "" 107 | ``` 108 | 109 | Replace `` with the client ID obtained when 110 | you requested your client from CILogon. The client ID will usually follow 111 | the format 112 | 113 | ``` 114 | cilogon:/client_id/... 115 | ``` 116 | 117 | 1. 118 | ``` 119 | OAUTH_CLIENT_SECRET: "" 120 | ``` 121 | 122 | Replace `` with the client secret 123 | obtained when you requested your client from CILogon. 124 | 125 | 1. 
126 | ``` 127 | OPENID_PROVIDER_URL: "https://cilogon.org/.well-known/openid-configuration" 128 | ``` 129 | 130 | 1. 131 | ``` 132 | OAUTH_PROVIDER_NAME: "CILogon" 133 | ``` 134 | 135 | 1. 136 | ``` 137 | OAUTH_SCOPES: "openid email profile" 138 | ``` 139 | 140 | 1. 141 | ``` 142 | OPENID_REDIRECT_URI: "https://<hostname>/oauth/oidc/callback" 143 | ``` 144 | 145 | 1. 146 | ``` 147 | OAUTH_ALLOWED_DOMAINS: "" 148 | ``` 149 | 150 | Since CILogon supports authentication from over 6,000 campus login servers 151 | around the world you may wish to restrict login from only your campus 152 | users. To do so enter the domain of your campus, for example 153 | 154 | ``` 155 | illinois.edu 156 | ``` 157 | 158 | You may use a comma-separated list of domains, for example 159 | 160 | ``` 161 | illinois.edu,berkeley.edu,tuwien.ac.at 162 | ``` 163 | 164 | CILogon subscribers may instead request that the OAuth2 server restrict 165 | logins from only a subset of login servers (server-side authorization is 166 | not available without a subscription). 167 | 168 | ## Restart and Test 169 | 170 | After you have received email notification that your CILogon client has been 171 | approved and you have edited the `docker-compose.yml` file as detailed above, 172 | you may restart your service and test the CILogon OIDC integration. 
173 | -------------------------------------------------------------------------------- /docs/paper.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'LLM-in-a-Box: A Templated, Self-Hostable Framework for Generative AI in Research' 3 | tags: 4 | - Python 5 | - Docker 6 | - LLM 7 | - generative AI 8 | - self-hosting 9 | - reproducible research 10 | - RSE 11 | - ollama 12 | - litellm 13 | - docling 14 | - qdrant 15 | - traefik 16 | - secops 17 | - sops 18 | - age 19 | authors: 20 | - name: Georg Heiler 21 | orcid: 0000-0002-8684-1163 22 | affiliation: "1, 2" 23 | - name: Aaron Culich 24 | affiliation: 3 25 | affiliations: 26 | - name: Complexity Science Hub Vienna (CSH) 27 | index: 1 28 | - name: Austrian Supply Chain Intelligence Institute (ASCII) 29 | index: 2 30 | - name: Eviction Research Network at University of California, Berkeley 31 | index: 3 32 | date: 1st July 2025 33 | bibliography: paper.bib 34 | 35 | # Optional fields if submitting to a AAS journal too, see this blog post: 36 | # (Reverse Proxy)"] 14 | 15 | subgraph CoreMain ["Main Services"] 16 | direction LR 17 | OpenWebUI["OpenWebUI
(Chat UI)"] 18 | Docling["Docling
(Document Extraction)"] 19 | end 20 | 21 | subgraph CoreBackend ["Backend Services"] 22 | direction LR 23 | LiteLLM["LiteLLM
(Model Router)"] 24 | Ollama["Ollama
(Model Server)"] 25 | end 26 | 27 | Postgres["Postgres
(State DB)"] 28 | end 29 | 30 | %% Addon Services (Yellow Box) 31 | subgraph Addons ["Addon Services"] 32 | direction TB 33 | 34 | subgraph AddonsTop ["Infrastructure"] 35 | direction LR 36 | API["API Gateway
(e.g. Kong)"] 37 | Monitoring["Monitoring
(Prometheus/Grafana)"] 38 | SSO["SSO/Identity
(Keycloak)"] 39 | end 40 | 41 | subgraph AddonsMiddle ["Data & Storage"] 42 | direction LR 43 | FileStore["Object Storage
(MinIO/S3)"] 44 | Qdrant["Qdrant
(Vector DB)"] 45 | DataViz["Data Visualization
(Metabase)"] 46 | end 47 | 48 | subgraph AddonsBottom ["Workflow & Automation"] 49 | direction LR 50 | Workflow["Workflow Engines
(Temporal)"] 51 | Notebooks["Jupyter/Polynote
(Notebooks)"] 52 | n8n["n8n
(Automation)"] 53 | end 54 | end 55 | 56 | %% Core Internal Connections 57 | Traefik --> OpenWebUI 58 | Traefik --> Docling 59 | Traefik --> LiteLLM 60 | Traefik --> Ollama 61 | 62 | OpenWebUI -->|API| LiteLLM 63 | OpenWebUI -->|State| Postgres 64 | 65 | Docling -->|RAG| LiteLLM 66 | Docling -->|State| Postgres 67 | 68 | LiteLLM -->|Model API| Ollama 69 | LiteLLM -->|State| Postgres 70 | 71 | %% Addon to Core Connections (with labels) 72 | API -.->|"Proxy"| Traefik 73 | Monitoring -.->|"Metrics"| Traefik 74 | Monitoring -.->|"Health"| OpenWebUI 75 | Monitoring -.->|"Performance"| LiteLLM 76 | Monitoring -.->|"Resources"| Ollama 77 | Monitoring -.->|"Database"| Postgres 78 | 79 | SSO -.->|"Auth"| Traefik 80 | SSO -.->|"Login"| OpenWebUI 81 | 82 | FileStore -.->|"Documents"| Docling 83 | FileStore -.->|"Models"| Ollama 84 | 85 | Qdrant -.->|"Vector Search"| Docling 86 | Qdrant -.->|"Embeddings"| LiteLLM 87 | 88 | DataViz -.->|"Analytics"| Postgres 89 | 90 | Workflow -.->|"Orchestration"| Docling 91 | Workflow -.->|"Model Calls"| LiteLLM 92 | Workflow -.->|"Inference"| Ollama 93 | 94 | Notebooks -.->|"Data Processing"| Docling 95 | Notebooks -.->|"Model Testing"| LiteLLM 96 | Notebooks -.->|"Experiments"| Ollama 97 | Notebooks -.->|"Analysis"| Postgres 98 | 99 | n8n -.->|"Automation"| Docling 100 | n8n -.->|"API Integration"| LiteLLM 101 | n8n -.->|"Model Pipeline"| Ollama 102 | n8n -.->|"Data Sync"| Postgres 103 | 104 | %% Styling 105 | classDef core fill:#e3f2fd,stroke:#1976d2,stroke-width:3px,color:#0d47a1 106 | classDef addon fill:#fff8e1,stroke:#f57c00,stroke-width:2px,color:#e65100 107 | classDef coreMain fill:#bbdefb,stroke:#1976d2,stroke-width:2px 108 | classDef coreBackend fill:#90caf9,stroke:#1976d2,stroke-width:2px 109 | classDef addonTop fill:#ffecb3,stroke:#f57c00,stroke-width:2px 110 | classDef addonMiddle fill:#ffe0b2,stroke:#f57c00,stroke-width:2px 111 | classDef addonBottom fill:#ffcc02,stroke:#f57c00,stroke-width:2px 112 | 113 | class Core core 114 | 
class Addons addon 115 | class CoreMain coreMain 116 | class CoreBackend coreBackend 117 | class AddonsTop addonTop 118 | class AddonsMiddle addonMiddle 119 | class AddonsBottom addonBottom 120 | ``` 121 | 122 | --- 123 | 124 | ## Layered Stack: Core, Addons, and Extensions 125 | 126 | ### Core (Turnkey, Always Included) 127 | - **Reverse Proxy**: Traefik (or Nginx) 128 | - **Chat UI**: OpenWebUI (or similar) 129 | - **Model Router**: LiteLLM (or OpenRouter, vLLM) 130 | - **Model Server**: Ollama (or LM Studio, vLLM, sglang) 131 | - **Document Extraction**: Docling (for high quality document preparation to improve RAG results) 132 | - **State DB**: Postgres 133 | 134 | ### Addons (Quick-Add, Highly Recommended) 135 | - **Vector DB**: Qdrant, Milvus, Weaviate, or Chroma 136 | - **Automation/Orchestration**: n8n, dagster, or Airflow 137 | - **SSO/Identity**: Keycloak, Authentik, or Auth0 138 | - **Monitoring/Observability**: Prometheus, Grafana, Loki 139 | - **Object/File Storage**: MinIO, S3 140 | - **Notebooks**: Jupyter, Polynote 141 | - **Data Visualization**: Metabase, Superset 142 | - **Workflow Engines**: Temporal, Argo Workflows 143 | - **API Gateway**: Kong, Ambassador 144 | 145 | ### Extensions/Specializations (Optional, Use-Case Driven) 146 | - **Fine-tuning/Training UI**: LoRA Studio, Hugging Face AutoTrain 147 | - **Agent Frameworks**: LangChain, CrewAI, AutoGen 148 | - **Data Labeling**: Label Studio 149 | - **ML Experiment Tracking**: MLflow, Weights & Biases 150 | - **RAG Frameworks**: LlamaIndex, Haystack 151 | - **Search**: OpenSearch, Elasticsearch 152 | - **Chatbot Integrations**: Slack, Discord, Teams, Webhooks 153 | - **Analytics**: Amplitude, PostHog 154 | - **Security**: Vault, SOPS, OPA 155 | 156 | 157 | ## Core vs. 
Addons/Extensions 158 | 159 | - **Core**: Should always be present for a functional, private, multi-model LLM stack (UI, router, model server, DB, proxy, doc extraction) 160 | - **Addons**: Should be one-command add (docker-compose, helm, etc.), and cover most common needs (vector db, SSO, monitoring, storage, automation) 161 | - **Extensions**: For advanced users, research, or verticals (fine-tuning, analytics, agent frameworks, integrations) 162 | 163 | --- 164 | 165 | ## Similar Projects & Inspiration 166 | ### Starter Projects 167 | #### 1. [philschmid/open-llm-stack](https://github.com/philschmid/open-llm-stack) 168 | - Focus: Production-ready open LLM stack (HuggingChat, TGI, MongoDB, Langchain, vLLM, OpenSearch) 169 | - Modular, cloud/on-prem, with examples for different providers 170 | - builds around huggingface chat - less customizable especially with regards to enterprise security settings 171 | - lacks advanced rag integration 172 | 173 | #### 2. [tmc/mlops-community-llm-stack-hack](https://github.com/tmc/mlops-community-llm-stack-hack) 174 | - Focus: MLOps community hackathon starter for LLM stacks 175 | - Includes Go backend, Python services, vector visualization, Slack monitoring 176 | - unmaintained 177 | 178 | #### 3. [godatadriven/openllm-starter](https://github.com/godatadriven/openllm-starter) 179 | - Focus: GPU infra provisioning, Streamlit chat, Jupyter, GCP automation 180 | - Good for quickstart on cloud GPU 181 | - however lacks docling integration for advanced rag 182 | - lacks large community like open web ui for continuous updates 183 | 184 | ### advanced further ideas 185 | 186 | #### 4. [aishwaryaprabhat/BigBertha](https://github.com/aishwaryaprabhat/BigBertha) 187 | - Focus: LLMOps on Kubernetes (ArgoCD, Argo Workflows, Prometheus, MLflow, MinIO, Milvus, LlamaIndex) 188 | - End-to-end retraining, monitoring, vector ingestion 189 | 190 | #### 5. 
[IceBearAI/LLM-And-More](https://github.com/IceBearAI/LLM-And-More) 191 | - Focus: Plug-and-play, full LLM workflow (data, training, deployment, evaluation) 192 | - Modular, professional, with UI and workflow orchestration 193 | -------------------------------------------------------------------------------- /docs/THOUGHTS.md: -------------------------------------------------------------------------------- 1 | # LLM-in-a-Box: Summary and Analysis 2 | 3 | ## Key Points & Purpose 4 | 5 | ### Core Problem Being Solved 6 | - **Access Gap**: Researchers face a difficult choice between proprietary cloud APIs (cost, privacy, reproducibility concerns) vs complex self-hosting 7 | - **Gatekeeper Control**: Dominant providers control interface, responses, and tool access 8 | - **RSE Dependency**: Self-hosting typically requires dedicated Research Software Engineer support (not always available) 9 | - **Infrastructure Complexity**: Traditional self-hosting involves orchestrating multiple services 10 | 11 | ### Solution Approach 12 | - **Templated Framework**: Pre-configured, containerized stack of open-source tools 13 | - **Minimal Configuration**: Deployable with minimal setup complexity 14 | - **RSE Expertise Packaging**: Bundles RSE knowledge into reusable template 15 | - **Sovereignty**: Enables switching between commercial and self-hosted models seamlessly 16 | 17 | ## Technical Components & Architecture 18 | 19 | ### Core Tools Stack 20 | - **OpenWebUI**: Chat interface 21 | - **Ollama**: Model server for self-hosting 22 | - **LiteLLM**: Universal model router (single API for local + commercial models) 23 | - **Docling**: Document extraction for RAG pipelines 24 | - **Traefik**: Reverse proxy/load balancer 25 | - **Qdrant**: Vector database 26 | - **PostgreSQL**: Data persistence 27 | - **SOPS + Age**: Secret management 28 | - **Docker Compose**: Container orchestration 29 | - **Cruft**: Template updating 30 | 31 | ### Infrastructure Patterns 32 | - **Container-based 
deployment** (CPU and GPU profiles) 33 | - **Profile-based configuration** (different service combinations) 34 | - **Secret management** with encryption 35 | - **Template-driven project generation** 36 | 37 | ## Prerequisites & Installation Requirements 38 | 39 | ### Hard Dependencies 40 | - **Pixi**: Package manager (https://pixi.sh/latest/) 41 | - **OCI Container Runtime**: Docker Desktop or equivalent 42 | - **Git**: Version control 43 | - **OpenSSL**: For secret generation 44 | 45 | ### Installation Process 46 | ```bash 47 | git clone git@github.com:complexity-science-hub/llm-in-a-box-template.git 48 | cd llm-in-a-box-template 49 | pixi run tpl-init 50 | # Configure .env secrets 51 | # Start with docker compose profiles 52 | ``` 53 | 54 | ## Target Audience Analysis 55 | 56 | ### Aspirational Audience 57 | - **Research Groups**: Need sovereign AI capabilities 58 | - **Educational Institutions**: Teaching and research applications 59 | - **Individual Developers**: Personal AI infrastructure 60 | - **Domain Specialists**: Non-CS researchers needing AI tools 61 | 62 | ### Actual Audience Reality 63 | **Who This Really Works For:** 64 | - Researchers with existing Docker/containerization experience 65 | - Teams with at least one person comfortable with system administration 66 | - Organizations with basic DevOps infrastructure already in place 67 | 68 | **Who This Struggles To Serve:** 69 | - Pure domain specialists without systems background 70 | - Researchers on completely fresh installations 71 | - Users unfamiliar with container orchestration 72 | - Teams without dedicated technical support 73 | 74 | ## Foundational Knowledge Gaps & Barriers 75 | 76 | ### Core Systems Expertise Required 77 | 78 | #### Container & Orchestration Knowledge 79 | - **Docker fundamentals**: Images, containers, volumes, networks 80 | - **Docker Compose**: Service definitions, profiles, environment variables 81 | - **Container debugging**: Logs, exec access, networking troubleshooting 
82 | 83 | #### Networking & Infrastructure 84 | - **Reverse proxy concepts**: Understanding Traefik configuration 85 | - **Port management**: Avoiding conflicts, understanding service discovery 86 | - **SSL/TLS basics**: Certificate management (future Kubernetes deployment) 87 | 88 | #### Security & Secret Management 89 | - **SOPS/Age cryptography**: Key generation, encryption workflows 90 | - **Environment variable security**: .env file management, secret rotation 91 | - **Container security**: Image scanning, runtime security 92 | 93 | #### System Administration 94 | - **Package management**: Understanding Pixi, resolving dependency conflicts 95 | - **File permissions**: Understanding volume mounts, user/group IDs 96 | - **Process management**: Service startup, health checking, resource monitoring 97 | 98 | #### Debugging & Troubleshooting 99 | - **Log analysis**: Multi-container log aggregation and interpretation 100 | - **Resource debugging**: Memory, GPU, storage issues 101 | - **Network troubleshooting**: Service discovery, port binding issues 102 | - **Dependency resolution**: When services fail to start or communicate 103 | 104 | ### Knowledge Bootstrapping Challenge 105 | 106 | **The Chicken-and-Egg Problem:** 107 | - **LLM Help Requires Working LLM**: Can't use AI to debug AI setup when it's broken 108 | - **System Expertise + AI**: Effective troubleshooting requires foundational knowledge PLUS AI assistance 109 | - **Implicit Assumptions**: "Simple" setup assumes significant prior knowledge 110 | - **Failure Cascades**: One small issue can make entire system inaccessible 111 | 112 | ### Installation Environment Variations 113 | 114 | #### Brand New Laptop Scenarios 115 | - **Missing dependencies**: Python, Docker, Git, package managers 116 | - **Permission issues**: Admin access, Docker daemon access 117 | - **Network restrictions**: Corporate firewalls, proxy configurations 118 | - **Resource constraints**: RAM, disk space, GPU availability 119 | 120 | 
#### Pre-configured System Scenarios 121 | - **Port conflicts**: Existing services using required ports 122 | - **Version conflicts**: Incompatible Docker/Python/tool versions 123 | - **Configuration interference**: Existing Traefik, database, or proxy setups 124 | - **Partial installations**: Broken previous attempts creating state conflicts 125 | 126 | ## Recommendations for Improved Accessibility 127 | 128 | ### Pre-Installation Assessment 129 | - **System compatibility checker**: Script to verify prerequisites 130 | - **Environment scanner**: Detect potential conflicts before installation 131 | - **Resource calculator**: Minimum RAM/disk/GPU requirements 132 | 133 | ### Installation Improvements 134 | - **Step-by-step guided setup**: Interactive installation wizard 135 | - **Environment-specific instructions**: Mac/Windows/Linux variations 136 | - **Rollback mechanisms**: Easy cleanup of failed installations 137 | - **Dependency auto-installation**: Automated prerequisite installation where possible 138 | 139 | ### Documentation Enhancements 140 | - **Troubleshooting decision trees**: "If X fails, try Y, then Z" 141 | - **Common failure scenarios**: Pre-documented solutions for typical issues 142 | - **Conceptual primers**: Brief explainers for Docker, networking, secrets management 143 | - **Video walkthroughs**: Visual setup guides for different platforms 144 | 145 | ### Community Support Infrastructure 146 | - **Installation validation**: Community-tested configurations 147 | - **Issue templates**: Structured bug reporting for setup problems 148 | - **Office hours/support channels**: Real-time help for setup issues 149 | - **Mentorship program**: Experienced users helping newcomers 150 | 151 | ## Bottom Line Assessment 152 | 153 | **Great Idea, Implementation Gap**: The project addresses a real need and provides genuine value, but the gap between "minimal configuration" and actual user experience reveals the classic CS assumption problem. 
Even with containerization abstracting much complexity, the foundational systems knowledge required for troubleshooting remains substantial. 154 | 155 | **Success Requires**: Either extensive pre-existing technical background OR significant institutional support OR very favorable environmental conditions (clean system, no conflicts, perfect documentation match). 156 | 157 | **Path Forward**: Focus on installation experience optimization, assumption documentation, and failure mode preparation rather than just the happy-path documentation. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LLM in a box template 2 | 3 | This template provides an easy-to-deploy, self-hostable stack to make the generative AI ecosystem more approachable for research and education. 4 | It unifies access to both commercial and local models (via Ollama) through a flexible chat UI and a single API endpoint, enabling private, reproducible, and sovereign AI workflows. 5 | 6 | This template project contains: 7 | 8 | - A **flexible Chat UI** [OpenWebUI](https://docs.openwebui.com/) 9 | - **Document extraction** for refined RAG via [docling](https://docs.openwebui.com/features/document-extraction/docling) 10 | - https://github.com/docling-project/docling 11 | - https://github.com/docling-project/docling-serve 12 | - A **model router** [litellm](https://www.litellm.ai/) 13 | - A **model server** [ollama](https://ollama.com/) 14 | - State is stored in Postgres https://www.postgresql.org/ 15 | 16 | This template is built with [cruft](https://cruft.github.io/cruft/) so it is easy to update. 17 | Furthermore secrets are managed with [sops](https://github.com/getsops/sops) and [age](https://github.com/FiloSottile/age). 18 | We use [traefik](https://traefik.io/traefik/) as a reverse proxy. 
19 | 20 | ![Icon](img/llm-in-a-box-icon.png) 21 | 22 | ## Publication 23 | 24 | This project is a contribution of the [Complexity Science Hub](https://complexity.science/) and is published under the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). 25 | This template project is published to [JOSE (journal of open source education)](https://jose.theoj.org/). 26 | You find the DOI here: <>> 27 | 28 | The source code for the publication is available in the `docs/paper.md` file. 29 | 30 | ## Using the template 31 | 32 | We intend to eventually have more deployment processes scaffolded. However, this is usually fairly specific for a given organization. 33 | Therefore, we start out with `docker compose`. 34 | And perhaps later will add something for Kubernetes with fluxcd. 35 | 36 | You may want to customize a production grade deployment for SSL handling and SSO. 37 | But we hope this gives you a good starting point. 38 | 39 | See the [QUICKSTART.md](./docs/QUICKSTART.md) for an in-depth quick start guide. 40 | 41 | ### Prerequisites 42 | 43 | You have a working installation of pixi available. 44 | 45 | See https://pixi.sh/latest/ for installing. In short: 46 | 47 | - osx/linux: `curl -fsSL https://pixi.sh/install.sh | sh` 48 | - windows: `powershell -ExecutionPolicy ByPass -c "irm -useb https://pixi.sh/install.ps1 | iex"` 49 | 50 | Furthermore, you must have some kind of container runtime installed. 51 | For example: 52 | - Docker Desktop https://www.docker.com/products/docker-desktop/ 53 | - Rancher Desktop https://rancherdesktop.io/ 54 | - this uses nerdctl 55 | - all commands below should work with `nerdctl compose` instead of `docker compose` 56 | - Kubernetes 57 | - Podman 58 | - ... 59 | 60 | 61 | ### Applying the template 62 | #### Docker (simple) 63 | For now we assume you are using docker. 
64 | 65 | > NOTICE: Pixi will bootstrap most required tools 66 | 67 | ```bash 68 | git clone https://github.com/complexity-science-hub/llm-in-a-box-template.git 69 | cd llm-in-a-box-template 70 | pixi run tpl-init-cruft 71 | 72 | # alternatively: 73 | # pip install cruft jinja2-ospath 74 | # cruft create git@github.com:complexity-science-hub/llm-in-a-box-template.git 75 | ``` 76 | 77 | Ensure you have checked out vllm (afterwards): 78 | 79 | ``` 80 | git clone --branch v0.10.0 --depth 1 https://github.com/vllm-project/vllm.git services/model-server/vllm 81 | ``` 82 | 83 | Now set up your secrets in the `.env` file. 84 | See a template below. 85 | 86 | Fast mode: 87 | 88 | ```bash 89 | cd <> 90 | ./generate-env.sh 91 | ``` 92 | 93 | ``` 94 | ROOT_DOMAIN=llminabox.geoheil.com 95 | CLOUDFLARE_IPS=173.245.48.0/20,103.21.244.0/22,103.22.200.0/22,103.31.4.0/22,141.101.64.0/18,108.162.192.0/18,190.93.240.0/20,188.114.96.0/20,197.234.240.0/22,198.41.128.0/17,162.158.0.0/15,104.16.0.0/13,104.24.0.0/14,172.64.0.0/13,131.0.72.0/22 96 | LOCAL_IPS=127.0.0.1/32,10.0.0.0/8,192.168.0.0/16,172.16.0.0/12 97 | TZ=Europe/Vienna 98 | 99 | # openssl rand -hex 32 for each 100 | LLM_ROUTER_DB=llm_router_db 101 | LLM_ROUTER_DB_USER=llm_router_db_user 102 | LLM_ROUTER_DB_PASSWORD=somepassword 103 | LITELLM_MASTER_KEY=somepassword 104 | LITELLM_SALT_KEY=somepassword 105 | LITELLM_UI_USERNAME=admin 106 | LITELLM_UI_PASSWORD=somepassword 107 | 108 | ROUTER_OPENAI_API_KEY=sk-sometoken 109 | ROUTER_ANTHROPIC_API_KEY=sk-someothertoken 110 | 111 | CHAT_UI_DB=chat_ui_db 112 | CHAT_UI_DB_USER=chat_ui_db_user 113 | CHAT_UI_DB_PASSWORD=somepassword 114 | CHAT_UI_SECRET_KEY=somepassword 115 | 116 | QDRANT__SERVICE__API_KEY=somepassword 117 | ``` 118 | 119 | 120 | Finally, start the servies 121 | 122 | 123 | ```bash 124 | # cpu 125 | docker compose --profile llminabox --profile ollama-cpu --profile docling-cpu --profile vectordb-cpu up -d 126 | 127 | # gpu 128 | docker compose --profile llminabox 
--profile ollama-gpu --profile docling-gpu --profile vectordb-cpu up -d 129 | 130 | docker compose logs -f 131 | ``` 132 | 133 | finally in your browser navigate to: 134 | 135 | - connect to the model server (ollama via docker) 136 | - choose a suitable model from https://ollama.com 137 | - pull it `docker exec -it ollama ollama pull gemma3:4b` 138 | - verify it is here `docker exec -it ollama ollama ls` 139 | - Set up the model router available at http://llm.llminabox.geoheil.com/ui 140 | - log in with the credentials set up in the `.env` file from above 141 | - register the model: Ensure that []() 142 | ```bash 143 | - model_name: "gemma3:4b" 144 | litellm_params: 145 | model: "ollama_chat/gemma3:4b" 146 | api_base: "http://ollama:11434" 147 | ``` 148 | - possibly modify other model registrations 149 | - ensure you have API keys for all the models you want to use 150 | - Openai 151 | - Claude 152 | - Gemini 153 | - VertexAI: make sure to set the right google cloud project 154 | - stop and restart the model router 155 | - go to http://llm.llminabox.geoheil.com/ui 156 | - verify in the UI the model is detected 157 | - create a new API key (we will use that later in the chat UI) 158 | - name: `llminabox` 159 | - select the desired models (i.e. all for now) 160 | - possibly add some restrictions such as budget, rate limit, expiration, etc. 161 | - verify the desired models work in the llm playground http://llm.llminabox.geoheil.com/ui/?page=llm-playground 162 | - Lets make this now available for end users. We use Openwebui as a chat UI: http://chat.llminabox.geoheil.com 163 | - create a user (use some more sensible credentials) 164 | - admin 165 | - admin@example.com 166 | - test 167 | - mail delivery is not set up by default - you have to set up according to your own organization's needs. 
168 | - in the admin settings 169 | - ensure the right model connection is defined - see http://chat.llminabox.geoheil.com/admin/settings/connections 170 | - under openai compatible - create a new connection 171 | - connection URL: `http://llmrouter:4000/v1` 172 | - api key: the one you created above 173 | - ensure the default openai API is disabled - and all traffic goes through the model router 174 | - Litelm model router: llm.llminabox.geoheil.com 175 | 176 | #### Kubernetes (advanced) 177 | 178 | This is a more advanced setup. 179 | We use [fluxcd](https://fluxcd.io/) and and [k3s](https://k3s.io/). 180 | 181 | 182 | For details see [Advanced setup](./docs/setup-advanced-k3s-fluxcd.md). 183 | 184 | > TODO: This is a work in progress. And will be updated in the future. For now, only the docker-compose based setup is available. 185 | 186 | ## Contribution 187 | 188 | Feel free to contribute - issues & even better pull requests are welcome. 189 | 190 | ### Developing the template 191 | 192 | To render an instance of the project: 193 | ```bash 194 | pixi run render-dev 195 | ``` 196 | 197 | see [rendered-template/*](rendered-template) for an example of how the peoject might look like. 198 | 199 | ```bash 200 | # brings up the CPU template quickly 201 | pixi run start-template 202 | ``` 203 | -------------------------------------------------------------------------------- /docs/QUICKSTART.md: -------------------------------------------------------------------------------- 1 | # LLM in a Box - Quick Start Guide 2 | 3 | This guide provides a comprehensive step-by-step process to get the LLM in a Box stack up and running quickly with Docker Compose. 4 | 5 | ## Prerequisites 6 | 7 | ### 1. Install Pixi (Package Manager) 8 | 9 | Pixi is required to manage dependencies and tooling. 
10 | 11 | **macOS/Linux:** 12 | ```bash 13 | curl -fsSL https://pixi.sh/install.sh | sh 14 | ``` 15 | 16 | **Windows:** 17 | ```powershell 18 | powershell -ExecutionPolicy ByPass -c "irm -useb https://pixi.sh/install.ps1 | iex" 19 | ``` 20 | 21 | 📖 **Documentation:** https://pixi.sh/latest/ 22 | 23 | ### 2. Install Docker 24 | 25 | You need a container runtime. Choose one of: 26 | 27 | - **Docker Desktop** (Recommended for beginners) 28 | - Download: https://www.docker.com/products/docker-desktop/ 29 | - Includes Docker Compose 30 | 31 | - **Rancher Desktop** (Alternative) 32 | - Download: https://rancherdesktop.io/ 33 | - Open source alternative to Docker Desktop 34 | 35 | After installation, verify Docker is working: 36 | ```bash 37 | docker --version 38 | docker compose version 39 | ``` 40 | 41 | ## Initial Setup 42 | 43 | ### 1. Clone and Enter the Repository 44 | 45 | ```bash 46 | # If you haven't cloned yet 47 | git clone 48 | cd llm-in-a-box-template 49 | ``` 50 | 51 | ### 2. Run the initialization task 52 | 53 | ```bash 54 | pixi run tpl-init-cruft 55 | ``` 56 | 57 | When prompted answer the questions asked by `pixi run`. The defaults 58 | work for a first exploration. Note the value entered for `project_slug` 59 | and change to the directory created with that name, e.g. 60 | 61 | ```bash 62 | cd llm_in_a_box 63 | ``` 64 | 65 | ### 3. Set Up Environment Variables 66 | 67 | We've created an automated script to generate your `.env` file with secure values: 68 | 69 | ```bash 70 | # Generate .env from template 71 | ./generate-env.sh 72 | ``` 73 | 74 | This will create a `.env` file with: 75 | - Auto-generated secure passwords and keys 76 | - Pre-configured domain settings for `project.docker` 77 | - Automatically detected timezone 78 | - API keys from your environment (if OPENAI_API_KEY or ANTHROPIC_API_KEY are set) 79 | - Standard database naming conventions (litellm, openwebui) 80 | 81 | ### 4. 
Add Your API Keys 82 | 83 | You'll need to manually add API keys for the AI models you want to use: 84 | 85 | #### OpenAI API Key 86 | 1. Go to: https://platform.openai.com/api-keys 87 | 2. Click "Create new secret key" 88 | 3. Copy the key (starts with `sk-`) 89 | 4. Edit `.env` and replace `sk-CHANGEME_YOUR_OPENAI_KEY` with your key 90 | 91 | #### Anthropic (Claude) API Key 92 | 1. Go to: https://console.anthropic.com/settings/keys 93 | 2. Click "Create Key" 94 | 3. Copy the key (starts with `sk-ant-`) 95 | 4. Edit `.env` and replace `sk-CHANGEME_YOUR_ANTHROPIC_KEY` with your key 96 | 97 | #### self hosted models (Ollama) 98 | 99 | 1. connect to the ollama docker container (after executing the docker start commands you find later in the instructions) 100 | 2. connect to the container `docker exec -it ollama /bin/bash` 101 | 3. pull the desired models `ollama pull gemma2:2b` (or any other model you want to use) 102 | 103 | #### Other Providers (Optional) 104 | - **Google Gemini**: https://aistudio.google.com/apikey 105 | - **Vertex AI**: Requires GCP project setup 106 | 107 | ## Configure Local Domain 108 | 109 | To use `project.docker` instead of localhost, add this to your hosts file: 110 | 111 | **macOS/Linux:** 112 | ```bash 113 | echo "127.0.0.1 project.docker" | sudo tee -a /etc/hosts 114 | echo "127.0.0.1 llm.project.docker" | sudo tee -a /etc/hosts 115 | echo "127.0.0.1 chat.project.docker" | sudo tee -a /etc/hosts 116 | ``` 117 | 118 | **Windows (Run as Administrator):** 119 | ```powershell 120 | Add-Content C:\Windows\System32\drivers\etc\hosts "127.0.0.1 project.docker" 121 | Add-Content C:\Windows\System32\drivers\etc\hosts "127.0.0.1 llm.project.docker" 122 | Add-Content C:\Windows\System32\drivers\etc\hosts "127.0.0.1 chat.project.docker" 123 | ``` 124 | 125 | ## Start the Services 126 | 127 | ### Basic Setup (Recommended for Testing) 128 | 129 | The `llminabox` profile includes the core services needed to get started: 130 | 131 | ```bash 132 | 
docker compose --profile llminabox up -d 133 | ``` 134 | 135 | This starts: 136 | - **Traefik Proxy** (port 80) - Reverse proxy for routing requests 137 | - **LiteLLM Router** - Model routing and API management 138 | - **Open WebUI** - Chat interface 139 | - **PostgreSQL databases** - For LiteLLM and Open WebUI 140 | 141 | ### Extended Setup (Optional Services) 142 | 143 | If you want additional capabilities, you can add: 144 | 145 | #### Local Model Support (Ollama) 146 | ```bash 147 | docker compose --profile llminabox --profile ollama-cpu up -d 148 | ``` 149 | 150 | #### Document Processing (Docling) 151 | ```bash 152 | docker compose --profile llminabox --profile docling-cpu up -d 153 | ``` 154 | 155 | #### Vector Database (Qdrant) 156 | ```bash 157 | docker compose --profile llminabox --profile vectordb-cpu up -d 158 | ``` 159 | 160 | #### Full Extended Setup 161 | ```bash 162 | docker compose --profile llminabox --profile ollama-cpu --profile docling-cpu --profile vectordb-cpu up -d 163 | ``` 164 | 165 | ### GPU Setup (For Better Performance) 166 | 167 | ```bash 168 | docker compose --profile llminabox --profile ollama-gpu --profile docling-gpu --profile vectordb-cpu up -d 169 | ``` 170 | 171 | Monitor the logs: 172 | ```bash 173 | docker compose logs -f 174 | ``` 175 | 176 | ## Initial Configuration 177 | 178 | ### 1. Configure the Model Router (LiteLLM) 179 | 180 | 1. Open: http://llm.project.docker/ui 181 | 2. Login with credentials from your `.env`: 182 | - Username: `admin` (or what you set for `LITELLM_UI_USERNAME`) 183 | - Password: (check `LITELLM_UI_PASSWORD` in your `.env`) 184 | 185 | 3. Register external models (if you added API keys): 186 | - OpenAI models are auto-detected if API key is set 187 | - For Claude, add models like `claude-3-sonnet-20240229` 188 | 189 | 4. 
If you started Ollama, register your local models: 190 | - Click "Add Model" 191 | - Configure: 192 | ```yaml 193 | model_name: "gemma2:2b" 194 | litellm_params: 195 | model: "ollama_chat/gemma2:2b" 196 | api_base: "http://ollama:11434" 197 | ``` 198 | 199 | 5. Create an API key for the Chat UI: 200 | - Go to "Keys" section 201 | - Click "Create Key" 202 | - Name: `chat-ui-key` 203 | - Select all models you want to make available 204 | - Copy the generated key 205 | 206 | ### 2. Configure the Chat UI (Open WebUI) 207 | 208 | 1. Open: http://chat.project.docker 209 | 2. Create an admin account: 210 | - Username: `admin` 211 | - Email: `admin@example.com` 212 | - Password: (choose a secure password) 213 | 214 | 3. Configure model connection: 215 | - Go to Admin Settings → Connections 216 | - Add OpenAI-compatible connection: 217 | - URL: `http://llmrouter:4000/v1` 218 | - API Key: (paste the key from LiteLLM) 219 | - Disable the default OpenAI connection 220 | 221 | 4. Verify models are available: 222 | - Go to the chat interface 223 | - Check that your models appear in the model selector 224 | 225 | ### 3. Pull Local Models (If Using Ollama) 226 | 227 | If you started Ollama, download models to test with: 228 | 229 | ```bash 230 | # Pull a small, fast model for testing 231 | docker exec -it ollama ollama pull gemma2:2b 232 | 233 | # Or pull a larger, more capable model 234 | docker exec -it ollama ollama pull llama3.2:3b 235 | 236 | # Verify the model is downloaded 237 | docker exec -it ollama ollama list 238 | ``` 239 | 240 | ## Testing Your Setup 241 | 242 | 1. **Test via LiteLLM:** 243 | - Go to http://llm.project.docker/ui 244 | - Use the Playground to test models 245 | 246 | 2. **Test via Chat UI:** 247 | - Go to http://chat.project.docker 248 | - Start a new chat and select a model 249 | 250 | 3. **Test Ollama directly (if started):** 251 | ```bash 252 | docker exec -it ollama ollama run gemma2:2b "Hello, how are you?" 
253 | ``` 254 | 255 | ## Service URLs 256 | 257 | - **Chat Interface**: http://chat.project.docker 258 | - **Model Router UI**: http://llm.project.docker/ui 259 | - **Model Router API**: http://llm.project.docker/v1 260 | 261 | ## Troubleshooting 262 | 263 | ### Services not starting? 264 | ```bash 265 | # Check service status 266 | docker compose ps 267 | 268 | # View detailed logs 269 | docker compose logs [service-name] 270 | 271 | # Common services: llmrouter, chatui, routerdb, chatuidb 272 | ``` 273 | 274 | ### Can't access URLs? 275 | - Ensure hosts file is updated (see Configure Local Domain) 276 | - Check if port 80 is available: `lsof -i :80` (macOS/Linux) 277 | - Try accessing via localhost: http://localhost instead 278 | - Verify the proxy is running: `docker compose logs proxy` 279 | 280 | ### Models not showing up? 281 | 1. Ensure API keys are properly set in `.env` 282 | 2. If using Ollama, ensure models are pulled 283 | 3. Restart the model router after adding models: 284 | ```bash 285 | docker compose restart llmrouter 286 | ``` 287 | 288 | ### Database connection issues? 289 | - Wait a few moments for databases to initialize 290 | - Check database logs: `docker compose logs routerdb chatuidb` 291 | 292 | ## Next Steps 293 | 294 | 1. **Add local models**: Start Ollama and explore https://ollama.com/library 295 | 2. **Configure document extraction**: Start Docling for advanced RAG capabilities 296 | 3. **Set up vector search**: Start Qdrant for semantic search 297 | 4. 
**Production setup**: 298 | - Enable HTTPS with proper certificates 299 | - Configure authentication (Keycloak is included but disabled) 300 | - Set stronger passwords in `.env` 301 | 302 | ## Useful Commands 303 | 304 | ```bash 305 | # Stop all services 306 | docker compose down 307 | 308 | # Stop and remove all data 309 | docker compose down -v 310 | 311 | # Update services 312 | docker compose pull 313 | docker compose up -d 314 | 315 | # View resource usage 316 | docker stats 317 | 318 | # Execute commands in containers 319 | docker exec -it llmrouter /bin/bash 320 | docker exec -it chatui /bin/bash 321 | ``` 322 | 323 | ## Support 324 | 325 | - Check the main README for architectural details 326 | - Report issues at: https://github.com/complexity-science-hub/llm-in-a-box-template/issues 327 | - LiteLLM docs: https://docs.litellm.ai/ 328 | - Open WebUI docs: https://docs.openwebui.com/ 329 | - Ollama docs: https://ollama.com/ 330 | - docling docs: https://docling-project.github.io/docling/ -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/docker-compose.yml: -------------------------------------------------------------------------------- 1 | # https://geshan.com.np/blog/2025/02/ollama-docker-compose/ 2 | services: 3 | proxy: 4 | image: traefik:v3.5 5 | container_name: proxy 6 | ports: 7 | - "80:80" 8 | #- "443:443" 9 | environment: 10 | # - CF_API_EMAIL=$CLOUDFLARE_EMAIL 11 | # - CF_DNS_API_TOKEN=$CLOUDFLARE_API_KEY 12 | - TZ=${TZ} 13 | - ROOT_DOMAIN=${ROOT_DOMAIN} 14 | profiles: 15 | - proxy 16 | - llminabox 17 | - auth 18 | - chat-ui 19 | volumes: 20 | - "/var/run/docker.sock:/var/run/docker.sock:ro" 21 | - ./services/proxy/rules:/rules 22 | - proxy_logs:/logs 23 | #- proxy_certs:/letsencrypt 24 | networks: 25 | - proxy_net 26 | restart: unless-stopped 27 | command: 28 | # TODO disable 29 | # - --log.level=DEBUG 30 | - "--providers.docker=true" 31 | - 
--providers.docker.endpoint=unix:///var/run/docker.sock # Use Docker Socket Proxy instead for improved security 32 | - "--providers.docker.exposedbydefault=false" 33 | - "--providers.docker.network=proxy_net" 34 | - "--entrypoints.web.address=:80" 35 | #- --accesslog.filepath=/logs/access.log 36 | - "--entrypoints.web.asdefault=true" 37 | # - "--entrypoints.websecure.address=:443" 38 | # - --entrypoints.websecure.http.tls.options=tls-opts@file 39 | # # Allow these IPs to set the X-Forwarded-* headers - Cloudflare IPs: https://www.cloudflare.com/ips/ 40 | # - --entrypoints.websecure.forwardedHeaders.trustedIPs=$CLOUDFLARE_IPS,$LOCAL_IPS 41 | 42 | - --providers.file.directory=/rules 43 | - --providers.file.watch=true # Only works on top level files in the rules folder 44 | #- "--providers.file.filename=/etc/traefik/dynamic_conf.toml" 45 | #- "--providers.file.filename=/rules/dynamic_conf.toml" 46 | # - --certificatesResolvers.dns-cloudflare.acme.email=$CF_API_EMAIL 47 | # - --certificatesResolvers.dns-cloudflare.acme.storage=/letsencrypt/acme.json 48 | # - --certificatesResolvers.dns-cloudflare.acme.dnsChallenge.provider=cloudflare 49 | # - --certificatesResolvers.dns-cloudflare.acme.dnsChallenge.resolvers=1.1.1.1:53,1.0.0.1:53 50 | # - --certificatesResolvers.dns-cloudflare.acme.dnsChallenge.delayBeforeCheck=90 # To delay DNS check and reduce LE hitrate 51 | # - --entryPoints.web.http.redirections.entrypoint.to=websecure 52 | # - --entryPoints.web.http.redirections.entrypoint.scheme=https 53 | # auth: 54 | # image: quay.io/keycloak/keycloak:26.2 55 | # depends_on: 56 | # authdb: 57 | # condition: service_healthy 58 | # container_name: keycloak 59 | # restart: unless-stopped 60 | # # Just-in-time build + runtime options 61 | # command: > 62 | # start 63 | # --db=postgres 64 | # --hostname auth.${ROOT_DOMAIN} 65 | # --http-enabled=true 66 | # --proxy-headers=xforwarded 67 | # --health-enabled=true 68 | # --metrics-enabled=true 69 | 70 | # environment: 71 | # 
KC_DB_URL: jdbc:postgresql://authdb:5432/${AUTH_DB} 72 | # KC_DB_USERNAME: ${AUTH_DB_USER} 73 | # KC_DB_PASSWORD: ${AUTH_DB_PASSWORD} 74 | # KC_LOG_LEVEL: INFO 75 | # KC_BOOTSTRAP_ADMIN_USERNAME: ${KC_BOOTSTRAP_ADMIN_USERNAME} 76 | # KC_BOOTSTRAP_ADMIN_PASSWORD: ${KC_BOOTSTRAP_ADMIN_PASSWORD} 77 | # #ports: 78 | # # - "8080:8080" # HTTP left open for the reverse-proxy on the same LAN 79 | # # - "9000:9000" # management port: /health and /metrics 80 | # profiles: 81 | # - auth 82 | # networks: 83 | # - proxy_net 84 | # - auth_net 85 | 86 | modelserverollama-cpu: 87 | image: ollama/ollama:0.12.11 88 | volumes: 89 | - ollama:/root/.ollama 90 | container_name: ollama 91 | restart: unless-stopped 92 | profiles: 93 | - ollama-cpu 94 | - llx 95 | networks: 96 | model_router_net: 97 | aliases: 98 | - ollama 99 | modelserverollama-gpu: 100 | image: ollama/ollama:0.12.11 101 | #ports: 102 | # - 11434:11434 103 | volumes: 104 | - ollama:/root/.ollama 105 | container_name: ollama 106 | restart: unless-stopped 107 | profiles: 108 | - ollama-gpu 109 | networks: 110 | model_router_net: 111 | aliases: 112 | - ollama 113 | deploy: 114 | resources: 115 | reservations: 116 | devices: 117 | - driver: nvidia 118 | count: all 119 | capabilities: [gpu] 120 | 121 | modelservervllm: 122 | build: 123 | # docker build -f docker/Dockerfile.cpu -t vllm-cpu-env --shm-size=4g . 
124 | # docker build -f services/model-server/vllm/docker/Dockerfile.cpu -t vllm-cpu-env --shm-size=4g services/model-server/vllm 125 | context: ./services/model-server/vllm 126 | # TODO select the right architecture/accelerator support here 127 | # TODO document & expose as easy to use docker compose profile flags 128 | # TODO add gpu claims here in case of GPU support 129 | dockerfile: docker/Dockerfile.arm 130 | target: build 131 | shm_size: "4gb" 132 | image: llminabox/modelserver:v0.10.0-cpu-arm 133 | container_name: modelserver 134 | hostname: modelserver 135 | environment: 136 | HF_HOME: /root/models 137 | HUGGING_FACE_HUB_TOKEN: ${HUGGING_FACE_HUB_TOKEN} 138 | # In case a enterprise artifact repository and huggingface mirror is used 139 | # HF_ENDPOINT: ${HF_ENDPOINT} 140 | VLLM_CPU_OMP_THREADS_BIND: "all" # or "0-3|4-7" 141 | VLLM_CPU_KVCACHE_SPACE: "16" 142 | # cpuset: "0-3" 143 | command: 144 | - "--model" 145 | - "meta-llama/Llama-3.2-1B-Instruct" 146 | # "microsoft/Phi-4-mini-instruct" 147 | - "--port" 148 | - "8000" 149 | # restart: unless-stopped 150 | ports: 151 | - "8000:8000" 152 | profiles: 153 | - model-server-cpu-arm 154 | volumes: 155 | - vllm:/root/models 156 | modelservervllm-gpu: 157 | image: vllm/vllm-openai:v0.11.0 158 | container_name: modelserver 159 | hostname: modelserver 160 | environment: 161 | HF_HOME: /root/.vllm 162 | HUGGING_FACE_HUB_TOKEN: ${HUGGING_FACE_HUB_TOKEN} 163 | # In case a enterprise artifact repository and huggingface mirror is used 164 | # HF_ENDPOINT: ${HF_ENDPOINT} 165 | VLLM_CPU_OMP_THREADS_BIND: "all" # or "0-3|4-7" 166 | VLLM_CPU_KVCACHE_SPACE: "16" 167 | # cpuset: "0-3" 168 | command: 169 | - "--model" 170 | - "meta-llama/Llama-3.2-1B-Instruct" 171 | # "microsoft/Phi-4-mini-instruct" 172 | - "--port" 173 | - "8000" 174 | # restart: unless-stopped 175 | ports: 176 | - "8000:8000" 177 | profiles: 178 | - model-server-gpu 179 | volumes: 180 | - vllm:/root/.vllm 181 | 182 | 183 | # authdb: 184 | # image: 
postgres:17.5-alpine3.22 185 | # container_name: authdb 186 | # restart: unless-stopped 187 | # environment: 188 | # POSTGRES_DB: ${AUTH_DB} 189 | # POSTGRES_USER: ${AUTH_DB_USER} 190 | # POSTGRES_PASSWORD: ${AUTH_DB_PASSWORD} 191 | # healthcheck: 192 | # test: [ "CMD-SHELL", "pg_isready -U $${POSTGRES_USER} -d $${POSTGRES_DB}" ] 193 | # interval: 5s 194 | # timeout: 10s 195 | # retries: 10 196 | # profiles: 197 | # - auth 198 | # volumes: 199 | # - llm_model_auth_db:/var/lib/postgresql/data 200 | # networks: 201 | # - auth_net 202 | routerdb: 203 | image: postgres:18.1-alpine3.22 204 | container_name: routerdb 205 | restart: unless-stopped 206 | environment: 207 | POSTGRES_DB: ${LLM_ROUTER_DB} 208 | POSTGRES_USER: ${LLM_ROUTER_DB_USER} 209 | POSTGRES_PASSWORD: ${LLM_ROUTER_DB_PASSWORD} 210 | healthcheck: 211 | test: [ "CMD-SHELL", "pg_isready -U $${POSTGRES_USER} -d $${POSTGRES_DB}" ] 212 | interval: 5s 213 | timeout: 10s 214 | retries: 10 215 | profiles: 216 | - model-router 217 | - llminabox 218 | - llx 219 | volumes: 220 | - llm_model_router_db:/var/lib/postgresql/data 221 | networks: 222 | - model_router_net 223 | 224 | llm_router: 225 | image: litellm/litellm:v1.79.1-stable 226 | platform: linux/amd64 227 | container_name: llmrouter 228 | restart: unless-stopped 229 | environment: 230 | - LITELLM_MASTER_KEY=sk-${LITELLM_MASTER_KEY} 231 | - LITELLM_SALT_KEY=${LITELLM_SALT_KEY} 232 | - STORE_MODEL_IN_DB="True" 233 | - DATABASE_URL=postgres://${LLM_ROUTER_DB_USER}:${LLM_ROUTER_DB_PASSWORD}@routerdb:5432/${LLM_ROUTER_DB} 234 | - OPENAI_API_KEY=${ROUTER_OPENAI_API_KEY} 235 | - ANTHROPIC_API_KEY=${ROUTER_ANTHROPIC_API_KEY} 236 | - LITELLM_MODE="PRODUCTION" 237 | - UI_USERNAME=${LITELLM_UI_USERNAME} 238 | - UI_PASSWORD=${LITELLM_UI_PASSWORD} 239 | profiles: 240 | - model-router 241 | - llminabox 242 | - llx 243 | networks: 244 | - model_router_net 245 | - llm_net 246 | - proxy_net 247 | expose: 248 | - "4000" 249 | depends_on: 250 | routerdb: 251 | condition: 
service_healthy 252 | #- ollama 253 | healthcheck: 254 | test: [ "CMD-SHELL", "wget --no-verbose --tries=1 http://localhost:4000/health/liveliness || exit 1" ] 255 | interval: 30s 256 | timeout: 10s 257 | retries: 3 258 | start_period: 40s 259 | #ports: 260 | # - "4000:4000" 261 | # :4000 is api; /ui -> UI 262 | volumes: 263 | - ./services/llm-router/litellm_config.yml:/app/config.yaml 264 | - ./services/llm-router/google_vertexai.json:/secrets/google_vertexai.json 265 | # --detailed_debug 266 | command: [ "--config", "/app/config.yaml", "--port", "4000", "--num_workers", "2" ] 267 | labels: 268 | - "traefik.enable=true" 269 | - "traefik.http.routers.litellm.rule=Host(`llm.${ROOT_DOMAIN}`)" 270 | - "traefik.http.services.litellm.loadbalancer.server.port=4000" 271 | # - "traefik.http.routers.litellm.tls.certresolver=dns-cloudflare" 272 | - "traefik.http.routers.litellm.middlewares=chain-no-auth@file" 273 | 274 | chatuidb: 275 | image: postgres:18.1-alpine3.22 276 | container_name: chatuidb 277 | restart: unless-stopped 278 | environment: 279 | POSTGRES_DB: ${CHAT_UI_DB} 280 | POSTGRES_USER: ${CHAT_UI_DB_USER} 281 | POSTGRES_PASSWORD: ${CHAT_UI_DB_PASSWORD} 282 | healthcheck: 283 | test: [ "CMD-SHELL", "pg_isready -U $${POSTGRES_USER} -d $${POSTGRES_DB}" ] 284 | interval: 5s 285 | timeout: 10s 286 | retries: 10 287 | profiles: 288 | - chat-ui 289 | - llminabox 290 | volumes: 291 | - llm_chat_ui_db:/var/lib/postgresql/data 292 | networks: 293 | - chatui_net 294 | 295 | docling-cpu: 296 | image: quay.io/docling-project/docling-serve-cpu:v1.1.0 297 | container_name: docling 298 | #ports: 299 | # - 5001:5001 300 | profiles: 301 | - docling-cpu 302 | networks: 303 | - chatui_net 304 | healthcheck: 305 | test: ["CMD", "curl", "-f", "http://localhost:5001/health"] 306 | interval: 15s 307 | retries: 5 308 | 309 | docling-gpu: 310 | image: quay.io/docling-project/docling-serve-cu124:v1.1.0 311 | container_name: docling 312 | #ports: 313 | # - 5001:5001 314 | profiles: 315 | - 
docling-gpu 316 | networks: 317 | - chatui_net 318 | healthcheck: 319 | test: ["CMD", "curl", "-f", "http://localhost:5001/health"] 320 | interval: 15s 321 | retries: 5 322 | deploy: 323 | resources: 324 | reservations: 325 | devices: 326 | - driver: nvidia 327 | count: all 328 | capabilities: [gpu] 329 | 330 | vectordb-qdrant-cpu: 331 | image: qdrant/qdrant:v1.15.5 332 | container_name: qdrant 333 | hostname: qdrant 334 | restart: unless-stopped 335 | environment: 336 | QDRANT__SERVICE__API_KEY: ${QDRANT__SERVICE__API_KEY} 337 | profiles: 338 | - vectordb-cpu 339 | networks: 340 | - chatui_net 341 | # configs: 342 | # - source: qdrant_config 343 | # target: /qdrant/config/production.yaml 344 | volumes: 345 | - qdrant:/qdrant/storage 346 | # vectordb-qdrant-cpu-healthcheck: 347 | # restart: always 348 | # image: curlimages/curl:latest 349 | # entrypoint: ["/bin/sh", "-c", "--", "while true; do sleep 30; done;"] 350 | # profiles: 351 | # - vectordb-cpu 352 | # depends_on: 353 | # - qdrant 354 | # healthcheck: 355 | # test: ["CMD", "curl", "-f", "http://qdrant:6333/readyz"] 356 | # interval: 10s 357 | # timeout: 2s 358 | # retries: 5 359 | llm_chat_ui: 360 | # TODO: create GPU variation as well for faster whisper and embeddings 361 | # TODO add a vector database elastic? qdrant? 
362 | image: ghcr.io/open-webui/open-webui:v0.6.36 363 | container_name: chatui 364 | volumes: 365 | - open-webui:/app/backend/data 366 | depends_on: 367 | chatuidb: 368 | condition: service_healthy 369 | llm_router: 370 | condition: service_healthy 371 | environment: 372 | WEBUI_SECRET_KEY: ${CHAT_UI_SECRET_KEY} 373 | CORS_ALLOW_ORIGIN: http://chat.${ROOT_DOMAIN} 374 | CONTENT_EXTRACTION_ENGINE: docling 375 | DOCLING_SERVER_URL: http://docling:5001 376 | DATABASE_URL: postgresql://${CHAT_UI_DB_USER}:${CHAT_UI_DB_PASSWORD}@chatuidb:5432/${CHAT_UI_DB} 377 | ENABLE_IMAGE_GENERATION: "True" 378 | ENABLE_CHANNELS: "True" 379 | IMAGE_GENERATION_ENGINE: "openai" 380 | IMAGE_SIZE: "1024x1024" 381 | IMAGE_GENERATION_MODEL: "dall-e-3" 382 | ENABLE_OLLAMA_API: "False" 383 | ENABLE_OPENAI_API: "True" 384 | #OPENAI_API_BASE_URLS: "https://api.openai.com/v1;http://llmrouter:4000/v1" 385 | #OPENAI_API_KEYS: "${ROUTER_OPENAI_API_KEY};${LLM_ROUTER_KEY_FOR_CHAT_UI}" 386 | IMAGES_OPENAI_API_KEY: "${ROUTER_OPENAI_API_KEY}" 387 | VECTOR_DB: qdrant 388 | QDRANT_API_KEY: ${QDRANT__SERVICE__API_KEY} 389 | QDRANT_URI: http://qdrant:6333 390 | ENABLE_QDRANT_MULTITENANCY_MODE: "True" 391 | # ENABLE_OPENAI_API, OPENAI_API_KEY, OPENAI_API_KEYS 392 | # VECTOR_DB, ELASTICSEARCH_URL 393 | # - 'OLLAMA_BASE_URL=http://ollama:11434' 394 | restart: unless-stopped 395 | profiles: 396 | - chat-ui 397 | - llminabox 398 | networks: 399 | - proxy_net 400 | - llm_net 401 | - chatui_net 402 | labels: 403 | - "traefik.enable=true" 404 | - "traefik.http.routers.chat.rule=Host(`chat.${ROOT_DOMAIN}`)" 405 | - "traefik.http.routers.chat.service=chat" 406 | - "traefik.http.services.chat.loadbalancer.server.port=8080" 407 | - "traefik.http.routers.chat.middlewares=chain-no-auth@file" 408 | 409 | networks: 410 | proxy_net: 411 | name: proxy_net 412 | # external: true 413 | model_router_net: 414 | auth_net: 415 | llm_net: 416 | chatui_net: 417 | 418 | volumes: 419 | ollama: {} 420 | vllm: {} 421 | 
llm_model_router_db: {} 422 | #llm_model_auth_db: {} 423 | llm_chat_ui_db: {} 424 | proxy_logs: {} 425 | proxy_certs: {} 426 | open-webui: {} 427 | qdrant: {} 428 | --------------------------------------------------------------------------------