├── .gitmodules ├── LICENSE ├── public-age-keys.txt ├── {{cookiecutter.project_slug}} ├── public-age-keys.txt ├── scripts │ ├── simplify-public-keys.sh │ ├── secops_config.sh │ ├── git_clean_local_branches.sh │ ├── decrypt_secrets.sh │ └── encrypt_secrets.sh ├── services │ ├── proxy │ │ └── rules │ │ │ ├── middlewares-chains.yml │ │ │ ├── tls-opts.yml │ │ │ └── middlewares.yml │ ├── model-server │ │ └── README.md │ ├── keykloak │ │ └── Dockerfile │ └── llm-router │ │ ├── litellm_config.yml │ │ └── google_vertexai.json.enc ├── .github │ └── ISSUE_TEMPLATE │ │ └── user-story.md ├── yamllintconfig.yaml ├── documentation │ └── secops │ │ └── add-key.md ├── Makefile ├── pyproject.toml ├── .env.enc ├── generate-env.sh ├── .gitignore └── docker-compose.yml ├── img └── llm-in-a-box-icon.png ├── scripts ├── simplify-public-keys.sh ├── handle-dev-secrets.sh ├── secops_config.sh ├── decrypt_secrets.sh └── encrypt_secrets.sh ├── docs ├── setup-advanced-k3s-fluxcd.md ├── paper.bib ├── LOCALDNS.md ├── cilogon-integration.md ├── paper.md ├── DIAGRAM.md ├── THOUGHTS.md └── QUICKSTART.md ├── services ├── proxy │ └── rules │ │ ├── middlewares-chains.yml │ │ ├── tls-opts.yml │ │ └── middlewares.yml ├── keykloak │ └── Dockerfile ├── model-server │ └── README.md └── llm-router │ ├── litellm_config.yml │ └── google_vertexai.json.enc ├── .github ├── workflows │ └── render-pdf.yml └── ISSUE_TEMPLATE │ └── user-story.md ├── cookiecutter.json ├── yamllintconfig.yaml ├── .env_template ├── pyproject.toml ├── Makefile ├── .gitignore └── README.md /.gitmodules: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache 2.0 -------------------------------------------------------------------------------- /public-age-keys.txt: 
-------------------------------------------------------------------------------- 1 | # Georg 2 | age1ph7watxp99nsl8ejs3snrf2jykwxc9j3va00z5xrywttyms3af9q4chaa0 -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/public-age-keys.txt: -------------------------------------------------------------------------------- 1 | # Georg 2 | age1ph7watxp99nsl8ejs3snrf2jykwxc9j3va00z5xrywttyms3af9q4chaa0 -------------------------------------------------------------------------------- /img/llm-in-a-box-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/complexity-science-hub/llm-in-a-box-template/HEAD/img/llm-in-a-box-icon.png -------------------------------------------------------------------------------- /scripts/simplify-public-keys.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | grep -v '^\#' public-age-keys.txt | tr -d '[:space:]' | tr '\n' ',' | sed 's/,$//' -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/scripts/simplify-public-keys.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | grep -v '^\#' public-age-keys.txt | tr -d '[:space:]' | tr '\n' ',' | sed 's/,$//' -------------------------------------------------------------------------------- /scripts/handle-dev-secrets.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cp key.txt rendered-template/llm_in_a_box/ 4 | cd rendered-template/llm_in_a_box 5 | make secrets-decrypt 6 | cd ../../ -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/scripts/secops_config.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Define files to encrypt/decrypt 4 | 
FILES_TO_ENCRYPT=".env services/llm-router/google_vertexai.json" 5 | -------------------------------------------------------------------------------- /scripts/secops_config.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Define files to encrypt/decrypt 4 | FILES_TO_ENCRYPT="{{cookiecutter.project_slug}}/.env {{cookiecutter.project_slug}}/services/llm-router/google_vertexai.json" 5 | -------------------------------------------------------------------------------- /docs/setup-advanced-k3s-fluxcd.md: -------------------------------------------------------------------------------- 1 | # Advanced setup with k3s and fluxcd 2 | 3 | 4 | This is a more advanced setup. 5 | We use [fluxcd](https://fluxcd.io/) and [k3s](https://k3s.io/). 6 | 7 | > TODO eventuall add the instructions here 8 | 9 | ## Setup of fluxcd 10 | 11 | ## setup of k3s -------------------------------------------------------------------------------- /scripts/decrypt_secrets.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source ./scripts/secops_config.sh 4 | 5 | export SOPS_AGE_KEY_FILE=key.txt 6 | 7 | for file in ${FILES_TO_ENCRYPT}; do 8 | echo "Decrypting: $file" 9 | sops --decrypt --input-type binary --output-type binary "$file.enc" > "$file" 10 | done 11 | -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/scripts/git_clean_local_branches.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | git branch | grep -v "^\*" | grep -v master | grep -v main | xargs git branch -d 4 | git fetch -p ; git branch -r | awk '{print $1}' | egrep -v -f /dev/fd/0 <(git branch -vv | grep origin) | awk '{print $1}' | xargs git branch -d -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/scripts/decrypt_secrets.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source ./scripts/secops_config.sh 4 | 5 | export SOPS_AGE_KEY_FILE=key.txt 6 | 7 | for file in ${FILES_TO_ENCRYPT}; do 8 | echo "Decrypting: $file" 9 | sops --decrypt --input-type binary --output-type binary "$file.enc" > "$file" 10 | done 11 | -------------------------------------------------------------------------------- /scripts/encrypt_secrets.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source ./scripts/secops_config.sh 4 | 5 | SOPS_AGE_RECIPIENTS=$(./scripts/simplify-public-keys.sh) 6 | echo "recipient: ${SOPS_AGE_RECIPIENTS}" 7 | 8 | for file in ${FILES_TO_ENCRYPT}; do 9 | echo "Encrypting: $file" 10 | sops --encrypt --age ${SOPS_AGE_RECIPIENTS} --input-type binary --output-type binary "$file" > "$file.enc" 11 | done 12 | -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/scripts/encrypt_secrets.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source ./scripts/secops_config.sh 4 | 5 | SOPS_AGE_RECIPIENTS=$(./scripts/simplify-public-keys.sh) 6 | echo "recipient: ${SOPS_AGE_RECIPIENTS}" 7 | 8 | for file in ${FILES_TO_ENCRYPT}; do 9 | echo "Encrypting: $file" 10 | sops --encrypt --age ${SOPS_AGE_RECIPIENTS} --input-type binary --output-type binary "$file" > "$file.enc" 11 | done 12 | -------------------------------------------------------------------------------- /services/proxy/rules/middlewares-chains.yml: -------------------------------------------------------------------------------- 1 | http: 2 | middlewares: 3 | chain-no-auth: 4 | chain: 5 | middlewares: 6 | - middlewares-rate-limit 7 | - middlewares-secure-headers 8 | - middlewares-compress 9 | 10 | chain-basic-auth: 11 | chain: 12 | middlewares: 13 | - middlewares-rate-limit 14 | #- middlewares-https-redirectscheme 15 
| - middlewares-secure-headers 16 | - middlewares-basic-auth 17 | - middlewares-compress 18 | -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/services/proxy/rules/middlewares-chains.yml: -------------------------------------------------------------------------------- 1 | http: 2 | middlewares: 3 | chain-no-auth: 4 | chain: 5 | middlewares: 6 | - middlewares-rate-limit 7 | - middlewares-secure-headers 8 | - middlewares-compress 9 | 10 | chain-basic-auth: 11 | chain: 12 | middlewares: 13 | - middlewares-rate-limit 14 | #- middlewares-https-redirectscheme 15 | - middlewares-secure-headers 16 | - middlewares-basic-auth 17 | - middlewares-compress 18 | -------------------------------------------------------------------------------- /.github/workflows/render-pdf.yml: -------------------------------------------------------------------------------- 1 | name: Generate PDF from Markdown 2 | on: 3 | push: 4 | branches: 5 | - main 6 | jobs: 7 | paper: 8 | runs-on: ubuntu-latest 9 | name: Paper Draft 10 | steps: 11 | - name: Checkout repository 12 | uses: actions/checkout@v4 13 | - name: TeX and PDF 14 | uses: docker://openjournals/paperdraft:latest 15 | with: 16 | journal: joss 17 | args: '-k ./docs/paper.md' 18 | - name: Upload PDF to Artifacts 19 | uses: actions/upload-artifact@v4 20 | with: 21 | name: generated-paper 22 | path: ./docs/paper.pdf -------------------------------------------------------------------------------- /services/proxy/rules/tls-opts.yml: -------------------------------------------------------------------------------- 1 | tls: 2 | options: 3 | tls-opts: 4 | minVersion: VersionTLS12 5 | cipherSuites: 6 | - TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 7 | - TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 8 | - TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 9 | - TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 10 | - TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305 11 | - TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305 12 | - 
TLS_AES_128_GCM_SHA256 13 | - TLS_AES_256_GCM_SHA384 14 | - TLS_CHACHA20_POLY1305_SHA256 15 | - TLS_FALLBACK_SCSV # Client is doing version fallback. See RFC 7507 16 | curvePreferences: 17 | - CurveP521 18 | - CurveP384 19 | sniStrict: true 20 | -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/services/proxy/rules/tls-opts.yml: -------------------------------------------------------------------------------- 1 | tls: 2 | options: 3 | tls-opts: 4 | minVersion: VersionTLS12 5 | cipherSuites: 6 | - TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 7 | - TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 8 | - TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 9 | - TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 10 | - TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305 11 | - TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305 12 | - TLS_AES_128_GCM_SHA256 13 | - TLS_AES_256_GCM_SHA384 14 | - TLS_CHACHA20_POLY1305_SHA256 15 | - TLS_FALLBACK_SCSV # Client is doing version fallback. See RFC 7507 16 | curvePreferences: 17 | - CurveP521 18 | - CurveP384 19 | sniStrict: true 20 | -------------------------------------------------------------------------------- /cookiecutter.json: -------------------------------------------------------------------------------- 1 | { 2 | "project_name": "LLM in a box", 3 | "organization": "myorg", 4 | "project_slug": "{{ cookiecutter.project_name.lower().replace(' ', '_') }}", 5 | "project_slug_pixi": "{{ cookiecutter.project_name.lower().replace(' ', '-') }}", 6 | "author": "Your name", 7 | "author_email": "Your@email.com", 8 | "cloudflare_api_key": "<>", 9 | "cloudflare_email": "<>", 10 | "root_domain": "<>", 11 | "time_zone": "Europe/Vienna", 12 | "state_path": "{{ '.' 
| abspath }}/{{ cookiecutter.project_slug }}-z_state", 13 | 14 | "_extensions": [ 15 | "jinja2_ospath.extensions.OSPathExtension" 16 | ], 17 | "_copy_without_render": [ 18 | "services/model-server/vllm" 19 | ] 20 | } -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/user-story.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: user story 3 | about: Describe this issue template's purpose here. 4 | title: "[user story] ..." 5 | labels: user story 6 | assignees: '' 7 | 8 | --- 9 | 10 | --- 11 | 12 | As a ..., I want to ..., so I can ... 13 | 14 | # Acceptance criteria (given when then) 15 | 16 | - [ ] This is something that can be verified to show that this user story is satisfied. 17 | 18 | # details 19 | 20 | ## assumptions 21 | ## dependencies 22 | ## related documents 23 | ## notes 24 | 25 | # Sprint Ready Checklist 26 | 1. - [ ] Acceptance criteria defined 27 | 2. - [ ] Team understands acceptance criteria 28 | 3. - [ ] Team has defined solution / steps to satisfy acceptance criteria 29 | 4. - [ ] Acceptance criteria is verifiable / testable 30 | 5. 
- [ ] External / 3rd Party dependencies identified 31 | -------------------------------------------------------------------------------- /yamllintconfig.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | extends: default 4 | 5 | rules: 6 | braces: 7 | level: warning 8 | max-spaces-inside: 1 9 | brackets: 10 | level: warning 11 | max-spaces-inside: 1 12 | colons: 13 | level: warning 14 | commas: 15 | level: warning 16 | comments: disable 17 | comments-indentation: disable 18 | document-start: disable 19 | empty-lines: 20 | level: warning 21 | hyphens: 22 | level: warning 23 | indentation: 24 | level: warning 25 | indent-sequences: consistent 26 | line-length: 27 | max: 196 28 | level: warning 29 | allow-non-breakable-inline-mappings: true 30 | truthy: disable 31 | 32 | ignore: | 33 | **/node_modules/** 34 | **/vendor/** 35 | **/dist/** 36 | **/build/** 37 | .pixi/envs/ 38 | **/.pixi/envs/** 39 | **/dbt_packages/** 40 | **.pixi/solve-group-envs/** -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/.github/ISSUE_TEMPLATE/user-story.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: user story 3 | about: Describe this issue template's purpose here. 4 | title: "[user story] ..." 5 | labels: user story 6 | assignees: '' 7 | 8 | --- 9 | 10 | --- 11 | 12 | As a ..., I want to ..., so I can ... 13 | 14 | # Acceptance criteria (given when then) 15 | 16 | - [ ] This is something that can be verified to show that this user story is satisfied. 17 | 18 | # details 19 | 20 | ## assumptions 21 | ## dependencies 22 | ## related documents 23 | ## notes 24 | 25 | # Sprint Ready Checklist 26 | 1. - [ ] Acceptance criteria defined 27 | 2. - [ ] Team understands acceptance criteria 28 | 3. - [ ] Team has defined solution / steps to satisfy acceptance criteria 29 | 4. - [ ] Acceptance criteria is verifiable / testable 30 | 5. 
- [ ] External / 3rd Party dependencies identified 31 | -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/yamllintconfig.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | extends: default 4 | 5 | rules: 6 | braces: 7 | level: warning 8 | max-spaces-inside: 1 9 | brackets: 10 | level: warning 11 | max-spaces-inside: 1 12 | colons: 13 | level: warning 14 | commas: 15 | level: warning 16 | comments: disable 17 | comments-indentation: disable 18 | document-start: disable 19 | empty-lines: 20 | level: warning 21 | hyphens: 22 | level: warning 23 | indentation: 24 | level: warning 25 | indent-sequences: consistent 26 | line-length: 27 | max: 196 28 | level: warning 29 | allow-non-breakable-inline-mappings: true 30 | truthy: disable 31 | 32 | ignore: | 33 | **/node_modules/** 34 | **/vendor/** 35 | **/dist/** 36 | **/build/** 37 | .pixi/envs/ 38 | **/.pixi/envs/** 39 | **/dbt_packages/** 40 | **.pixi/solve-group-envs/** -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/documentation/secops/add-key.md: -------------------------------------------------------------------------------- 1 | # Add key for age encryption 2 | 3 | example how to create a public/private pair for age based encryption 4 | 5 | ## creating the keys for the new user 6 | 7 | The new user executes: 8 | 9 | ```bash 10 | age-keygen -o key.txt 11 | ``` 12 | 13 | The new user creates a pull request to add his/her public key to the file. 14 | 15 | > Ensure that the key is added to the list of [public age keys](public-keys.txt). 16 | > Keys are comma separated - just add a new line and describe the owner/person for that key. 17 | 18 | > Ensure all files to be encrypted are listed in [secops_config.sh][scripts/secops_config.sh] 19 | 20 | Do not forget to re-encrypt all the secrets for this new user. 
21 | 22 | 23 | The `key.txt` file should be placed at the root of the repository. 24 | 25 | 26 | ## encrypting/decrypting 27 | 28 | To en-/decrypt: 29 | 30 | ```bash 31 | pixi run secrets-encrypt 32 | pixi run secrets-decrypt 33 | ``` -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/services/model-server/README.md: -------------------------------------------------------------------------------- 1 | # VLLM 2 | 3 | Ensure the desired version of VLMM is selected here. 4 | 5 | ## initial setup 6 | 7 | ``` 8 | git clone --branch v0.9.2 --depth 1 https://github.com/vllm-project/vllm.git services/model-server/vllm 9 | ``` 10 | 11 | 12 | ## gpu vs cpu 13 | 14 | ``` 15 | docker run --runtime nvidia --gpus all \ 16 | -v ~/.cache/huggingface:/root/.cache/huggingface \ 17 | --env "HUGGING_FACE_HUB_TOKEN=${HUGGING_FACE_HUB_TOKEN}" \ 18 | -p 8000:8000 \ 19 | --ipc=host \ 20 | vllm/vllm-openai:latest \ 21 | --model mistralai/Mistral-7B-v0.1 22 | 23 | docker run --privileged \ 24 | -v ~/.cache/huggingface:/root/.cache/huggingface \ 25 | --env "HUGGING_FACE_HUB_TOKEN=${HUGGING_FACE_HUB_TOKEN}" \ 26 | -p 8001:8000 \ 27 | --ipc=host \ 28 | public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.9.2 \ 29 | --model mistralai/Mistral-7B-v0.1 30 | 31 | curl http://localhost:8000/v1/models 32 | curl http://localhost:8000/v1/completions \ 33 | -H "Content-Type: application/json" \ 34 | -d '{ 35 | "model": "mistralai/Mistral-7B-v0.1", 36 | "prompt": "San Francisco is a", 37 | "max_tokens": 7, 38 | "temperature": 0 39 | }' 40 | 41 | curl http://localhost:8000/v1/completions \ 42 | -H "Content-Type: application/json" \ 43 | -d '{ 44 | "model": "microsoft/Phi-4-mini-instruct", 45 | "prompt": "San Francisco is a", 46 | "max_tokens": 7, 47 | "temperature": 0 48 | }' 49 | 50 | curl http://localhost:8000/v1/completions \ 51 | -H "Content-Type: application/json" \ 52 | -d '{ 53 | "model": "meta-llama/Llama-3.2-1B-Instruct", 54 | "prompt": "San 
Francisco is a", 55 | "max_tokens": 7, 56 | "temperature": 0 57 | }' 58 | ``` -------------------------------------------------------------------------------- /.env_template: -------------------------------------------------------------------------------- 1 | # Domain configuration 2 | ROOT_DOMAIN=project.docker # see LOCALDNS.md for more information 3 | CLOUDFLARE_IPS=173.245.48.0/20,103.21.244.0/22,103.22.200.0/22,103.31.4.0/22,141.101.64.0/18,108.162.192.0/18,190.93.240.0/20,188.114.96.0/20,197.234.240.0/22,198.41.128.0/17,162.158.0.0/15,104.16.0.0/13,104.24.0.0/14,172.64.0.0/13,131.0.72.0/22 4 | LOCAL_IPS=127.0.0.1/32,10.0.0.0/8,192.168.0.0/16,172.16.0.0/12 5 | TZ=UTC # default to UTC (Universal Time Coordinated) and let auto-detect happen with ./generate-env.sh 6 | 7 | # LLM Router Database (LiteLLM) 8 | LLM_ROUTER_DB=litellm 9 | LLM_ROUTER_DB_USER=litellm 10 | # Generate with: openssl rand -hex 32 11 | LLM_ROUTER_DB_PASSWORD=CHANGEME_GENERATE_HEX32 12 | 13 | # LiteLLM Configuration 14 | # Generate with: openssl rand -hex 32 15 | LITELLM_MASTER_KEY=CHANGEME_GENERATE_HEX32 16 | # Generate with: openssl rand -hex 32 17 | LITELLM_SALT_KEY=CHANGEME_GENERATE_HEX32 18 | LITELLM_UI_USERNAME=admin 19 | LITELLM_UI_PASSWORD=CHANGEME_SECURE_PASSWORD 20 | 21 | # API Keys for Model Providers 22 | # Get from: https://platform.openai.com/api-keys 23 | ROUTER_OPENAI_API_KEY=sk-CHANGEME_YOUR_OPENAI_KEY 24 | 25 | # Get from: https://console.anthropic.com/settings/keys 26 | ROUTER_ANTHROPIC_API_KEY=sk-CHANGEME_YOUR_ANTHROPIC_KEY 27 | 28 | # Chat UI Database (OpenWebUI) 29 | CHAT_UI_DB=openwebui 30 | CHAT_UI_DB_USER=openwebui 31 | # Generate with: openssl rand -hex 32 32 | CHAT_UI_DB_PASSWORD=CHANGEME_GENERATE_HEX32 33 | # Generate with: openssl rand -hex 32 34 | CHAT_UI_SECRET_KEY=CHANGEME_GENERATE_HEX32 35 | 36 | # Vector Database (Qdrant) 37 | # Generate with: openssl rand -hex 32 38 | QDRANT__SERVICE__API_KEY=CHANGEME_GENERATE_HEX32 39 | 40 | # Hugging Face API Key 41 | # Get 
from: https://huggingface.co/settings/tokens 42 | HUGGING_FACE_HUB_TOKEN=hf_CHANGEME_YOUR_HUGGING_FACE_API_KEY 43 | -------------------------------------------------------------------------------- /services/proxy/rules/middlewares.yml: -------------------------------------------------------------------------------- 1 | http: 2 | middlewares: 3 | 4 | middlewares-rate-limit: 5 | rateLimit: 6 | average: 100 7 | burst: 50 8 | 9 | middlewares-https-redirectscheme: 10 | redirectScheme: 11 | scheme: https 12 | permanent: true 13 | 14 | middlewares-secure-headers: 15 | headers: 16 | accessControlAllowMethods: 17 | - GET 18 | - OPTIONS 19 | - PUT 20 | accessControlMaxAge: 100 21 | hostsProxyHeaders: 22 | - "X-Forwarded-Host" 23 | stsSeconds: 63072000 24 | stsIncludeSubdomains: true 25 | stsPreload: true 26 | forceSTSHeader: true 27 | 28 | # TODO: Enable for templated repo 29 | customFrameOptionsValue: "allow-from https:{{env "ROOT_DOMAIN"}}" 30 | contentTypeNosniff: true 31 | browserXssFilter: true 32 | # sslForceHost: true # add sslHost to all of the services 33 | # sslHost: sslHost: "{{env "ROOT_DOMAIN"}}" 34 | referrerPolicy: "same-origin" 35 | permissionsPolicy: "camera=(), microphone=(), geolocation=(), payment=(), usb=(), vr=()" 36 | customResponseHeaders: 37 | X-Robots-Tag: "none,noarchive,nosnippet,notranslate,noimageindex," 38 | server: "" 39 | # https://community.traefik.io/t/how-to-make-websockets-work-with-traefik-2-0-setting-up-rancher/1732 40 | # X-Forwarded-Proto: "https" 41 | 42 | middlewares-compress: 43 | compress: {} 44 | 45 | # https://stackoverflow.com/questions/49717670/how-to-config-upload-body-size-restriction-in-traefik 46 | middlewares-buffering: 47 | buffering: 48 | maxResponseBodyBytes: 2000000 49 | maxRequestBodyBytes: 10485760 50 | memRequestBodyBytes: 2097152 51 | memResponseBodyBytes: 2097152 52 | retryExpression: "IsNetworkError() && Attempts() <= 2" 53 | -------------------------------------------------------------------------------- 
/{{cookiecutter.project_slug}}/services/proxy/rules/middlewares.yml: -------------------------------------------------------------------------------- 1 | http: 2 | middlewares: 3 | 4 | middlewares-rate-limit: 5 | rateLimit: 6 | average: 100 7 | burst: 500 8 | 9 | middlewares-https-redirectscheme: 10 | redirectScheme: 11 | scheme: https 12 | permanent: true 13 | 14 | middlewares-secure-headers: 15 | headers: 16 | accessControlAllowMethods: 17 | - GET 18 | - OPTIONS 19 | - PUT 20 | accessControlMaxAge: 100 21 | hostsProxyHeaders: 22 | - "X-Forwarded-Host" 23 | stsSeconds: 63072000 24 | stsIncludeSubdomains: true 25 | stsPreload: true 26 | forceSTSHeader: true 27 | 28 | # TODO: Enable for templated repo 29 | customFrameOptionsValue: {% raw %}"allow-from https:{{env "ROOT_DOMAIN"}}"{% endraw %} 30 | contentTypeNosniff: true 31 | browserXssFilter: true 32 | # sslForceHost: true # add sslHost to all of the services 33 | # sslHost: {% raw %}sslHost: "{{env "ROOT_DOMAIN"}}"{% endraw %} 34 | referrerPolicy: "same-origin" 35 | permissionsPolicy: "camera=(), microphone=(), geolocation=(), payment=(), usb=(), vr=()" 36 | customResponseHeaders: 37 | X-Robots-Tag: "none,noarchive,nosnippet,notranslate,noimageindex," 38 | server: "" 39 | # https://community.traefik.io/t/how-to-make-websockets-work-with-traefik-2-0-setting-up-rancher/1732 40 | # X-Forwarded-Proto: "https" 41 | 42 | middlewares-compress: 43 | compress: {} 44 | 45 | # https://stackoverflow.com/questions/49717670/how-to-config-upload-body-size-restriction-in-traefik 46 | middlewares-buffering: 47 | buffering: 48 | maxResponseBodyBytes: 2000000 49 | maxRequestBodyBytes: 10485760 50 | memRequestBodyBytes: 2097152 51 | memResponseBodyBytes: 2097152 52 | retryExpression: "IsNetworkError() && Attempts() <= 2" 53 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = 
"llm-in-a-box-template" 3 | version = "1.0.0" 4 | description = "LLM in a box template" 5 | authors = [{ name = "Georg Heiler", email = "heiler@csh.ac.at" }] 6 | requires-python = ">= 3.13,< 3.14" 7 | 8 | [tool.pixi.project] 9 | platforms = ["linux-64", "osx-arm64", "win-64", "linux-aarch64"] 10 | channels = ["conda-forge"] 11 | 12 | [build-system] 13 | build-backend = "hatchling.build" 14 | requires = ["hatchling"] 15 | 16 | [tool.pixi.environments] 17 | secops = { features = ["secops"], solve-group = "secops" } 18 | ci = { features = ["ci-basics"], solve-group = "ci" } 19 | template = { features = ["template"], solve-group = "template" } 20 | 21 | [tool.pixi.feature.ci-basics.dependencies] 22 | yamllint = ">=1.35.1,<2" 23 | taplo = ">=0.9.3,<0.11" 24 | 25 | [tool.pixi.feature.template.dependencies] 26 | cruft = "~=2.16.0" 27 | cookiecutter = "~=2.6.0" 28 | 29 | [tool.pixi.feature.template.pypi-dependencies] 30 | jinja2-ospath = ">=0.3.0,<0.4.0" 31 | 32 | [tool.pixi.feature.secops.dependencies] 33 | go-sops = "~=3.9.4" 34 | age = "~=1.2.1" 35 | 36 | 37 | [tool.pixi.tasks.fmt] 38 | cmd = "pixi run -e ci yamllint -c yamllintconfig.yaml . && taplo fmt" 39 | description = "Format yaml & toml files" 40 | env = { RUST_LOG = "warn" } 41 | 42 | [tool.pixi.tasks.tpl-init-cruft] 43 | cmd = "pixi run -e template cruft create https://github.com/complexity-science-hub/llm-in-a-box-template.git" 44 | description = "Initialize template" 45 | 46 | 47 | [tool.pixi.tasks.render-dev] 48 | cmd = "pixi run --frozen -e template cruft create . 
--no-input --overwrite-if-exists --output-dir rendered-template && ./scripts/handle-dev-secrets.sh" 49 | description = "locally render a development instance of the template" 50 | 51 | [tool.pixi.tasks.start-template] 52 | cmd = "pixi run --frozen -e template docker compose --profile llminabox --profile ollama-cpu --profile docling-cpu --profile vectordb-cpu up" 53 | cwd = "rendered-template/llm_in_a_box" 54 | description = "spin up the template" 55 | 56 | [tool.pixi.tasks.secrets-encrypt] 57 | cmd = "pixi run -e secops ./scripts/encrypt_secrets.sh" 58 | description = "encrypt secrets with SOPS and AGE" 59 | 60 | [tool.pixi.tasks.secrets-decrypt] 61 | cmd = "pixi run -e secops ./scripts/decrypt_secrets.sh" 62 | description = "decrypt secrets with SOPS and AGE" 63 | -------------------------------------------------------------------------------- /docs/paper.bib: -------------------------------------------------------------------------------- 1 | @misc{ollama, 2 | author = {He, Jeffrey and Paull, Michael and others}, 3 | title = {Ollama}, 4 | year = {2023}, 5 | publisher = {GitHub}, 6 | journal = {GitHub repository}, 7 | howpublished = {\url{https://github.com/ollama/ollama}} 8 | } 9 | 10 | @misc{litellm, 11 | author = {K K, Ishaan and others}, 12 | title = {LiteLLM}, 13 | year = {2023}, 14 | publisher = {GitHub}, 15 | journal = {GitHub repository}, 16 | howpublished = {\url{https://github.com/BerriAI/litellm}} 17 | } 18 | 19 | @misc{openwebui, 20 | author = {Tym, Timothy and others}, 21 | title = {Open-WebUI}, 22 | year = {2023}, 23 | publisher = {GitHub}, 24 | journal = {GitHub repository}, 25 | howpublished = {\url{https://github.com/open-webui/open-webui}} 26 | } 27 | 28 | @article{hettrick2013uk, 29 | title={UK Research Software Engineer Survey 2013}, 30 | author={Hettrick, Simon and others}, 31 | year={2013}, 32 | publisher={Software Sustainability Institute}, 33 | doi={10.5281/zenodo.14809}, 34 | url={https://doi.org/10.5281/zenodo.14809} 35 | } 36 | 
@misc{graham_mcps_2025, 37 | type = {Substack newsletter}, 38 | title = {{MCPs}, {Gatekeepers}, and the {Future} of {AI}}, 39 | url = {https://iamcharliegraham.substack.com/p/mcps-gatekeepers-and-the-future-of}, 40 | abstract = {MCPs—Model Context Protocols—are set to transform AI from passive chatbots into powerful, action-taking agents. But the real story isn’t what MCPs enable—it’s who controls them.}, 41 | urldate = {2025-04-24}, 42 | journal = {In The AIrena}, 43 | author = {Graham, Charlie}, 44 | month = apr, 45 | year = {2025}, 46 | file = {Snapshot:/Users/geoheil/Zotero/storage/N4YFTQQU/mcps-gatekeepers-and-the-future-of.html:text/html}, 47 | } 48 | @misc{auer2024doclingtechnicalreport, 49 | title={Docling Technical Report}, 50 | author={Christoph Auer and Maksym Lysak and Ahmed Nassar and Michele Dolfi and Nikolaos Livathinos and Panos Vagenas and Cesar Berrospi Ramis and Matteo Omenetti and Fabian Lindlbauer and Kasper Dinkla and Lokesh Mishra and Yusik Kim and Shubham Gupta and Rafael Teixeira de Lima and Valery Weber and Lucas Morin and Ingmar Meijer and Viktor Kuropiatnyk and Peter W. J. Staar}, 51 | year={2024}, 52 | eprint={2408.09869}, 53 | archivePrefix={arXiv}, 54 | primaryClass={cs.CL}, 55 | url={https://arxiv.org/abs/2408.09869}, 56 | } -------------------------------------------------------------------------------- /services/keykloak/Dockerfile: -------------------------------------------------------------------------------- 1 | # ############################################################################### 2 | # # Dockerfile – Keycloak 26.2 with PostgreSQL build + SCIM plug-in (auto-fetch) 3 | # ############################################################################### 4 | 5 | # ######################## 0️⃣ Parameters you might tweak ###################### 6 | # # Pass SCIM_VERSION at build time if you want a different tag, e.g. 7 | # # docker build --build-arg SCIM_VERSION=2.2.2 -t my/keycloak:26.2-scim . 
8 | # ARG SCIM_VERSION=2.2.1 9 | # # The download URL template (GitHub project: Captain-P-Goldfish/scim-for-keycloak) 10 | # ARG SCIM_URL_TEMPLATE="https://github.com/Captain-P-Goldfish/scim-for-keycloak/releases/download/v${SCIM_VERSION}/scim-for-keycloak-kc-26-${SCIM_VERSION}-free.jar" 11 | 12 | # ######################## 1️⃣ Fetch stage – download the JAR ################## 13 | # FROM alpine:3.19 AS fetcher 14 | # ARG SCIM_VERSION 15 | # ARG SCIM_URL_TEMPLATE 16 | # RUN apk add --no-cache curl ca-certificates && \ 17 | # curl -L --fail -o /scim-plugin.jar "${SCIM_URL_TEMPLATE}" 18 | 19 | # ######################## 2️⃣ Build/augmentation stage ####################### 20 | # FROM quay.io/keycloak/keycloak:26.2 AS builder 21 | 22 | # # ---- Build-time config (becomes immutable under --optimized) --------------- 23 | # ENV KC_DB=postgres \ 24 | # KC_HEALTH_ENABLED=true \ 25 | # KC_METRICS_ENABLED=true \ 26 | # KC_FEATURES=token-exchange,admin-fine-grained-authz 27 | 28 | # # Copy downloaded SCIM plug-in JAR into providers directory 29 | # COPY --from=fetcher /scim-plugin.jar /opt/keycloak/providers/ 30 | 31 | # # Run Quarkus build once and strip temp files 32 | # RUN /opt/keycloak/bin/kc.sh build --optimized && \ 33 | # rm -rf /opt/keycloak/standalone/tmp/* 34 | 35 | # ######################## 3️⃣ Runtime stage – slim, immutable ################ 36 | # FROM quay.io/keycloak/keycloak:26.2 AS runtime 37 | # LABEL org.opencontainers.image.title="Keycloak 26 optimized w/ SCIM" \ 38 | # org.opencontainers.image.description="Optimised Keycloak with SCIM-for-Keycloak plug-in auto-fetched at build time." 
\ 39 | # maintainer="you@example.com" 40 | 41 | # COPY --from=builder /opt/keycloak/ /opt/keycloak/ 42 | # USER 1000 43 | # EXPOSE 8080 9000 44 | # ENTRYPOINT ["/opt/keycloak/bin/kc.sh", "start", "--optimized"] 45 | # ############################################################################### 46 | -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/services/keykloak/Dockerfile: -------------------------------------------------------------------------------- 1 | # ############################################################################### 2 | # # Dockerfile – Keycloak 26.2 with PostgreSQL build + SCIM plug-in (auto-fetch) 3 | # ############################################################################### 4 | 5 | # ######################## 0️⃣ Parameters you might tweak ###################### 6 | # # Pass SCIM_VERSION at build time if you want a different tag, e.g. 7 | # # docker build --build-arg SCIM_VERSION=2.2.2 -t my/keycloak:26.2-scim . 
8 | # ARG SCIM_VERSION=2.2.1 9 | # # The download URL template (GitHub project: Captain-P-Goldfish/scim-for-keycloak) 10 | # ARG SCIM_URL_TEMPLATE="https://github.com/Captain-P-Goldfish/scim-for-keycloak/releases/download/v${SCIM_VERSION}/scim-for-keycloak-kc-26-${SCIM_VERSION}-free.jar" 11 | 12 | # ######################## 1️⃣ Fetch stage – download the JAR ################## 13 | # FROM alpine:3.19 AS fetcher 14 | # ARG SCIM_VERSION 15 | # ARG SCIM_URL_TEMPLATE 16 | # RUN apk add --no-cache curl ca-certificates && \ 17 | # curl -L --fail -o /scim-plugin.jar "${SCIM_URL_TEMPLATE}" 18 | 19 | # ######################## 2️⃣ Build/augmentation stage ####################### 20 | # FROM quay.io/keycloak/keycloak:26.2 AS builder 21 | 22 | # # ---- Build-time config (becomes immutable under --optimized) --------------- 23 | # ENV KC_DB=postgres \ 24 | # KC_HEALTH_ENABLED=true \ 25 | # KC_METRICS_ENABLED=true \ 26 | # KC_FEATURES=token-exchange,admin-fine-grained-authz 27 | 28 | # # Copy downloaded SCIM plug-in JAR into providers directory 29 | # COPY --from=fetcher /scim-plugin.jar /opt/keycloak/providers/ 30 | 31 | # # Run Quarkus build once and strip temp files 32 | # RUN /opt/keycloak/bin/kc.sh build --optimized && \ 33 | # rm -rf /opt/keycloak/standalone/tmp/* 34 | 35 | # ######################## 3️⃣ Runtime stage – slim, immutable ################ 36 | # FROM quay.io/keycloak/keycloak:26.2 AS runtime 37 | # LABEL org.opencontainers.image.title="Keycloak 26 optimized w/ SCIM" \ 38 | # org.opencontainers.image.description="Optimised Keycloak with SCIM-for-Keycloak plug-in auto-fetched at build time." 
\ 39 | # maintainer="you@example.com" 40 | 41 | # COPY --from=builder /opt/keycloak/ /opt/keycloak/ 42 | # USER 1000 43 | # EXPOSE 8080 9000 44 | # ENTRYPOINT ["/opt/keycloak/bin/kc.sh", "start", "--optimized"] 45 | # ############################################################################### 46 | -------------------------------------------------------------------------------- /services/model-server/README.md: -------------------------------------------------------------------------------- 1 | # VLLM 2 | 3 | Ensure the desired version of VLMM is selected here. 4 | 5 | ## initial setup 6 | 7 | ``` 8 | git clone --branch v0.9.1 --depth 1 https://github.com/vllm-project/vllm.git 9 | 10 | # alternatively 11 | git submodule add https://github.com/vllm-project/vllm.git vllm 12 | cd vllm 13 | git checkout v0.9.1 14 | ``` 15 | 16 | ## updating a new version of vllm 17 | 18 | Ensure the right version is set in docker compose: `SETUPTOOLS_SCM_PRETEND_VERSION_FOR_VLLM: "0.9.1"` 19 | 20 | ```bash 21 | # 1. Navigate into the submodule directory 22 | cd services/model-server/vllm 23 | 24 | # 2. Add the original vLLM repo as a new remote called "upstream" 25 | git remote add upstream https://github.com/vllm-project/vllm.git 26 | 27 | # 3. Verify that it's set up correctly 28 | git remote -v 29 | 30 | git fetch upstream 31 | git fetch upstream --tags 32 | 33 | # ensure we are on our hotfixed branch 34 | # ba28a8452b4e278b7da4e7a1eb1bc5a334a755ca 35 | git checkout template-git-fix 36 | 37 | # git rebase upstream/main 38 | # This is the key command. Read it as: 39 | # "Rebase ONTO v0.9.2, all commits that are on my current branch (template-git-fix) 40 | # since it diverged from v0.9.1." 
41 | git rebase --onto v0.9.2 v0.9.1 template-git-fix 42 | 43 | git push --force-with-lease origin template-git-fix 44 | 45 | git add services/model-server/vllm 46 | git commit -m "chore: Update vllm submodule to v0.9.2 from upstream" 47 | git push 48 | ``` 49 | 50 | ## gpu vs cpu 51 | 52 | ``` 53 | docker run --runtime nvidia --gpus all \ 54 | -v ~/.cache/huggingface:/root/.cache/huggingface \ 55 | --env "HUGGING_FACE_HUB_TOKEN=hf_<YOUR_HF_TOKEN>" \ 56 | -p 8000:8000 \ 57 | --ipc=host \ 58 | vllm/vllm-openai:latest \ 59 | --model mistralai/Mistral-7B-v0.1 60 | 61 | docker run --privileged \ 62 | -v ~/.cache/huggingface:/root/.cache/huggingface \ 63 | --env "HUGGING_FACE_HUB_TOKEN=hf_<YOUR_HF_TOKEN>" \ 64 | -p 8000:8000 \ 65 | --ipc=host \ 66 | public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.9.1 \ 67 | --model mistralai/Mistral-7B-v0.1 68 | 69 | 70 | 71 | 72 | 73 | 74 | curl http://localhost:8000/v1/models 75 | curl http://localhost:8000/v1/completions \ 76 | -H "Content-Type: application/json" \ 77 | -d '{ 78 | "model": "mistralai/Mistral-7B-v0.1", 79 | "prompt": "San Francisco is a", 80 | "max_tokens": 7, 81 | "temperature": 0 82 | }' 83 | 84 | ``` -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: secrets-encrypt 2 | ## encrypt secrets with SOPS and AGE 3 | secrets-encrypt: 4 | pixi run secrets-encrypt 5 | 6 | 7 | .PHONY: secrets-decrypt 8 | ## decrypt secrets with SOPS and AGE 9 | secrets-decrypt: 10 | pixi run secrets-decrypt 11 | 12 | 13 | ################################################################################# 14 | # PROJECT RULES # 15 | ################################################################################# 16 | ################################################################################# 17 | # Self Documenting Commands # 18 | 
################################################################################# 19 | .DEFAULT_GOAL := help 20 | # Inspired by 21 | # sed script explained: 22 | # /^##/: 23 | # * save line in hold space 24 | # * purge line 25 | # * Loop: 26 | # * append newline + line to hold space 27 | # * go to next line 28 | # * if line starts with doc comment, strip comment character off and loop 29 | # * remove target prerequisites 30 | # * append hold space (+ newline) to line 31 | # * replace newline plus comments by `---` 32 | # * print line 33 | # Separate expressions are necessary because labels cannot be delimited by 34 | # semicolon; see 35 | tasks: 36 | pixi task 37 | 38 | .PHONY: help 39 | help: 40 | @echo "$$(tput bold)Available rules:$$(tput sgr0)" 41 | @echo 42 | @sed -n -e "/^## / { \ 43 | h; \ 44 | s/.*//; \ 45 | :doc" \ 46 | -e "H; \ 47 | n; \ 48 | s/^## //; \ 49 | t doc" \ 50 | -e "s/:.*//; \ 51 | G; \ 52 | s/\\n## /---/; \ 53 | s/\\n/ /g; \ 54 | p; \ 55 | }" ${MAKEFILE_LIST} \ 56 | | LC_ALL='C' sort --ignore-case \ 57 | | awk -F '---' \ 58 | -v ncol=$$(tput cols) \ 59 | -v indent=19 \ 60 | -v col_on="$$(tput setaf 6)" \ 61 | -v col_off="$$(tput sgr0)" \ 62 | '{ \ 63 | printf "%s%*s%s ", col_on, -indent, $$1, col_off; \ 64 | n = split($$2, words, " "); \ 65 | line_length = ncol - indent; \ 66 | for (i = 1; i <= n; i++) { \ 67 | line_length -= length(words[i]) + 1; \ 68 | if (line_length <= 0) { \ 69 | line_length = ncol - indent - length(words[i]) - 1; \ 70 | printf "\n%*s ", -indent, " "; \ 71 | } \ 72 | printf "%s ", words[i]; \ 73 | } \ 74 | printf "\n"; \ 75 | }' \ 76 | | more $(shell test $(shell uname) = Darwin && echo '--no-init --raw-control-chars') 77 | -------------------------------------------------------------------------------- /docs/LOCALDNS.md: -------------------------------------------------------------------------------- 1 | # Local DNS 2 | 3 | ## Allowable root domains for local development environments 4 | 5 | When working with local 
development environments on your laptop, you have flexibility in choosing root domains that are primarily used for internal access and testing purposes. 6 | 7 | Here are some common practices and considerations: 8 | 9 | ### 0. Using a proper DNS with redirects 10 | 11 | - http://*.llminabox.geoheil.com will redirect to 127.0.0.1 12 | - 13 | ### 1. Using `.localhost` 14 | 15 | The `.localhost` Top-Level Domain (TLD) is specifically reserved for loopback purposes, making it an excellent choice for local development. 16 | It's statically defined in host DNS implementations to point to the loopback IP address (127.0.0.1) and is ideal when you need to access a service running directly on your machine without relying on external DNS resolution. 17 | 18 | ### 2. Using custom TLDs for local development 19 | 20 | Many developers use made-up TLDs, such as `.docker` or other custom extensions, to organize their local development domains. For example, a Docker container named "project" might be accessible through `project.docker`. 21 | 22 | It's important to be mindful of potential conflicts, especially when using TLDs that are eventually registered publicly, like the case of the `.dev` TLD which was later acquired by Google and is now a valid registrable domain. 23 | 24 | For these cases, you will need to configure your local DNS settings, such as the `/etc/hosts` file, or use a local DNS server like dnsmasq to resolve your chosen custom domains to the appropriate IP addresses (usually 127.0.0.1 or a Docker container's IP). 25 | 26 | ### 3. Using `.home.arpa` for home networks 27 | 28 | The Internet Engineering Task Force (IETF) approved the .home.arpa TLD specifically for home network use. 29 | 30 | This TLD is suitable when configuring domains within your local home network, such as when assigning names to devices through your router's DHCP server. 31 | 32 | ### 4. 
Using subdomains of a registered domain 33 | 34 | A more robust approach, especially for complex local development setups or those with future public-facing applications, involves using subdomains of a domain you already own. 35 | 36 | For instance, if you own `example.com`, you could use `corp.example.com` for your internal development environment or `jellyfin.example.com` for a media server. 37 | 38 | This strategy helps prevent potential conflicts with publicly registered domains and allows for smoother transitions if your local projects are eventually deployed to production environments. 39 | 40 | -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: tpl-update 2 | ## Update template project 3 | tpl-update: 4 | pixi run tpl-update 5 | 6 | 7 | .PHONY: cleanup-state 8 | ## cleanup state 9 | cleanup-state: 10 | pixi run cleanup-state 11 | 12 | 13 | .PHONY: fmt 14 | ## basic auto formatting 15 | fmt: 16 | pixi run -e ci-basics fmt 17 | 18 | 19 | .PHONY: fmt-unsafe 20 | ## enhanced auto formatting 21 | fmt-unsafe: 22 | pixi run -e ci-basics fmt-unsafe 23 | 24 | 25 | .PHONY: lint 26 | ## Ruff based flake8 style linting plus type checking via pyright 27 | lint: 28 | pixi run -e ci-validation lint 29 | 30 | 31 | .PHONY: test 32 | ## Execute tests with coverage 33 | test: 34 | pixi run -e ci-validation test 35 | 36 | 37 | 38 | .PHONY: secrets-encrypt 39 | ## encrypt secrets with SOPS and AGE 40 | secrets-encrypt: 41 | pixi run secrets-encrypt 42 | 43 | 44 | .PHONY: secrets-decrypt 45 | ## encrypt secrets with SOPS and AGE 46 | secrets-decrypt: 47 | pixi run secrets-decrypt 48 | 49 | 50 | ## cleanup local non used branches 51 | clean-local-branches: 52 | pixi run clean-local-branches 53 | 54 | 55 | ################################################################################# 56 | # PROJECT RULES # 57 | 
################################################################################# 58 | ################################################################################# 59 | # Self Documenting Commands # 60 | ################################################################################# 61 | .DEFAULT_GOAL := help 62 | # Inspired by 63 | # sed script explained: 64 | # /^##/: 65 | # * save line in hold space 66 | # * purge line 67 | # * Loop: 68 | # * append newline + line to hold space 69 | # * go to next line 70 | # * if line starts with doc comment, strip comment character off and loop 71 | # * remove target prerequisites 72 | # * append hold space (+ newline) to line 73 | # * replace newline plus comments by `---` 74 | # * print line 75 | # Separate expressions are necessary because labels cannot be delimited by 76 | # semicolon; see 77 | tasks: 78 | pixi task 79 | 80 | .PHONY: help 81 | help: 82 | @echo "$$(tput bold)Available rules:$$(tput sgr0)" 83 | @echo 84 | @sed -n -e "/^## / { \ 85 | h; \ 86 | s/.*//; \ 87 | :doc" \ 88 | -e "H; \ 89 | n; \ 90 | s/^## //; \ 91 | t doc" \ 92 | -e "s/:.*//; \ 93 | G; \ 94 | s/\\n## /---/; \ 95 | s/\\n/ /g; \ 96 | p; \ 97 | }" ${MAKEFILE_LIST} \ 98 | | LC_ALL='C' sort --ignore-case \ 99 | | awk -F '---' \ 100 | -v ncol=$$(tput cols) \ 101 | -v indent=19 \ 102 | -v col_on="$$(tput setaf 6)" \ 103 | -v col_off="$$(tput sgr0)" \ 104 | '{ \ 105 | printf "%s%*s%s ", col_on, -indent, $$1, col_off; \ 106 | n = split($$2, words, " "); \ 107 | line_length = ncol - indent; \ 108 | for (i = 1; i <= n; i++) { \ 109 | line_length -= length(words[i]) + 1; \ 110 | if (line_length <= 0) { \ 111 | line_length = ncol - indent - length(words[i]) - 1; \ 112 | printf "\n%*s ", -indent, " "; \ 113 | } \ 114 | printf "%s ", words[i]; \ 115 | } \ 116 | printf "\n"; \ 117 | }' \ 118 | | more $(shell test $(shell uname) = Darwin && echo '--no-init --raw-control-chars') 119 | 
-------------------------------------------------------------------------------- /services/llm-router/litellm_config.yml: -------------------------------------------------------------------------------- 1 | general_settings: 2 | master_key: "os.environ/LITELLM_MASTER_KEY" 3 | block_robots: true 4 | #alerting: ["slack"] # Setup slack alerting - get alerts on LLM exceptions, Budget Alerts, Slow LLM Responses 5 | proxy_batch_write_at: 60 # Batch write spend updates every 60s 6 | database_connection_pool_limit: 10 # limit the number of database connections to = MAX Number of DB Connections/Number of instances of litellm proxy (Around 10-20 is good number) 7 | 8 | # OPTIONAL Best Practices 9 | #disable_spend_logs: True # turn off writing each transaction to the db. We recommend doing this is you don't need to see Usage on the LiteLLM UI and are tracking metrics via Prometheus 10 | disable_error_logs: True # turn off writing LLM Exceptions to DB 11 | #allow_requests_on_db_unavailable: True # Only USE when running LiteLLM on your VPC. Allow requests to still be processed even if the DB is unavailable. We recommend doing this if you're running LiteLLM on VPC that cannot be accessed from the public internet. 12 | 13 | litellm_settings: 14 | request_timeout: 600 # raise Timeout error if call takes longer than 600 seconds. 
Default value is 6000seconds if not set 15 | #set_verbose: False # Switch off Debug Logging, ensure your logs do not have any debugging on 16 | #json_logs: true # Get debug logs in json format 17 | model_list: 18 | - model_name: gpt-4.1-2025-04-14 19 | litellm_params: 20 | model: openai/gpt-4.1-2025-04-14 21 | api_key: os.environ/OPENAI_API_KEY 22 | - model_name: gpt-4o-2024-11-20 23 | litellm_params: 24 | model: openai/gpt-4o-2024-11-20 25 | api_key: os.environ/OPENAI_API_KEY 26 | - model_name: gpt-4o-mini-2024-07-18 27 | litellm_params: 28 | model: openai/gpt-4o-mini-2024-07-18 29 | api_key: os.environ/OPENAI_API_KEY 30 | - model_name: claude-sonnet-4-20250514 31 | litellm_params: 32 | model: claude-sonnet-4-20250514 33 | api_key: "os.environ/ANTHROPIC_API_KEY" 34 | - model_name: claude-3-7-sonnet-20250219 35 | litellm_params: 36 | model: claude-3-7-sonnet-20250219 37 | api_key: "os.environ/ANTHROPIC_API_KEY" 38 | - model_name: gemini-2.5-flash-preview-05-20 39 | litellm_params: 40 | model: vertex_ai/gemini-2.5-flash-preview-05-20 41 | vertex_project: "ascii-450914" 42 | vertex_location: "us-central1" 43 | vertex_credentials: "/secrets/google_vertexai.json" 44 | - model_name: gemini-2.5-pro-preview-06-05 45 | litellm_params: 46 | model: vertex_ai/gemini-2.5-pro-preview-06-05 47 | vertex_project: "ascii-450914" 48 | vertex_location: "global" 49 | vertex_credentials: "/secrets/google_vertexai.json" 50 | - model_name: gemini-2.5-pro-preview-05-06 51 | litellm_params: 52 | model: vertex_ai/gemini-2.5-pro-preview-05-06 53 | vertex_project: "ascii-450914" 54 | vertex_location: "us-central1" 55 | vertex_credentials: "/secrets/google_vertexai.json" 56 | - model_name: gemini-2.5-flash-preview-04-17 57 | litellm_params: 58 | model: vertex_ai/gemini-2.5-flash-preview-04-17 59 | vertex_project: "ascii-450914" 60 | vertex_location: "us-central1" 61 | vertex_credentials: "/secrets/google_vertexai.json" 62 | - model_name: gemini-2.0-flash-preview-image-generation 63 | 
litellm_params: 64 | model: vertex_ai/gemini-2.0-flash-preview-image-generation 65 | vertex_project: "ascii-450914" 66 | vertex_location: "us-central1" 67 | vertex_credentials: "/secrets/google_vertexai.json" 68 | - model_name: gemini-2.0-flash-lite-001 69 | litellm_params: 70 | model: vertex_ai/gemini-2.0-flash-lite-001 71 | vertex_project: "ascii-450914" 72 | vertex_location: "us-central1" 73 | vertex_credentials: "/secrets/google_vertexai.json" 74 | - model_name: "gemma3:4b" 75 | litellm_params: 76 | model: "ollama_chat/gemma3:4b" 77 | api_base: "http://ollama:11434" 78 | -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/services/llm-router/litellm_config.yml: -------------------------------------------------------------------------------- 1 | general_settings: 2 | master_key: "os.environ/LITELLM_MASTER_KEY" 3 | block_robots: true 4 | #alerting: ["slack"] # Setup slack alerting - get alerts on LLM exceptions, Budget Alerts, Slow LLM Responses 5 | proxy_batch_write_at: 60 # Batch write spend updates every 60s 6 | database_connection_pool_limit: 10 # limit the number of database connections to = MAX Number of DB Connections/Number of instances of litellm proxy (Around 10-20 is good number) 7 | 8 | # OPTIONAL Best Practices 9 | #disable_spend_logs: True # turn off writing each transaction to the db. We recommend doing this is you don't need to see Usage on the LiteLLM UI and are tracking metrics via Prometheus 10 | disable_error_logs: True # turn off writing LLM Exceptions to DB 11 | #allow_requests_on_db_unavailable: True # Only USE when running LiteLLM on your VPC. Allow requests to still be processed even if the DB is unavailable. We recommend doing this if you're running LiteLLM on VPC that cannot be accessed from the public internet. 12 | 13 | litellm_settings: 14 | request_timeout: 600 # raise Timeout error if call takes longer than 600 seconds. 
Default value is 6000seconds if not set 15 | #set_verbose: False # Switch off Debug Logging, ensure your logs do not have any debugging on 16 | #json_logs: true # Get debug logs in json format 17 | model_list: 18 | - model_name: gpt-4.1-2025-04-14 19 | litellm_params: 20 | model: openai/gpt-4.1-2025-04-14 21 | api_key: os.environ/OPENAI_API_KEY 22 | - model_name: gpt-4o-2024-11-20 23 | litellm_params: 24 | model: openai/gpt-4o-2024-11-20 25 | api_key: os.environ/OPENAI_API_KEY 26 | - model_name: gpt-4o-mini-2024-07-18 27 | litellm_params: 28 | model: openai/gpt-4o-mini-2024-07-18 29 | api_key: os.environ/OPENAI_API_KEY 30 | - model_name: claude-sonnet-4-20250514 31 | litellm_params: 32 | model: claude-sonnet-4-20250514 33 | api_key: "os.environ/ANTHROPIC_API_KEY" 34 | - model_name: claude-3-7-sonnet-20250219 35 | litellm_params: 36 | model: claude-3-7-sonnet-20250219 37 | api_key: "os.environ/ANTHROPIC_API_KEY" 38 | - model_name: gemini-2.5-flash-preview-05-20 39 | litellm_params: 40 | model: vertex_ai/gemini-2.5-flash-preview-05-20 41 | vertex_project: "ascii-450914" 42 | vertex_location: "us-central1" 43 | vertex_credentials: "/secrets/google_vertexai.json" 44 | - model_name: gemini-2.5-pro-preview-06-05 45 | litellm_params: 46 | model: vertex_ai/gemini-2.5-pro-preview-06-05 47 | vertex_project: "ascii-450914" 48 | vertex_location: "global" 49 | vertex_credentials: "/secrets/google_vertexai.json" 50 | - model_name: gemini-2.5-pro-preview-05-06 51 | litellm_params: 52 | model: vertex_ai/gemini-2.5-pro-preview-05-06 53 | vertex_project: "ascii-450914" 54 | vertex_location: "us-central1" 55 | vertex_credentials: "/secrets/google_vertexai.json" 56 | - model_name: gemini-2.5-flash-preview-04-17 57 | litellm_params: 58 | model: vertex_ai/gemini-2.5-flash-preview-04-17 59 | vertex_project: "ascii-450914" 60 | vertex_location: "us-central1" 61 | vertex_credentials: "/secrets/google_vertexai.json" 62 | - model_name: gemini-2.0-flash-preview-image-generation 63 | 
litellm_params: 64 | model: vertex_ai/gemini-2.0-flash-preview-image-generation 65 | vertex_project: "ascii-450914" 66 | vertex_location: "us-central1" 67 | vertex_credentials: "/secrets/google_vertexai.json" 68 | - model_name: gemini-2.0-flash-lite-001 69 | litellm_params: 70 | model: vertex_ai/gemini-2.0-flash-lite-001 71 | vertex_project: "ascii-450914" 72 | vertex_location: "us-central1" 73 | vertex_credentials: "/secrets/google_vertexai.json" 74 | - model_name: "gemma3:4b" 75 | litellm_params: 76 | model: "ollama_chat/gemma3:4b" 77 | api_base: "http://ollama:11434" 78 | -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/services/llm-router/google_vertexai.json.enc: -------------------------------------------------------------------------------- 1 | { 2 | "data": "ENC[AES256_GCM,data:q16oWolGXvGw7mrtpkt+icsKDu7ItXqFs67RktCmzqpFTMuRNshxIPv9A2Ej6JcqZylm8ep1Uvtd3FZBcp9L5bjzFX/145LazAdF3A93shotBDYyBo84HtgKN1x1tCuWIAcj4lZwANz7yqwnGqkQC25v4XDwErhbH63t+QfJeTXW4If75yeG7vJgteJWBYbINdYmjXRREpsUB1CiK/ITYoijSQko3/hj6kS1554NKUAEISdAeXSjbnzvrAvXfqFXWhRVpQ77hB3F7aq8JynDr27JBlTlJ5wiXbrWg9bMqKZbPUB8iKNj77jnnAa3shSA/Kv1OI2QLtTnihD+aEyzhYw/QZMB4kttFoEFlaI07aSok/1R6zmKC9dBWhY7ZEfo9WVljAnubqaZSevMUuGi3Bc1CUFEP478ledfNrPSo24w7gKJPR6LWt/KIT+6L3RiSGOyD0wv9lSQC/e9L5958RKaBQXvBu2qmiFPdheYTNyBHYvwXobzn+ZR4mfxh055Vb7JptbayYKVi3KQQBnvd4ubFtqCehi9PViXgum4TlUElL/gr3eKJKETL5t4g0ISQ65k6MWlxFGbKeZEZTvcIl9H0XCT4sTtTamPiRzmV3+nMsVJaInZxkceM2IIk6C6R3mnl2nE5+JimPJG/7yNW+beT+WFV7A/lOgHs2Ve9Ib4gNsHsePP4F6D6Mm2oJV/orQw4IP8dGKsy09ljbH/2aYt2gG4KpHCMDvuQU6zH1A1iOkomUtmd5ktHTDZTR2hCzGMR25EWQwb0pcNTP8hx9je4rteMvlVm3umaAWkM81CoN+zGd2fQ1UeTVXPpZK95gWsN6pMkSnQiAe4oPRjTF4dNmOaeJWzbhMn9Yro+WdxYvuA7bqzI4IA5Yac4A9/2z2kFullbp92yKQXORp4PHed4zb8PAqHpMTlLLSoc98K6xQBOQEn8wjTc3kg64ZRcGHfVuHv+3urbFYUVYsWOU0YmN9tm5bjEhMB/SueybwqNo9wT0DWf8ehkLg7vbVmkS5sc0ZRclHH7VpsTYGKZy0we5CS5S75YHEI3prUkwDM81Bg8MeQDCtReBQ/PVF8it1X8dQH7+n0nYFAMg70cqcJ5hXmqIBhBpZSPtKFtvnVK59xl/UX
bY22t+cGF5XxIOV467RUr3lkFA/o5L51zrT3XHanY7x8XHm0tk4STaUyDULzTb8EaNKVZACiRWOmVDcc72Vs0/5YfZCRaX0oo15T6vSufidN5Ze228F7Z+loI3ZtFIGK/kdhYPgjuwIOQzUEXoluO+AGV9qzqlflAso2JIanATfGdlB3h0wRqFlDKLUg/7kQenBmaOqd1WAQlMGlG1OWQ61TtG+0Q/gjPDpFS2qz8niuWycVul44EX+0LtA1daBsscH1i0e4JiptzOBWOACQ2xvx6/mefJdC/t97cUlelOLmOROJpIgTP9cDAB5M2uV2GURddQAItQMIfeAqKyPcSFnwCHpdAq9u9Yrm/Djz5DYC25+DKv4Crrr9XumtFlSozjBXO7BESFRPPaBXMkfs+VL/1d/BYe2XrJUl5Lqls1S0To0Jx6y0K8mjnCp43YGY7PyzvkJgJWK73FoPAmPza1cHrDZi4ciORq1Bi4dpPj+VqcJP8Mrw1SUPoIT1Cm9R4J5o1p/FRQhIcX56NrNtGtmDkltJyHF9rrZgXSJldvEcfGeK480BK5xgeU2lRTGhvIaG8jmJF7h6U2pyp4EVzDrXlTa/xzcHQZUF2Nw4pMjdmb2h7gugh9UklzB90Q++gmi3JyvEPkuM6oQ5kWZ3WJ7B+fmhCFVrsKzMK/6G2oIjqArdlJLmhZ9bWspKF2vTlwvcK4s9tJhnrKFw00AFNDYhjh9oGrGWJUMLeFO+vtXUjHSjH0a+MvCGmr6xTwJczA/g1nagIUzi1+YGtMYCDcsFHOO648SpMCuHySbOcPHeW4oub8jUlanZM8QzDrA0jI4+krxTCx5Ia4XV2FMPEA13z8TDNZu/ZGBQptNNa7aEUUcFW2ghq65lyVy3e/g7WkDX+iGs1YGDVkexJ7aZTA5k7NqKqqwR9b8YBHeEhrxgoMRFH5D73kncnuOXSzH4/bIAPzTrL90bOWvC/t7zAueE5qlo/+0/U6cyJNwnCv7X6H/TzIbMT177hZ82YVye+R0075iYzhTWclSbFWho4lMLxhPmJ2iOoObk2qvEEX16mYKaL4Ig3EuUf0dSHEDNAiTpXTjftkhZ7Tle6+W4+o3luEy7VVhIzWuUFmB9oRxcktHBU6s1bnhvu/7Flef+ob1pvV+yCPYAq0PEzxDn/70gWLNBqQc4SRAWEqrs9S1O1TY4yHozjuDF0E/SLUPjCcgCokpOl1F17OlRpI65ywMOYGfgLkH4cGWDRzd5/aJPFUEhBwfdr1kfrACBqphaHbgm7UEvROBaL66frG/OfWDdcpsZo0Y6O9NgMRrlM66d0Nuo95Q7JfluhP0dFTqGiWZJYM2Rzrnory5lG/3Cf5eydumho0rucdKEKAzl6I0sd4VEN63CuKbxECJaj/mqtuM/ZTriKKoLzbBzBcpM7uPPJUAi3NpNC2pWLAbC7lCALrZWos4xW5VVMzH4+pC40M0M7HgdNZRZnh+NXLclJELUkPIR3jLd9wfJnbeYF2CJUT1sQEizW7ssMkP9Vel5BxEBLQqtLCbxUVSYwn2N9YXl/3eZAtX37ECH6mlHrGsmmBYCjChlus0ApobvO4Wlw1zFlhE77wLax2q89+wGq6nI4dUrz4PYWfy2/tY2XWlVu8xMBBbwDPIOt3Z8KZhZeTZYHxGDDDNHn6qGeOP3o1LZejyfhKJ143zLsRAwoCtXt+ffeNPczlsng3LyJYD9seQwdwI/8o8Y2d/AOlVQaFU5mawNQmLViHc85KDZ29GWjuDrhMlJiDZIiKFRlSGGNCiqciT+7RqdbDH8b0s0yff7OYmbwQlXGpTFEY8HezxGDyVpbCduMpNsj5RDab06ExbJcYOtRcisM4xQptlkiylaFaRIISpO25Z+Idv3epfIi6e4Ldasm24LKo/eaVyXtkKh34SbPLq0R2IIVPE5i1GTKx0O/7c/iiXf+0QWPgu0X6mGIQ+yoyAlU6qgrFOrbpqTfd1qpqC2JFT7
UaRE,iv:JLLnRY3bfY5bzvNPAQOPyTor8VqVC31q8sunXDkT5WU=,tag:+HuK1F1a/2DE2unBHxvCzA==,type:str]", 3 | "sops": { 4 | "kms": null, 5 | "gcp_kms": null, 6 | "azure_kv": null, 7 | "hc_vault": null, 8 | "age": [ 9 | { 10 | "recipient": "age1ph7watxp99nsl8ejs3snrf2jykwxc9j3va00z5xrywttyms3af9q4chaa0", 11 | "enc": "-----BEGIN AGE ENCRYPTED FILE-----\nYWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBRS213ZmVQMlFadjlBQjkz\nUW9uZkJxNmY0WmlCdkM0MHJnblhlTGxYTGpBCkxOdDk5aE5Od2FRUGVQa1hUeGFP\ncWxZSm1PVkxjS0M5di85T0owR0RVTTAKLS0tIE1zekw1NnNHZm5sMVlCMWFzVjFJ\nMFRwMFlpaXVLYTQ3UVVzcTdFbEFLUlEKkZcDIrYgquiFo6VX0h8571Ko7F8Np6Qm\nfoOOiMAX9DJrF2GphiYdM8YVQMCQxZ+RFvz+WWmA0j1krs2xeXmjJA==\n-----END AGE ENCRYPTED FILE-----\n" 12 | } 13 | ], 14 | "lastmodified": "2025-07-20T18:20:20Z", 15 | "mac": "ENC[AES256_GCM,data:ZbkbXuIgkvmY53KE0eLYPpftLxd05ykmgmHU8TNpVXDS1RwhG2Gd5wstmdb1y6CiZ7on5cW3wG+vaG9N23SpX0LlUUz01m1DAnRPMThY93/25C6DSUWRTEMLgL6knMrzVygeeQuk2EFiDZYV1SZgarillIZb4Swq77yTDxJDLZg=,iv:AtaINBAw351hXGFQC6CaZ6d9QyqxK4dlZZWxLWkwrE8=,tag:u85Qe0WqeJgmrbeaP5ZJJQ==,type:str]", 16 | "pgp": null, 17 | "unencrypted_suffix": "_unencrypted", 18 | "version": "3.9.4" 19 | } 20 | } -------------------------------------------------------------------------------- /services/llm-router/google_vertexai.json.enc: -------------------------------------------------------------------------------- 1 | { 2 | "data": 
"ENC[AES256_GCM,data:MasLFwKDcxjF1XHQfkMckAlhRLn8IiEJgPGVjCb8tYp83tsZ/xUXpU6h8J2NCpcm1tZmJJZZ+wSwB0reCGPqzR69SmA5044buMBPknewPA6hebInzgCoorcEFjdYQfQChRIVM1oYQezE0MuZg45bsHH2f7BfrvE+DeN4J1iRWG2B8CNa8VA0wHZ0IBgIE6+Zx+aVrYyc/ZVe9Dz5F9/NcCA/YvNJ1ebmKY7BA1lzXP5BVm8nevVy6v/d7BCUvE+UQa5GIhHLTQUaLWb61kcb6VKxkdiQqZCMSKnjLIHvDa2WKjqeidL2XEv1e87qED3wmU44mX8g+Q2S0RwVa0nCmm1v28xb9kJfcjWS1SUA+BkPOnHTRyfVI3qy9l2QzkBeppkXrfSjhDHhfK2HMppGmg30hGznPYqY9pPai/E21k0Ls8dqX/3cEk796c8Nh0C3T2266elaPjhAzEOCmRWD+NmQ45FReONMCszQtkQ9sbGc8u9JEEqXSWz9HENLtbyXtg3jp04pxau0tSyJeMpDpcSEVWHTwWhcqLYn0tyPTm7hpUZ9U8apvqYfgQZQ3/JjHcpSlwcY4nFG3pWgbaZh7TfP4wSncjuLfFqrkdKIw5Ym7pSd61A6z1dY/PeHWv/qB15Q/4kcf132I4DIlopQAjht9T3+MLMjvoXBhk5RbGt+5HZ5XTDbNoMT+/DUO6FgIqjje18jQd6Ld2s1mIZ8vl179H8kNfyz6M2mVXYCNIS9/J4AXV3bQjbk9a7kHU2RpPfNi6yBvMsmb4nPCuDSmreTyevHDwSsd3MEI4aRCCySurgqwdT8iPd5+ERcGZ2OccKaFRrkOTjsnADLf0xpl24BEBvqt8mKrnp2kI9rzBdoSXr4ayUsHRIw/DecYqrCTl5mripacYkMIILv/vfJUPgr138Un8Ativqeesl+6dlvekh3wwDuhFptJ0a618Ds4N0koYoSNhKyZJZuDRBUX1++bNdL58KW5ykelHzGxW8UmYbXAAVNTmYB1NTP5DK18GP1aS1yM8ngIGW2cYinOa4McHXwNX2YAErRvwOzJrOlXHHUgo9frHLJNouiZq4KK+ntln8y/6NFUwztRTsEQEAuYgZX/YK2Ahfmxld6jlLX5HE9FpFUkHzhSxYcj2nfoU2GJXBzsSOf6KbYlXmZ7HTswTCkHt2l37vbJMA1He0O+hg4RhLK46YIMODakCbtKfUHh7D5j5rZrKNNXyLTNBD8zSUUlAgpr6HtEOgFvabFGTsRa/BdrZ/zjPB3hoe2bqe6m5EsYIGQVIpSy3DZv4tEAaPmYFyivZMlrn+ofUYkP9FSIvfT+vlWp/4/3/Fdh2zBGYGmxBUGGSAYwTmhLxbU/M/hOG1+iY72lMQ7rtmAZympJuqhNr/gDkX4roweJ1tFYKAoVRLJTHkSppwV8n73amqWrFd6LuzOcJp6/NOOY4bvOY3TjpOJQcirTad7XWoUzI+CqDOIQt8NI4giMPA8ZzKUjkqZPqOX03eTXYbWvmqBrxSbcFawHCGT58PWQZ48RxpIVMkP6cmUlsZzrgURzNyScSkcseQe5Ir+6kbgjrO8yARv9LawpY05ccq5I4P4B+FBbO06wVPW83hDm9Emg0+3nt+yRQtojHORar+TJBL9qOE9WU+B6CEam5s1Lm6tbRk7M/te/r7zrYssdRihTN6TOTA32MHunR4KB5IsQjCX5hJywJ6GF3q5d4I8pecF3cDt4P60NJCo5Z7bo+2yzCYDUw8IYnemyLmV6FYkQmoYafWVHXVIXHodX1Olq6KiwCXOGwDXUgHk+LSMGEvrBB+/iuTRazw3CcCOABwjCctnJhExbyA14RS4Voc9bLLbn0v1SXYjEEGnoxvKtaYHQj0h1kO/cgt7ooJBUaUva6FltIY/Y6B9lsPq/fr0Auklf5JmIAECau1e5PopIkQ6IcA8bIrnG/VTHqtv578g6NVEPQYcSl6P8Ch
vtx7E9FBq4qhcUGy2cY4+pSVRXAoaH2b/FnIVZKN+3sbc/VWLomozwGyaL0ZqvVAQxFATr13YicTB4c1C9magG2PBSYk/ZvS657eVhirKmK5SAWzQT6OBbPeqL8aMttdTc+C4Go1/JI8MG79XHOYw0FlOVzImmpW8w3kgFYt+x/xGE8MGFsp1mbH+ELk6K8jkZQkWjYQKMicgrn6FwrCDPc2GNjqoozegBmzRfytVsJaphUsZJLpR2jMyYbvbHGvnj8/YEmsgKsaT4Y4EWy9E5ZAIA4UNeApiJVlPUmKCOlCiMeoBGP4m67ts7PmaU5odS1N2dfDK2ITcdmFE1HWcCgaEyRglnBPKqPlFVgRpb9TOdv8aEomXLz6KiIKy1nzgT5ODYPhgeXm5rgqb/qJZxMlN4rhJbWdq57jSR8LA6ywjrKUKlaohg702w/a1Sx/cUluGwkKfGtrgXWO4yii/1f0DQEFDzP4BktpY/rqIsjZLB1jlI/jNoI2VKUt89h5Q5nN1CtZNkMIkxKvjSwHOcgjjchOpdzNGEvtZsTmZXpRohFuaAzi3x4Sw59BItfg6HrQUCZvmKC4aIruKlozSbhIU4jflNXc0+cnW8sTwjU6X1qGUssFKSZYjRdIEb9U5V81WtTZSbRFme4X+XkSm7o5gEy9a93+WTDBhS+SrpT9V89RahYTg3hL+o8GefIJtDLZ1jH/opUZd2H4ufYC/jrcTS8zaZmU8tueqiskV+s+SkDauRorpBWTD0aT/DHx0ru41IStXj7ihiA07GZU1nO2iU2736FL5yTAEcwvnhc+mRTadaYHmJ7RcjGE9uawOW8645swGQ1GoUyThNn2V6wS0vVQyUuUP5z+gKQZbZjPtM30DY9UzbdGbPyCdUxNn3tXmqT9LHss0HKCW5PlU36ENmVI7Q/NWdtoeXPHmmgPIHrQq8oByYOUGbK7C8LL4H4BC5RPcHTHsMQGaDLjyglZTYWYaxsX/1E5dI+pheRqUkrd2lSd+EuFCbZwP2NkaQF1R57Tt890vTeJTuTIajlE8/BLlwsLGFlZBYOU5f+GrfPalCnCVo+6DVvujx2vJ3M36ovwzh7i8jkD8TWRnYfKZ5EkhemMoI4N4YOQWmF4ZPFW6AAZp0ufkgySKYye0zRz/rjwFJtvEDkhDi2iZmw7nA4/yKvVuMJgsbOYbqkC1uVA=,iv:MLMMHGYUJZXo2hbe9q6D3eaEFUbxBphz96BvEKIX3nQ=,tag:mUmLEsch/NqdTk97gQiYVg==,type:str]", 3 | "sops": { 4 | "kms": null, 5 | "gcp_kms": null, 6 | "azure_kv": null, 7 | "hc_vault": null, 8 | "age": [ 9 | { 10 | "recipient": "age1ph7watxp99nsl8ejs3snrf2jykwxc9j3va00z5xrywttyms3af9q4chaa0", 11 | "enc": "-----BEGIN AGE ENCRYPTED FILE-----\nYWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBYaURHK0xlVUNWOWkyczRU\nSVN4cXBRczhrOExUNkI2L3NrU3NwRXBFbm5vCmZHdW1kMWJ0MnFGWGg3NXhwQ2FL\nM0kyTzlZaGs3eUhVVlR2enRxV0R4aTQKLS0tIFJXUFpkeC9NRFB3R2llbHFrTVl2\nd0VzdFd3cCthSkdPbmFGZFZaa0xjWWsKODX6RdDTvFrdpkVPd9V6kuV7xdVGrYyI\nMTFoF384flU75BXq5pnXvhsYRmf1JR4CH/nPLxp8HmJ7txuYS/MERg==\n-----END AGE ENCRYPTED FILE-----\n" 12 | } 13 | ], 14 | "lastmodified": "2025-06-27T10:59:13Z", 15 | "mac": 
"ENC[AES256_GCM,data:ENKGRaba6XIK+6mw7gti75JGUsSnQwfhvahQxZF78pNDc9sxmXEQqKgfDOVDpJ+w4qoM2J3ancDKuajTTJE4SvzFigKv7tzryaz0AAB0VjkO4BME0uFWmjqwfZIMq+5jTjnILF7xLNJL8l2BZyCWZAplnPUGow1yoHvdTzrI5QE=,iv:UeCUCzurP/eIRxCM8UNsc/cjz19M4/3s7RRfJ/1iKuw=,tag:vT8QbCPjsVXUf6OM17Db/Q==,type:str]", 16 | "pgp": null, 17 | "unencrypted_suffix": "_unencrypted", 18 | "version": "3.9.4" 19 | } 20 | } -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "{{ cookiecutter.project_slug }}" 3 | version = "1.0.0" 4 | description = "{{ cookiecutter.project_name }}" 5 | authors = [{ name = "{{ cookiecutter.author }}", email = "{{ cookiecutter.author_email }}" }] 6 | requires-python = ">=3.13,<3.14" 7 | 8 | [tool.pixi.project] 9 | platforms = ["linux-64", "osx-arm64", "win-64", "linux-aarch64"] 10 | channels = ["conda-forge"] 11 | 12 | [build-system] 13 | build-backend = "hatchling.build" 14 | requires = ["hatchling"] 15 | 16 | [tool.pixi.environments] 17 | template = { features = ["template"], solve-group = "template" } 18 | ci-validation = { features = [ 19 | "ci-basics", 20 | 21 | ], solve-group = "default" } 22 | secops = { features = ["secops"], solve-group = "secops" } 23 | dev = { features = [ 24 | "ci-basics", 25 | ], solve-group = "default" } 26 | 27 | [tool.pixi.feature.ci-basics.dependencies] 28 | yamllint = ">=1.35.1,<2" 29 | taplo = ">=0.9.3,<0.10" 30 | pytest = ">=8.3.4,<9" 31 | pytest-mock = ">=3.14.0,<4" 32 | pytest-cov = "~=6.0.0" 33 | ruff = ">=0.9.4,<1" 34 | pyright = "~=1.1.393" 35 | git = "~=2.47.1" 36 | 37 | [tool.pixi.feature.ci-basics.pypi-dependencies] 38 | moto = "~=5.0.28" 39 | nbqa = "~=1.9.1" 40 | 41 | 42 | [tool.pixi.feature.template.dependencies] 43 | cruft = "~=2.16.0" 44 | 45 | [tool.pixi.feature.template.pypi-dependencies] 46 | jinja2-ospath = ">=0.3.0,<0.4.0" 47 | 48 | 
[tool.pixi.feature.secops.dependencies] 49 | go-sops = "~=3.9.4" 50 | age = "~=1.2.1" 51 | 52 | [tool.pixi.feature.secops.pypi-dependencies] 53 | 54 | [tool.pixi.tasks] 55 | 56 | [tool.pixi.tasks.fmt] 57 | cmd = "pixi run -e ci-basics ruff format ./src && ruff check --fix ./src && nbqa 'ruff format' src/* && yamllint -c yamllintconfig.yaml . && taplo fmt" 58 | description = "Format python files" 59 | env = { RUST_LOG = "warn" } 60 | 61 | [tool.pixi.tasks.fmt-unsafe] 62 | cmd = "pixi run -e ci-basics nbqa 'ruff format' src/* && ruff format ./src && nbqa ruff --fix --unsafe-fixes ./src/* && ruff check --fix --unsafe-fixes ./src && yamllint -c yamllintconfig.yaml . && taplo fmt" 63 | description = "Format python files - apply automatic ruff unsafe fixes" 64 | 65 | [tool.pixi.tasks.lint] 66 | cmd = "ruff check ./src && yamllint -c yamllintconfig.yaml . && taplo check && pyright" 67 | description = "Validate formatting and type check python files" 68 | 69 | [tool.pixi.tasks.test] 70 | cmd = "pytest --ignore=src/{{ cookiecutter.project_slug_pixi }}/code_location_{{ cookiecutter.project_slug_pixi }}_dbt/dbt_packages src" 71 | description = "Validate formatting and type check python files" 72 | 73 | [tool.pixi.tasks.tpl-update] 74 | cmd = "pixi run -e template cruft update" 75 | description = "Update from template" 76 | 77 | [tool.pixi.tasks.cleanup-state] 78 | cmd = "rm -rf {{ cookiecutter.state_path }}" 79 | description = "clean state directory" 80 | 81 | [tool.pixi.tasks.secrets-encrypt] 82 | cmd = "pixi run -e secops ./scripts/encrypt_secrets.sh" 83 | description = "encrypt secrets with SOPS and AGE" 84 | 85 | [tool.pixi.tasks.secrets-decrypt] 86 | cmd = "pixi run -e secops ./scripts/decrypt_secrets.sh" 87 | description = "decrypt secrets with SOPS and AGE" 88 | 89 | [tool.pixi.tasks.clean-local-branches] 90 | cmd = "pixi run -e ci-basics ./scripts/git_clean_local_branches.sh" 91 | description = "cleanup local non used branches" 92 | 93 | [tool.ruff] 94 | exclude = [ 
95 | ".git", 96 | "__pycache__", 97 | "docs/source/conf.py", 98 | "old", 99 | "build", 100 | "dist", 101 | ".pixi", 102 | "src/{{ cookiecutter.project_slug }}/code_location_{{ cookiecutter.project_slug }}_dbt/dbt_packages", 103 | "*.ipynb", 104 | ] 105 | 106 | line-length = 88 107 | 108 | [tool.ruff.lint] 109 | ignore = ["E501"] 110 | select = ["F", "E", "W", "C", "B", "I"] 111 | 112 | [tool.ruff.lint.mccabe] 113 | max-complexity = 5 114 | 115 | [tool.pyright] 116 | include = [ 117 | "src/code_location_{{ cookiecutter.project_slug }}", 118 | ] 119 | exclude = [ 120 | "src/code_location_{{ cookiecutter.project_slug }}/build", 121 | ] 122 | pythonVersion = "3.13" 123 | venvPath = ".pixi/envs" 124 | venv = "ci-validation" 125 | extraPaths = [ 126 | "src/code_location_{{ cookiecutter.project_slug }}", 127 | "src/code_location_foo", 128 | "src/shared_library", 129 | ] 130 | 131 | reportMissingTypeStubs = false 132 | reportImportCycles = "error" 133 | useLibraryCodeForTypes = true 134 | typeCheckingMode = "standard" 135 | 136 | 137 | reportArgumentType = "warning" 138 | reportCallIssue = "warning" 139 | reportOptionalMemberAccess = "warning" 140 | reportOptionalSubscript = "warning" 141 | 142 | [tool.taplo] 143 | exclude = [] -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/.env.enc: -------------------------------------------------------------------------------- 1 | { 2 | "data": 
"ENC[AES256_GCM,data:fshJya0TEVfaYQvp2XIPpl9hJlbGxpdcyIQBceITg4BC0kcOGh3akHjkvKjIWqfs7TNDeOVKs6oegicVcC5Lii11tF6rWG3CiohdvePKLQhYW4dT8A/C1AUrwWhig/pJjvImB1iEQIVQ5uB5Hgn/4z3YpsPrKMbGFEdlOVpa5+jdoPHnC4ALCoIV8Hh7wv/vMmI+Ayl33dcpcpjWADDSDNjHFDRyCdgTqkWSgaMTw9QMKL/fVgiG+scxGgUBXQkl86Fs5e/ARWstADmbhcmoOtRgYRWwOgSI9sUjQwZON0veP7X8HvXEvaoLhJBLi6rY3507keQlTBea6xz6GkGIXUSR+BwlKj3xaaACjwNnTb6UstZcR7YGeoLPEjEt5d2zA4bJ3gxEX8ttVNh4Df8xJzgKA361gWr6blXNo2fT8r4MSWmHSZem8Jvfh8nzvNjzv2RWpVEkCgiHZXSmxDYSM+WN56vyVVPd+UCD92/fgF2mjLGmrjuJBvPinVltsTEXjnhs7/Eb2YzdGrKJBz+/h+eSF94L2J0ET1NLrKd74VduF0mwmBlxGcZ/IEwCwvYW285jbpNGFE6KDEgoArRMnw0amM68ZdkFZGJm+KwFEXihYqZ6ZTxyTmLTNMqe5C1sLlrVLkGehNOHhjetMtO5Z3VtHIQIZ7s3L9C4dFd9d2VMkdGVoeydyhy4OcnbPy75HkhTA5WwaZ7nD5rJIenVlFaH8Yudj4431eAYm20r45YaWiqsOSc3onfGBEoO0hi5JJSE9JH9NNB49xq1ThDQfjc25sw9nAL/9QFKCeLht0XB988AjH5aAWayshV0DS7HT9MQt+p1kn8GntEeFt2ItFCmNToOmN8czH8dqgcB/p5Ys26IOy98frdaSZLvJCnI8pRmpYluLhYICdvGlQZxEKZtFeqN0dExfHyH3dCc/7iGy80CYLvgT3/aQq+6qMgX3SfwrRsdkm2CTV0Y9jz/OezAbTlrMgSSsV9OhwrwtmIdLJRuZtDaQKOnt4flFGAR1Qhh5ePhyPCgupZoo+JrMTfXONW4eOA4gXeTNpFxt1I/aurmx0ZfHyYkTsEVZwYp1X0kCF+OLcS3UerwayrU0x1ajNuoUZ4UJw66Ea3hL9qD7bXErfTVzXME7kuvmSJZn8sgnGCuk+cBmbmhjG6hSnjx99YJxVLAd+8Kg9DBovTcv3AQfiazvhnsYks0pkj476oZKixEK/mcGt1CmqSFZ7fmMJrbEOAEHvBD5d6ZsEHOVW6bUw+t5uaPiDzQTVeZZ1mGsi3/RBHiKgXcZ2jhMgzYFppkr7Wm+RkLxr0YAei2zAuoqZRe9YloozA2Q7riQ4ZOEwqa8otOwfOI/bct/ungic9VHozj4yGjDPo5myKKjyg6KyUzFhcXSVbpOF860mZSDXIkRF1WenTzVRaC2lVb2e1KiKqFMROMPxeeopuTD7R0ck3p50WXJDzEQHBC35eUQaXCN/6oGtKs0KEDsHBGc2VVT1oEkF4E/34GwzbBrzAJ4Z/XvHb3nV9PI3bcSF19WLENcuYED2R9K0rEi9zHQU2RVR2yrs9CypWYSRzL2scOyE+jMr54vwdzT7zVCIY6d34OnCPkHCuME44MSVk9+zvgmWTVhiL7Bu5PSSpmf/tdgbv5sIZTTNqU1OqEtIUqwXc/n7hq41HdCM9FXtp1C1Xg7I6QWSg/EYxYHtXLV730FEi7+lWT5XwvlplZt6UeMqAUI943LwILb8M9nONGMVBXzKO2r/Tgicfxmg5VlyCAOq2DqSXO416jqZmHMqdifN56ieiRr7aawtUaUUElqjsvFA4EfIUL3MQqmborhlbiAMzh/sH/aITXp0XN/PwltfKMauuwpT3/jnNjE01rzyPeUv6Pk4up5nfsxEFOZuQPq8f8OpMTm+FPlio1GFDqL0nHg8/pU63fc1aujWYORG+xWLvQjPOdetbv5l4QEEIB5Z8glcBPSAp
ApEOR7yYDIm2HLhirFbAVthHN0KvxKvg/NC9R7vsiE4vOT0Khhv0am1SFa+MnVOfSrFnhraOtjKAgUW2U4n5/KokgfGPm7MLFwdGiCCoSxWXouRPZ1wmrZO/kgxVnMnBHRd9aA1HltIWrsLtZslv0zR17Jtxsrk4nRwpk5Z6qT5vyMrray0xhb8jNwInUm+0BiMM3L2x2qZM4ZSuhK0gt7sR4LjdwTVsof7MB2AFSH6B7NtLJoq9aUmLyMh6tmGzeFdAJolpEWyznH4vkrGeWCdCxE3ewX3IxM0NHX9QrcRNoTcjZz49WBD1me0BkJaTQLOaH4sv75spaotryVQajrmxzbHFve8ijVg19H0hmNFmMulavEJH5MMcrAJ4E6ohMMHqdPWxrFrEIhF4EAIyAHuCfjmU26o1GOxGF2/BFj0BbaCq0YTGePRq4gFn0RRnGrtMWgqrbeOi1l1RVdzJOecou9mMUQIn8X6X0s2ffQR2s5re6SRTleeKa8ZPHcHLORlOqKupQw+VCfSCo9sQTeSbdG2PqwLpM7+SKkmkIGxigCHuVJuR18kiizlBiqFnVY4pf1LEgEdOcDArWw69xC5Dt92otIlsGceMkR8Tw8+MjDl+T/VW7p06S8TJJURnieqfLfcmjg8V/3DP792wWtEdIUPFa/ky5Wqp4OJwD+UIjCxAcimody1aTThvuUhMge1QDsp5mtpxAacnL295bKZwObIOzGforK8zc+MgONoZhUblbLn7qspTB5zd8yEzkit9eIkLDOzFZG1vX9OJXCrbP2411oZp8gzJ2KFnAG2Wq/jhwofA06torI28DgP6NoJhG4rArGaLAtEu0D2TZHXbfpcTcAGCNCKHoujI7XpDBcrNE3T561du5AMfINuacFh5PF49p2Hba/jpfR0PMtIyE8/IExBlMzTn9k/2TLolfWR6jeIfQ+xjI2SpHpKh4dbWHq/T5bZFNuAvPl8s+Fpq3+7FuzLWs665XrEU/rFlHdiMmwfahQVWaz8p6CUrgRkR/2xB78Hj0AslrVTpET0txpuwtPrnl5A8fVtOeQl2pqE3bLaDRBu+pNgZRtJhcn96BSYFZrz5LcOHHDzk3w/KgAPcu/QmkaBP1Ui5UCMOuwQP+lNKf05qVLIiq7uNLCDtuUB+cf2AglQB5iv6KLyyjStw7LV6jmQI+p4UXDPMUjLxymeYtW8upC7cywTMa6/Tb1TITPswMV46w/vEgJo0JPyME901oUdLKBZorm9ru4MyOt9hJowKNg05jAuWavNzZrNuL8sa83BrGGiyvU8cgdTX6xYPV1i0vuBDYSzlgCz/O1cTdGn2ev9w2O0//8EqsEwGFzx292lHRXqs7NU7ikUMrzgE5HSxs+cmYMy4RCrZgTsMCJIWbEkVdckUR3n/9BQ2Ererz0nhXrK3WGcz5GWDfsWlc6gJwId4/IjZZ7yeoseswf1viRrA9a56a5L/rsO5i13z3FTI/217vrQMGUCnAU8we/NFTS5QU5Q8MdPRhLvImzWGCPFJWeUICNYp+Y+in3i0X0JET0GxF/oxK+8tQKk+oLbmGgBLJObuP0F4IcO8Vw298DQTNUZU4POAXAr3NQnCD1I6cSrtErdoqtBj0wbgT0AMZwXeq4UWYGnx1n797rVq1yjSFIzqNMkqsZUnhRrfyqT3VIkFKPUdpbZjY1UKYUbY=,iv:vmMSzrWHa09eKQELkpQmWJAn6IdEACaaz1avi+Bm+78=,tag:YZf7lm3gK9SkM6EAXIgljg==,type:str]", 3 | "sops": { 4 | "kms": null, 5 | "gcp_kms": null, 6 | "azure_kv": null, 7 | "hc_vault": null, 8 | "age": [ 9 | { 10 | "recipient": "age1ph7watxp99nsl8ejs3snrf2jykwxc9j3va00z5xrywttyms3af9q4chaa0", 11 | 
"enc": "-----BEGIN AGE ENCRYPTED FILE-----\nYWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBQUC9mSUU2ckdIaHpoaURr\nYVRFNExqQ3JFYUtjMlB5LzFJdjdLSE9qOGhVCldxeVlRMW9NR2QwTGtMSGl5ZjlB\nUUFQWDQxUFdjRXJpdDJFTG52RElxK28KLS0tIE01YUptN0tObklJM1VrMDFYUkl1\nUUpJbzNVRzY1RzhUc3BvTGUrN3orVW8KBPwIDpzBJztR3FeCD5G83zL/skUMLWaL\nkYIZO4VE0XImokfHp23wYQ+xZXsOYCZ/PRaj4WehF4qWU0vhWVJdYg==\n-----END AGE ENCRYPTED FILE-----\n" 12 | } 13 | ], 14 | "lastmodified": "2025-07-16T11:44:07Z", 15 | "mac": "ENC[AES256_GCM,data:nvefZh1h+jsveDF/zv0Nrpf1NR3qpSMHubWnwIAfYweVIul+TVYx9cucb/zDvWYv8fqWkCDUoEznFXprO3M/guLcZFClipTmBxhqEwnWfy7M3HIY7oN+FfV6cCid0I1CmoVCJ4RA8ZEteZR9tl+B7WC3n1ChK/2l7m7ovnXOIE0=,iv:dWnBunipiF9ju67qLKEDU6h7IG/zGMtF8pCMRyfH3lY=,tag:PgQJas5uSAKyI9SI0o/SJg==,type:str]", 16 | "pgp": null, 17 | "unencrypted_suffix": "_unencrypted", 18 | "version": "3.9.4" 19 | } 20 | } -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/generate-env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Generate .env file from .env_template with auto-generated secure values 4 | 5 | if [ -f .env ]; then 6 | echo "⚠️ .env file already exists!" 7 | read -p "Do you want to overwrite it? (y/N): " -n 1 -r 8 | echo 9 | if [[ ! $REPLY =~ ^[Yy]$ ]]; then 10 | echo "Exiting without changes." 11 | exit 1 12 | fi 13 | fi 14 | 15 | echo "🔧 Generating .env file from .env_template..." 
16 | 17 | # Function to generate random hex string 18 | generate_hex() { 19 | openssl rand -hex 32 20 | } 21 | 22 | # Function to generate simple password 23 | generate_password() { 24 | # Generate a pronounceable password for easier initial setup 25 | echo "$(openssl rand -base64 12 | tr -d "=+/" | cut -c1-8)$(shuf -i 1000-9999 -n 1)" 26 | } 27 | 28 | # Start with template 29 | cat > .env << 'EOF' 30 | # Domain configuration 31 | ROOT_DOMAIN=llminabox.geoheil.com 32 | # ROOT_DOMAIN=project.docker # see LOCALDNS.md for more information 33 | CLOUDFLARE_IPS=173.245.48.0/20,103.21.244.0/22,103.22.200.0/22,103.31.4.0/22,141.101.64.0/18,108.162.192.0/18,190.93.240.0/20,188.114.96.0/20,197.234.240.0/22,198.41.128.0/17,162.158.0.0/15,104.16.0.0/13,104.24.0.0/14,172.64.0.0/13,131.0.72.0/22 34 | LOCAL_IPS=127.0.0.1/32,10.0.0.0/8,192.168.0.0/16,172.16.0.0/12 35 | TZ=UTC 36 | EOF 37 | 38 | # Add database configuration with standard naming 39 | cat >> .env << 'EOF' 40 | 41 | # LLM Router Database (LiteLLM) 42 | LLM_ROUTER_DB=litellm 43 | LLM_ROUTER_DB_USER=litellm 44 | EOF 45 | echo "LLM_ROUTER_DB_PASSWORD=$(generate_hex)" >> .env 46 | 47 | # Add LiteLLM configuration 48 | cat >> .env << 'EOF' 49 | 50 | # LiteLLM Configuration 51 | EOF 52 | echo "LITELLM_MASTER_KEY=$(generate_hex)" >> .env 53 | echo "LITELLM_SALT_KEY=$(generate_hex)" >> .env 54 | echo "LITELLM_UI_USERNAME=admin" >> .env 55 | echo "LITELLM_UI_PASSWORD=$(generate_password)" >> .env 56 | 57 | # Add API keys - check environment first 58 | cat >> .env << 'EOF' 59 | 60 | # API Keys for Model Providers 61 | EOF 62 | 63 | # Check for OpenAI key in environment 64 | if [ -n "$OPENAI_API_KEY" ]; then 65 | echo "ROUTER_OPENAI_API_KEY=$OPENAI_API_KEY" >> .env 66 | echo " ✅ Found OpenAI API key in environment" 67 | else 68 | echo "# Get from: https://platform.openai.com/api-keys" >> .env 69 | echo "ROUTER_OPENAI_API_KEY=sk-CHANGEME_YOUR_OPENAI_KEY" >> .env 70 | fi 71 | 72 | # Check for Anthropic key in environment 73 | if [ 
-n "$ANTHROPIC_API_KEY" ]; then 74 | echo "ROUTER_ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY" >> .env 75 | echo " ✅ Found Anthropic API key in environment" 76 | else 77 | echo "" >> .env 78 | echo "# Get from: https://console.anthropic.com/settings/keys" >> .env 79 | echo "ROUTER_ANTHROPIC_API_KEY=sk-CHANGEME_YOUR_ANTHROPIC_KEY" >> .env 80 | fi 81 | 82 | # Check for Hugging Face key in environment 83 | if [ -n "$HUGGING_FACE_HUB_TOKEN" ]; then 84 | echo "" >> .env 85 | echo "HUGGING_FACE_HUB_TOKEN=$HUGGING_FACE_HUB_TOKEN" >> .env 86 | echo " ✅ Found Hugging Face API key in environment" 87 | else 88 | echo "" >> .env 89 | echo "# Hugging Face API Key" >> .env 90 | echo "# Get from: https://huggingface.co/settings/tokens" >> .env 91 | echo "HUGGING_FACE_HUB_TOKEN=hf_CHANGEME_YOUR_HUGGING_FACE_API_KEY" >> .env 92 | fi 93 | 94 | # Add Chat UI database configuration 95 | cat >> .env << 'EOF' 96 | 97 | # Chat UI Database (OpenWebUI) 98 | CHAT_UI_DB=openwebui 99 | CHAT_UI_DB_USER=openwebui 100 | EOF 101 | echo "CHAT_UI_DB_PASSWORD=$(generate_hex)" >> .env 102 | echo "CHAT_UI_SECRET_KEY=$(generate_hex)" >> .env 103 | 104 | # Add Qdrant configuration 105 | cat >> .env << 'EOF' 106 | 107 | # Vector Database (Qdrant) 108 | EOF 109 | echo "QDRANT__SERVICE__API_KEY=$(generate_hex)" >> .env 110 | 111 | echo "" 112 | echo "✅ .env file generated successfully!" 
113 | echo "" 114 | echo "📋 Configuration summary:" 115 | echo " - Timezone: UTC" 116 | echo " - Domain: project.docker" 117 | 118 | # Check which API keys still need to be added 119 | missing_keys="" 120 | if [ -z "$OPENAI_API_KEY" ] && grep -q "CHANGEME_YOUR_OPENAI_KEY" .env; then 121 | missing_keys="$missing_keys\n - ROUTER_OPENAI_API_KEY: Get from https://platform.openai.com/api-keys" 122 | fi 123 | if [ -z "$ANTHROPIC_API_KEY" ] && grep -q "CHANGEME_YOUR_ANTHROPIC_KEY" .env; then 124 | missing_keys="$missing_keys\n - ROUTER_ANTHROPIC_API_KEY: Get from https://console.anthropic.com/settings/keys" 125 | fi 126 | if [ -z "$HUGGING_FACE_HUB_TOKEN" ] && grep -q "CHANGEME_YOUR_HUGGING_FACE_API_KEY" .env; then 127 | missing_keys="$missing_keys\n - HUGGING_FACE_HUB_TOKEN: Get from https://huggingface.co/settings/tokens" 128 | fi 129 | 130 | if [ -n "$missing_keys" ]; then 131 | echo "" 132 | echo "⚠️ IMPORTANT: You still need to add your API keys:" 133 | echo -e "$missing_keys" 134 | fi 135 | 136 | echo "" 137 | echo "📝 The generated passwords are for development use. For production, use stronger passwords!" -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.toptal.com/developers/gitignore/api/visualstudiocode,python,git 2 | # Edit at https://www.toptal.com/developers/gitignore?templates=visualstudiocode,python,git 3 | 4 | ### Git ### 5 | # Created by git for backups. 
To disable backups in Git: 6 | # $ git config --global mergetool.keepBackup false 7 | *.orig 8 | 9 | # Created by git when using merge tools for conflicts 10 | *.BACKUP.* 11 | *.BASE.* 12 | *.LOCAL.* 13 | *.REMOTE.* 14 | *_BACKUP_*.txt 15 | *_BASE_*.txt 16 | *_LOCAL_*.txt 17 | *_REMOTE_*.txt 18 | 19 | ### Python ### 20 | # Byte-compiled / optimized / DLL files 21 | __pycache__/ 22 | *.py[cod] 23 | *$py.class 24 | 25 | # C extensions 26 | *.so 27 | 28 | # Distribution / packaging 29 | .Python 30 | build/ 31 | develop-eggs/ 32 | dist/ 33 | downloads/ 34 | eggs/ 35 | .eggs/ 36 | lib/ 37 | lib64/ 38 | parts/ 39 | sdist/ 40 | var/ 41 | wheels/ 42 | share/python-wheels/ 43 | *.egg-info/ 44 | .installed.cfg 45 | *.egg 46 | MANIFEST 47 | 48 | # PyInstaller 49 | # Usually these files are written by a python script from a template 50 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 51 | *.manifest 52 | *.spec 53 | 54 | # Installer logs 55 | pip-log.txt 56 | pip-delete-this-directory.txt 57 | 58 | # Unit test / coverage reports 59 | htmlcov/ 60 | .tox/ 61 | .nox/ 62 | .coverage 63 | .coverage.* 64 | .cache 65 | nosetests.xml 66 | coverage.xml 67 | *.cover 68 | *.py,cover 69 | .hypothesis/ 70 | .pytest_cache/ 71 | cover/ 72 | 73 | # Translations 74 | *.mo 75 | *.pot 76 | 77 | # Django stuff: 78 | *.log 79 | local_settings.py 80 | db.sqlite3 81 | db.sqlite3-journal 82 | 83 | # Flask stuff: 84 | instance/ 85 | .webassets-cache 86 | 87 | # Scrapy stuff: 88 | .scrapy 89 | 90 | # Sphinx documentation 91 | docs/_build/ 92 | 93 | # PyBuilder 94 | .pybuilder/ 95 | target/ 96 | 97 | # Jupyter Notebook 98 | .ipynb_checkpoints 99 | 100 | # IPython 101 | profile_default/ 102 | ipython_config.py 103 | 104 | # pyenv 105 | # For a library or package, you might want to ignore these files since the code is 106 | # intended to run in multiple environments; otherwise, check them in: 107 | # .python-version 108 | 109 | # pipenv 110 | # According to pypa/pipenv#598, 
it is recommended to include Pipfile.lock in version control. 111 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 112 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 113 | # install all needed dependencies. 114 | #Pipfile.lock 115 | 116 | # poetry 117 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 118 | # This is especially recommended for binary packages to ensure reproducibility, and is more 119 | # commonly ignored for libraries. 120 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 121 | #poetry.lock 122 | 123 | # pdm 124 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 125 | #pdm.lock 126 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 127 | # in version control. 128 | # https://pdm.fming.dev/#use-with-ide 129 | .pdm.toml 130 | 131 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 132 | __pypackages__/ 133 | 134 | # Celery stuff 135 | celerybeat-schedule 136 | celerybeat.pid 137 | 138 | # SageMath parsed files 139 | *.sage.py 140 | 141 | # Environments 142 | .env 143 | .venv 144 | env/ 145 | venv/ 146 | ENV/ 147 | env.bak/ 148 | venv.bak/ 149 | 150 | # Spyder project settings 151 | .spyderproject 152 | .spyproject 153 | 154 | # Rope project settings 155 | .ropeproject 156 | 157 | # mkdocs documentation 158 | /site 159 | 160 | # mypy 161 | .mypy_cache/ 162 | .dmypy.json 163 | dmypy.json 164 | 165 | # Pyre type checker 166 | .pyre/ 167 | 168 | # pytype static type analyzer 169 | .pytype/ 170 | 171 | # Cython debug symbols 172 | cython_debug/ 173 | 174 | # PyCharm 175 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 176 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 177 | # and can be added to the global gitignore or merged into this file. For a more nuclear 178 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
179 | #.idea/ 180 | 181 | ### Python Patch ### 182 | # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration 183 | poetry.toml 184 | 185 | # ruff 186 | .ruff_cache/ 187 | 188 | # LSP config files 189 | pyrightconfig.json 190 | 191 | ### VisualStudioCode ### 192 | .vscode/* 193 | !.vscode/settings.json 194 | !.vscode/tasks.json 195 | !.vscode/launch.json 196 | !.vscode/extensions.json 197 | !.vscode/*.code-snippets 198 | 199 | # Local History for Visual Studio Code 200 | .history/ 201 | 202 | # Built Visual Studio Code Extensions 203 | *.vsix 204 | 205 | ### VisualStudioCode Patch ### 206 | # Ignore all local history of files 207 | .history 208 | .ionide 209 | 210 | # End of https://www.toptal.com/developers/gitignore/api/visualstudiocode,python,git 211 | 212 | 213 | {{ cookiecutter.project_slug }}/.env 214 | {{ cookiecutter.project_slug }}/pyproject.toml 215 | {{ cookiecutter.project_slug }}/key.txt 216 | {{ cookiecutter.project_slug }}/pyproject.toml 217 | {{ cookiecutter.project_slug }}/services/llm-router/google_vertexai.json 218 | rendered-template/ 219 | key.txt 220 | {{cookiecutter.project_slug}}/services/llm-router/google_vertexai.json 221 | .idea/ 222 | llm_in_a_box/ 223 | -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/.gitignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.toptal.com/developers/gitignore/api/visualstudiocode,python,git 2 | # Edit at https://www.toptal.com/developers/gitignore?templates=visualstudiocode,python,git 3 | 4 | ### Git ### 5 | # Created by git for backups. 
To disable backups in Git: 6 | # $ git config --global mergetool.keepBackup false 7 | *.orig 8 | 9 | # Created by git when using merge tools for conflicts 10 | *.BACKUP.* 11 | *.BASE.* 12 | *.LOCAL.* 13 | *.REMOTE.* 14 | *_BACKUP_*.txt 15 | *_BASE_*.txt 16 | *_LOCAL_*.txt 17 | *_REMOTE_*.txt 18 | 19 | ### Python ### 20 | # Byte-compiled / optimized / DLL files 21 | __pycache__/ 22 | *.py[cod] 23 | *$py.class 24 | 25 | # C extensions 26 | *.so 27 | 28 | # Distribution / packaging 29 | .Python 30 | build/ 31 | develop-eggs/ 32 | dist/ 33 | downloads/ 34 | eggs/ 35 | .eggs/ 36 | lib/ 37 | lib64/ 38 | parts/ 39 | sdist/ 40 | var/ 41 | wheels/ 42 | share/python-wheels/ 43 | *.egg-info/ 44 | .installed.cfg 45 | *.egg 46 | MANIFEST 47 | 48 | # PyInstaller 49 | # Usually these files are written by a python script from a template 50 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 51 | *.manifest 52 | *.spec 53 | 54 | # Installer logs 55 | pip-log.txt 56 | pip-delete-this-directory.txt 57 | 58 | # Unit test / coverage reports 59 | htmlcov/ 60 | .tox/ 61 | .nox/ 62 | .coverage 63 | .coverage.* 64 | .cache 65 | nosetests.xml 66 | coverage.xml 67 | *.cover 68 | *.py,cover 69 | .hypothesis/ 70 | .pytest_cache/ 71 | cover/ 72 | 73 | # Translations 74 | *.mo 75 | *.pot 76 | 77 | # Django stuff: 78 | *.log 79 | local_settings.py 80 | db.sqlite3 81 | db.sqlite3-journal 82 | 83 | # Flask stuff: 84 | instance/ 85 | .webassets-cache 86 | 87 | # Scrapy stuff: 88 | .scrapy 89 | 90 | # Sphinx documentation 91 | docs/_build/ 92 | 93 | # PyBuilder 94 | .pybuilder/ 95 | target/ 96 | 97 | # Jupyter Notebook 98 | .ipynb_checkpoints 99 | 100 | # IPython 101 | profile_default/ 102 | ipython_config.py 103 | 104 | # pyenv 105 | # For a library or package, you might want to ignore these files since the code is 106 | # intended to run in multiple environments; otherwise, check them in: 107 | # .python-version 108 | 109 | # pipenv 110 | # According to pypa/pipenv#598, 
it is recommended to include Pipfile.lock in version control. 111 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 112 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 113 | # install all needed dependencies. 114 | #Pipfile.lock 115 | 116 | # poetry 117 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 118 | # This is especially recommended for binary packages to ensure reproducibility, and is more 119 | # commonly ignored for libraries. 120 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 121 | #poetry.lock 122 | 123 | # pdm 124 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 125 | #pdm.lock 126 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 127 | # in version control. 128 | # https://pdm.fming.dev/#use-with-ide 129 | .pdm.toml 130 | 131 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 132 | __pypackages__/ 133 | 134 | # Celery stuff 135 | celerybeat-schedule 136 | celerybeat.pid 137 | 138 | # SageMath parsed files 139 | *.sage.py 140 | 141 | # Environments 142 | .env 143 | .venv 144 | env/ 145 | venv/ 146 | ENV/ 147 | env.bak/ 148 | venv.bak/ 149 | 150 | # Spyder project settings 151 | .spyderproject 152 | .spyproject 153 | 154 | # Rope project settings 155 | .ropeproject 156 | 157 | # mkdocs documentation 158 | /site 159 | 160 | # mypy 161 | .mypy_cache/ 162 | .dmypy.json 163 | dmypy.json 164 | 165 | # Pyre type checker 166 | .pyre/ 167 | 168 | # pytype static type analyzer 169 | .pytype/ 170 | 171 | # Cython debug symbols 172 | cython_debug/ 173 | 174 | # PyCharm 175 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 176 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 177 | # and can be added to the global gitignore or merged into this file. For a more nuclear 178 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
179 | #.idea/ 180 | 181 | ### Python Patch ### 182 | # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration 183 | poetry.toml 184 | 185 | # ruff 186 | .ruff_cache/ 187 | 188 | # LSP config files 189 | pyrightconfig.json 190 | 191 | ### VisualStudioCode ### 192 | .vscode/* 193 | !.vscode/settings.json 194 | !.vscode/tasks.json 195 | !.vscode/launch.json 196 | !.vscode/extensions.json 197 | !.vscode/*.code-snippets 198 | 199 | # Local History for Visual Studio Code 200 | .history/ 201 | 202 | # Built Visual Studio Code Extensions 203 | *.vsix 204 | 205 | ### VisualStudioCode Patch ### 206 | # Ignore all local history of files 207 | .history 208 | .ionide 209 | 210 | # End of https://www.toptal.com/developers/gitignore/api/visualstudiocode,python,git 211 | 212 | 213 | {{ cookiecutter.project_slug }}/.env 214 | {{ cookiecutter.project_slug }}/pyproject.toml 215 | {{ cookiecutter.project_slug }}/key.txt 216 | {{ cookiecutter.project_slug }}/pyproject.toml 217 | {{ cookiecutter.project_slug }}/services/llm-router/google_vertexai.json 218 | rendered-template/ 219 | key.txt 220 | {{cookiecutter.project_slug}}/services/llm-router/google_vertexai.json 221 | .idea/ 222 | -------------------------------------------------------------------------------- /docs/cilogon-integration.md: -------------------------------------------------------------------------------- 1 | # OIDC Integration Using CILogon 2 | 3 | ## Introduction 4 | 5 | [CILogon](https://www.cilogon.org) provides a standards-compliant OpenID 6 | Connect (OAuth 2.0) interface to federated authentication for 7 | cyberinfrastructure (CI). CILogon's federated identity management enables 8 | researchers to use their home organization credentials to access 9 | applications, rather than requiring yet another username and password to 10 | log on. 
11 | 12 | CILogon is operated by the 13 | [National Center for Supercomputing Applications (NCSA)](https://www.ncsa.illinois.edu/) 14 | at the [University of Illinois at Urbana-Champaign](https://illinois.edu/). 15 | 16 | ## Prequisites 17 | 18 | You should have already successfully deployed and configured the 19 | chat service (Open WebUI) using the standard login form authentication. 20 | This guide only details additional configuration needed for OIDC integration 21 | using CILogon and does not address overall configuration issues. 22 | 23 | The [OIDC protocol](https://openid.net/specs/openid-connect-core-1_0.html) 24 | requires web applications to be served using HTTPS. Your service should 25 | already be 26 | [configured to use HTTPS](link-to-traefix-documentation). 27 | The only exception to this requirement is during exploration or development 28 | when `http://localhost` or `http://127.0.0.1` may be used. 29 | 30 | ## Request Your CILogon Client ID and Secret 31 | 32 | CILogon subscribers may log into their CILogon Registry service and use 33 | the self-service interface to request a client ID and secret. 34 | 35 | Basic Authentication service tier (free) users should request a client 36 | following the instructions below and wait for a notice of approval. 37 | 38 | 1. Browse to the 39 | [CILogon OpenID Connect (OIDC) Client Registration](https://cilogon.org/oauth2/register) 40 | form. 41 | 42 | 1. Complete the form fields for Client Name, Contact Email, and Home URL. 43 | 44 | 1. For the Callback URLs field enter `https:///oauth/oidc/callback` 45 | and repalce `` with the hostname or service name for your 46 | deployment. 47 | 48 | 1. For Scopes tick the boxes for email, openid, and profile. 49 | 50 | 1. Click `Register Client`. 51 | 52 | 1. Record the client ID and secret. You must safely escrow the client secret 53 | since CILogon does not store it and only stores a computed hash of the 54 | secret. 55 | 56 | 1. 
Wait for an email indicating your client has been approved. You cannot 57 | successfully test your configuration until the client has been approved. 58 | 59 | ## Configuration 60 | 61 | The OAuth or OIDC integration for Open WebUI may be completely configured 62 | using environment variables. For additional details beyond those below 63 | see the following Open WebUI documentation: 64 | 65 | - [Environment Variable Configuration](https://docs.openwebui.com/getting-started/env-configuration) 66 | - [SSO(OAuth, OIDC, Trusted Header)](https://docs.openwebui.com/features/auth/sso/) 67 | - [Troubleshooting OAUTH/SSO Issues](https://docs.openwebui.com/troubleshooting/sso/) 68 | 69 | 70 | Edit the `llm_chat_ui.environment` section of the `docker-compose.yml` file 71 | as follows: 72 | 73 | 1. 74 | ``` 75 | ENABLE_OAUTH_PERSISTENT_CONFIG: "False" 76 | ``` 77 | 78 | This forces the OAuth configuration to always be read from environment 79 | variables on every restart. 80 | 81 | 1. 82 | ``` 83 | ENABLE_SIGNUP: "True" 84 | ``` 85 | 86 | Enable user account creation generally. See also `ENABLE_OAUTH_SIGNUP`. 87 | 88 | 1. 89 | ``` 90 | ENABLE_OAUTH_SIGNUP: "True" 91 | ``` 92 | 93 | Enable user account creation when authenticating using OAuth. 94 | 95 | 1. 96 | ``` 97 | WEBUI_URL: "https://" 98 | ``` 99 | 100 | Replace `` with the hostname from which your service will 101 | be served. Open WebUI uses this configuration to construct the appropriate 102 | return URI used during the OAuth or OIDC authentication flow. 103 | 104 | 1. 105 | ``` 106 | OAUTH_CLIENT_ID: "" 107 | ``` 108 | 109 | Replace `` with the client ID obtained when 110 | you requested your client from CILogon. The client ID will usually follow 111 | the format 112 | 113 | ``` 114 | cilogon:/client_id/... 115 | ``` 116 | 117 | 1. 118 | ``` 119 | OAUTH_CLIENT_SECRET: "" 120 | ``` 121 | 122 | Replace `` with the client secret 123 | obtained when you requested your client from CILogon. 124 | 125 | 1. 
126 | ``` 127 | OPENID_PROVIDER_URL: "https://cilogon.org/.well-known/openid-configuration" 128 | ``` 129 | 130 | 1. 131 | ``` 132 | OAUTH_PROVIDER_NAME: "CILogon" 133 | ``` 134 | 135 | 1. 136 | ``` 137 | OAUTH_SCOPES: "openid email profile" 138 | ``` 139 | 140 | 1. 141 | ``` 142 | OPENID_REDIRECT_URI: "https://<hostname>/oauth/oidc/callback" 143 | ``` 144 | 145 | 1. 146 | ``` 147 | OAUTH_ALLOWED_DOMAINS: "" 148 | ``` 149 | 150 | Since CILogon supports authentication from over 6,000 campus login servers 151 | around the world you may wish to restrict login from only your campus 152 | users. To do so enter the domain of your campus, for example 153 | 154 | ``` 155 | illinois.edu 156 | ``` 157 | 158 | You may use a comma-separated list of domains, for example 159 | 160 | ``` 161 | illinois.edu,berkeley.edu,tuwien.ac.at 162 | ``` 163 | 164 | CILogon subscribers may instead request that the OAuth2 server restrict 165 | logins from only a subset of login servers (server-side authorization is 166 | not available without a subscription). 167 | 168 | ## Restart and Test 169 | 170 | After you have received email notification that your CILogon client has been 171 | approved and you have edited the `docker-compose.yml` file as detailed above, 172 | you may restart your service and test the CILogon OIDC integration. 
173 | -------------------------------------------------------------------------------- /docs/paper.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'LLM-in-a-Box: A Templated, Self-Hostable Framework for Generative AI in Research' 3 | tags: 4 | - Python 5 | - Docker 6 | - LLM 7 | - generative AI 8 | - self-hosting 9 | - reproducible research 10 | - RSE 11 | - ollama 12 | - litellm 13 | - docling 14 | - qdrant 15 | - traefik 16 | - secops 17 | - sops 18 | - age 19 | authors: 20 | - name: Georg Heiler 21 | orcid: 0000-0002-8684-1163 22 | affiliation: "1, 2" 23 | - name: Aaron Culich 24 | affiliation: 3 25 | affiliations: 26 | - name: Complexity Science Hub Vienna (CSH) 27 | index: 1 28 | - name: Austrian Supply Chain Intelligence Institute (ASCII) 29 | index: 2 30 | - name: Eviction Research Network at University of California, Berkeley 31 | index: 3 32 | date: 1st July 2025 33 | bibliography: paper.bib 34 | 35 | # Optional fields if submitting to a AAS journal too, see this blog post: 36 | # (Reverse Proxy)"] 14 | 15 | subgraph CoreMain ["Main Services"] 16 | direction LR 17 | OpenWebUI["OpenWebUI
(Chat UI)"] 18 | Docling["Docling
(Document Extraction)"] 19 | end 20 | 21 | subgraph CoreBackend ["Backend Services"] 22 | direction LR 23 | LiteLLM["LiteLLM
(Model Router)"] 24 | Ollama["Ollama
(Model Server)"] 25 | end 26 | 27 | Postgres["Postgres
(State DB)"] 28 | end 29 | 30 | %% Addon Services (Yellow Box) 31 | subgraph Addons ["Addon Services"] 32 | direction TB 33 | 34 | subgraph AddonsTop ["Infrastructure"] 35 | direction LR 36 | API["API Gateway
(e.g. Kong)"] 37 | Monitoring["Monitoring
(Prometheus/Grafana)"] 38 | SSO["SSO/Identity
(Keycloak)"] 39 | end 40 | 41 | subgraph AddonsMiddle ["Data & Storage"] 42 | direction LR 43 | FileStore["Object Storage
(MinIO/S3)"] 44 | Qdrant["Qdrant
(Vector DB)"] 45 | DataViz["Data Visualization
(Metabase)"] 46 | end 47 | 48 | subgraph AddonsBottom ["Workflow & Automation"] 49 | direction LR 50 | Workflow["Workflow Engines
(Temporal)"] 51 | Notebooks["Jupyter/Polynote
(Notebooks)"] 52 | n8n["n8n
(Automation)"] 53 | end 54 | end 55 | 56 | %% Core Internal Connections 57 | Traefik --> OpenWebUI 58 | Traefik --> Docling 59 | Traefik --> LiteLLM 60 | Traefik --> Ollama 61 | 62 | OpenWebUI -->|API| LiteLLM 63 | OpenWebUI -->|State| Postgres 64 | 65 | Docling -->|RAG| LiteLLM 66 | Docling -->|State| Postgres 67 | 68 | LiteLLM -->|Model API| Ollama 69 | LiteLLM -->|State| Postgres 70 | 71 | %% Addon to Core Connections (with labels) 72 | API -.->|"Proxy"| Traefik 73 | Monitoring -.->|"Metrics"| Traefik 74 | Monitoring -.->|"Health"| OpenWebUI 75 | Monitoring -.->|"Performance"| LiteLLM 76 | Monitoring -.->|"Resources"| Ollama 77 | Monitoring -.->|"Database"| Postgres 78 | 79 | SSO -.->|"Auth"| Traefik 80 | SSO -.->|"Login"| OpenWebUI 81 | 82 | FileStore -.->|"Documents"| Docling 83 | FileStore -.->|"Models"| Ollama 84 | 85 | Qdrant -.->|"Vector Search"| Docling 86 | Qdrant -.->|"Embeddings"| LiteLLM 87 | 88 | DataViz -.->|"Analytics"| Postgres 89 | 90 | Workflow -.->|"Orchestration"| Docling 91 | Workflow -.->|"Model Calls"| LiteLLM 92 | Workflow -.->|"Inference"| Ollama 93 | 94 | Notebooks -.->|"Data Processing"| Docling 95 | Notebooks -.->|"Model Testing"| LiteLLM 96 | Notebooks -.->|"Experiments"| Ollama 97 | Notebooks -.->|"Analysis"| Postgres 98 | 99 | n8n -.->|"Automation"| Docling 100 | n8n -.->|"API Integration"| LiteLLM 101 | n8n -.->|"Model Pipeline"| Ollama 102 | n8n -.->|"Data Sync"| Postgres 103 | 104 | %% Styling 105 | classDef core fill:#e3f2fd,stroke:#1976d2,stroke-width:3px,color:#0d47a1 106 | classDef addon fill:#fff8e1,stroke:#f57c00,stroke-width:2px,color:#e65100 107 | classDef coreMain fill:#bbdefb,stroke:#1976d2,stroke-width:2px 108 | classDef coreBackend fill:#90caf9,stroke:#1976d2,stroke-width:2px 109 | classDef addonTop fill:#ffecb3,stroke:#f57c00,stroke-width:2px 110 | classDef addonMiddle fill:#ffe0b2,stroke:#f57c00,stroke-width:2px 111 | classDef addonBottom fill:#ffcc02,stroke:#f57c00,stroke-width:2px 112 | 113 | class Core core 114 | 
class Addons addon 115 | class CoreMain coreMain 116 | class CoreBackend coreBackend 117 | class AddonsTop addonTop 118 | class AddonsMiddle addonMiddle 119 | class AddonsBottom addonBottom 120 | ``` 121 | 122 | --- 123 | 124 | ## Layered Stack: Core, Addons, and Extensions 125 | 126 | ### Core (Turnkey, Always Included) 127 | - **Reverse Proxy**: Traefik (or Nginx) 128 | - **Chat UI**: OpenWebUI (or similar) 129 | - **Model Router**: LiteLLM (or OpenRouter, vLLM) 130 | - **Model Server**: Ollama (or LM Studio, vLLM, sglang) 131 | - **Document Extraction**: Docling (for high quality document preparation to improve RAG results) 132 | - **State DB**: Postgres 133 | 134 | ### Addons (Quick-Add, Highly Recommended) 135 | - **Vector DB**: Qdrant, Milvus, Weaviate, or Chroma 136 | - **Automation/Orchestration**: n8n, dagster, or Airflow 137 | - **SSO/Identity**: Keycloak, Authentik, or Auth0 138 | - **Monitoring/Observability**: Prometheus, Grafana, Loki 139 | - **Object/File Storage**: MinIO, S3 140 | - **Notebooks**: Jupyter, Polynote 141 | - **Data Visualization**: Metabase, Superset 142 | - **Workflow Engines**: Temporal, Argo Workflows 143 | - **API Gateway**: Kong, Ambassador 144 | 145 | ### Extensions/Specializations (Optional, Use-Case Driven) 146 | - **Fine-tuning/Training UI**: LoRA Studio, Hugging Face AutoTrain 147 | - **Agent Frameworks**: LangChain, CrewAI, AutoGen 148 | - **Data Labeling**: Label Studio 149 | - **ML Experiment Tracking**: MLflow, Weights & Biases 150 | - **RAG Frameworks**: LlamaIndex, Haystack 151 | - **Search**: OpenSearch, Elasticsearch 152 | - **Chatbot Integrations**: Slack, Discord, Teams, Webhooks 153 | - **Analytics**: Amplitude, PostHog 154 | - **Security**: Vault, SOPS, OPA 155 | 156 | 157 | ## Core vs. 
Addons/Extensions 158 | 159 | - **Core**: Should always be present for a functional, private, multi-model LLM stack (UI, router, model server, DB, proxy, doc extraction) 160 | - **Addons**: Should be one-command add (docker-compose, helm, etc.), and cover most common needs (vector db, SSO, monitoring, storage, automation) 161 | - **Extensions**: For advanced users, research, or verticals (fine-tuning, analytics, agent frameworks, integrations) 162 | 163 | --- 164 | 165 | ## Similar Projects & Inspiration 166 | ### Starter Projects 167 | #### 1. [philschmid/open-llm-stack](https://github.com/philschmid/open-llm-stack) 168 | - Focus: Production-ready open LLM stack (HuggingChat, TGI, MongoDB, Langchain, vLLM, OpenSearch) 169 | - Modular, cloud/on-prem, with examples for different providers 170 | - builds around huggingface chat - less customizable especially with regards to enterprise security settings 171 | - lacks advanced rag integration 172 | 173 | #### 2. [tmc/mlops-community-llm-stack-hack](https://github.com/tmc/mlops-community-llm-stack-hack) 174 | - Focus: MLOps community hackathon starter for LLM stacks 175 | - Includes Go backend, Python services, vector visualization, Slack monitoring 176 | - unmaintained 177 | 178 | #### 3. [godatadriven/openllm-starter](https://github.com/godatadriven/openllm-starter) 179 | - Focus: GPU infra provisioning, Streamlit chat, Jupyter, GCP automation 180 | - Good for quickstart on cloud GPU 181 | - however lacks docling integration for advanced rag 182 | - lacks large community like open web ui for continuous updates 183 | 184 | ### advanced further ideas 185 | 186 | #### 4. [aishwaryaprabhat/BigBertha](https://github.com/aishwaryaprabhat/BigBertha) 187 | - Focus: LLMOps on Kubernetes (ArgoCD, Argo Workflows, Prometheus, MLflow, MinIO, Milvus, LlamaIndex) 188 | - End-to-end retraining, monitoring, vector ingestion 189 | 190 | #### 5. 
[IceBearAI/LLM-And-More](https://github.com/IceBearAI/LLM-And-More) 191 | - Focus: Plug-and-play, full LLM workflow (data, training, deployment, evaluation) 192 | - Modular, professional, with UI and workflow orchestration 193 | -------------------------------------------------------------------------------- /docs/THOUGHTS.md: -------------------------------------------------------------------------------- 1 | # LLM-in-a-Box: Summary and Analysis 2 | 3 | ## Key Points & Purpose 4 | 5 | ### Core Problem Being Solved 6 | - **Access Gap**: Researchers face a difficult choice between proprietary cloud APIs (cost, privacy, reproducibility concerns) vs complex self-hosting 7 | - **Gatekeeper Control**: Dominant providers control interface, responses, and tool access 8 | - **RSE Dependency**: Self-hosting typically requires dedicated Research Software Engineer support (not always available) 9 | - **Infrastructure Complexity**: Traditional self-hosting involves orchestrating multiple services 10 | 11 | ### Solution Approach 12 | - **Templated Framework**: Pre-configured, containerized stack of open-source tools 13 | - **Minimal Configuration**: Deployable with minimal setup complexity 14 | - **RSE Expertise Packaging**: Bundles RSE knowledge into reusable template 15 | - **Sovereignty**: Enables switching between commercial and self-hosted models seamlessly 16 | 17 | ## Technical Components & Architecture 18 | 19 | ### Core Tools Stack 20 | - **OpenWebUI**: Chat interface 21 | - **Ollama**: Model server for self-hosting 22 | - **LiteLLM**: Universal model router (single API for local + commercial models) 23 | - **Docling**: Document extraction for RAG pipelines 24 | - **Traefik**: Reverse proxy/load balancer 25 | - **Qdrant**: Vector database 26 | - **PostgreSQL**: Data persistence 27 | - **SOPS + Age**: Secret management 28 | - **Docker Compose**: Container orchestration 29 | - **Cruft**: Template updating 30 | 31 | ### Infrastructure Patterns 32 | - **Container-based 
deployment** (CPU and GPU profiles) 33 | - **Profile-based configuration** (different service combinations) 34 | - **Secret management** with encryption 35 | - **Template-driven project generation** 36 | 37 | ## Prerequisites & Installation Requirements 38 | 39 | ### Hard Dependencies 40 | - **Pixi**: Package manager (https://pixi.sh/latest/) 41 | - **OCI Container Runtime**: Docker Desktop or equivalent 42 | - **Git**: Version control 43 | - **OpenSSL**: For secret generation 44 | 45 | ### Installation Process 46 | ```bash 47 | git clone git@github.com:complexity-science-hub/llm-in-a-box-template.git 48 | cd llm-in-a-box-template 49 | pixi run tpl-init 50 | # Configure .env secrets 51 | # Start with docker compose profiles 52 | ``` 53 | 54 | ## Target Audience Analysis 55 | 56 | ### Aspirational Audience 57 | - **Research Groups**: Need sovereign AI capabilities 58 | - **Educational Institutions**: Teaching and research applications 59 | - **Individual Developers**: Personal AI infrastructure 60 | - **Domain Specialists**: Non-CS researchers needing AI tools 61 | 62 | ### Actual Audience Reality 63 | **Who This Really Works For:** 64 | - Researchers with existing Docker/containerization experience 65 | - Teams with at least one person comfortable with system administration 66 | - Organizations with basic DevOps infrastructure already in place 67 | 68 | **Who This Struggles To Serve:** 69 | - Pure domain specialists without systems background 70 | - Researchers on completely fresh installations 71 | - Users unfamiliar with container orchestration 72 | - Teams without dedicated technical support 73 | 74 | ## Foundational Knowledge Gaps & Barriers 75 | 76 | ### Core Systems Expertise Required 77 | 78 | #### Container & Orchestration Knowledge 79 | - **Docker fundamentals**: Images, containers, volumes, networks 80 | - **Docker Compose**: Service definitions, profiles, environment variables 81 | - **Container debugging**: Logs, exec access, networking troubleshooting 
82 | 83 | #### Networking & Infrastructure 84 | - **Reverse proxy concepts**: Understanding Traefik configuration 85 | - **Port management**: Avoiding conflicts, understanding service discovery 86 | - **SSL/TLS basics**: Certificate management (future Kubernetes deployment) 87 | 88 | #### Security & Secret Management 89 | - **SOPS/Age cryptography**: Key generation, encryption workflows 90 | - **Environment variable security**: .env file management, secret rotation 91 | - **Container security**: Image scanning, runtime security 92 | 93 | #### System Administration 94 | - **Package management**: Understanding Pixi, resolving dependency conflicts 95 | - **File permissions**: Understanding volume mounts, user/group IDs 96 | - **Process management**: Service startup, health checking, resource monitoring 97 | 98 | #### Debugging & Troubleshooting 99 | - **Log analysis**: Multi-container log aggregation and interpretation 100 | - **Resource debugging**: Memory, GPU, storage issues 101 | - **Network troubleshooting**: Service discovery, port binding issues 102 | - **Dependency resolution**: When services fail to start or communicate 103 | 104 | ### Knowledge Bootstrapping Challenge 105 | 106 | **The Chicken-and-Egg Problem:** 107 | - **LLM Help Requires Working LLM**: Can't use AI to debug AI setup when it's broken 108 | - **System Expertise + AI**: Effective troubleshooting requires foundational knowledge PLUS AI assistance 109 | - **Implicit Assumptions**: "Simple" setup assumes significant prior knowledge 110 | - **Failure Cascades**: One small issue can make entire system inaccessible 111 | 112 | ### Installation Environment Variations 113 | 114 | #### Brand New Laptop Scenarios 115 | - **Missing dependencies**: Python, Docker, Git, package managers 116 | - **Permission issues**: Admin access, Docker daemon access 117 | - **Network restrictions**: Corporate firewalls, proxy configurations 118 | - **Resource constraints**: RAM, disk space, GPU availability 119 | 120 | 
#### Pre-configured System Scenarios 121 | - **Port conflicts**: Existing services using required ports 122 | - **Version conflicts**: Incompatible Docker/Python/tool versions 123 | - **Configuration interference**: Existing Traefik, database, or proxy setups 124 | - **Partial installations**: Broken previous attempts creating state conflicts 125 | 126 | ## Recommendations for Improved Accessibility 127 | 128 | ### Pre-Installation Assessment 129 | - **System compatibility checker**: Script to verify prerequisites 130 | - **Environment scanner**: Detect potential conflicts before installation 131 | - **Resource calculator**: Minimum RAM/disk/GPU requirements 132 | 133 | ### Installation Improvements 134 | - **Step-by-step guided setup**: Interactive installation wizard 135 | - **Environment-specific instructions**: Mac/Windows/Linux variations 136 | - **Rollback mechanisms**: Easy cleanup of failed installations 137 | - **Dependency auto-installation**: Automated prerequisite installation where possible 138 | 139 | ### Documentation Enhancements 140 | - **Troubleshooting decision trees**: "If X fails, try Y, then Z" 141 | - **Common failure scenarios**: Pre-documented solutions for typical issues 142 | - **Conceptual primers**: Brief explainers for Docker, networking, secrets management 143 | - **Video walkthroughs**: Visual setup guides for different platforms 144 | 145 | ### Community Support Infrastructure 146 | - **Installation validation**: Community-tested configurations 147 | - **Issue templates**: Structured bug reporting for setup problems 148 | - **Office hours/support channels**: Real-time help for setup issues 149 | - **Mentorship program**: Experienced users helping newcomers 150 | 151 | ## Bottom Line Assessment 152 | 153 | **Great Idea, Implementation Gap**: The project addresses a real need and provides genuine value, but the gap between "minimal configuration" and actual user experience reveals the classic CS assumption problem. 
Even with containerization abstracting much complexity, the foundational systems knowledge required for troubleshooting remains substantial. 154 | 155 | **Success Requires**: Either extensive pre-existing technical background OR significant institutional support OR very favorable environmental conditions (clean system, no conflicts, perfect documentation match). 156 | 157 | **Path Forward**: Focus on installation experience optimization, assumption documentation, and failure mode preparation rather than just the happy-path documentation. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LLM in a box template 2 | 3 | This template provides an easy-to-deploy, self-hostable stack to make the generative AI ecosystem more approachable for research and education. 4 | It unifies access to both commercial and local models (via Ollama) through a flexible chat UI and a single API endpoint, enabling private, reproducible, and sovereign AI workflows. 5 | 6 | This template project contains: 7 | 8 | - A **flexible Chat UI** [OpenWebUI](https://docs.openwebui.com/) 9 | - **Document extraction** for refined RAG via [docling](https://docs.openwebui.com/features/document-extraction/docling) 10 | - https://github.com/docling-project/docling 11 | - https://github.com/docling-project/docling-serve 12 | - A **model router** [litellm](https://www.litellm.ai/) 13 | - A **model server** [ollama](https://ollama.com/) 14 | - State is stored in Postgres https://www.postgresql.org/ 15 | 16 | This template is built with [cruft](https://cruft.github.io/cruft/) so it is easy to update. 17 | Furthermore secrets are managed with [sops](https://github.com/getsops/sops) and [age](https://github.com/FiloSottile/age). 18 | We use [traefik](https://traefik.io/traefik/) as a reverse proxy. 
19 | 20 | ![Icon](img/llm-in-a-box-icon.png) 21 | 22 | ## Publication 23 | 24 | This project is a contribution of the [Complexity Science Hub](https://complexity.science/) and is published under the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). 25 | This template project is published to [JOSE (journal of open source education)](https://jose.theoj.org/). 26 | You find the DOI here: <>> 27 | 28 | The source code for the publication is available in the `docs/paper.md` file. 29 | 30 | ## Using the template 31 | 32 | We intend to eventually have more deployment processes scaffolded. However, this is usually fairly specific for a given organization. 33 | Therefore, we start out with `docker compose`. 34 | And perhaps later will add something for Kubernetes with fluxcd. 35 | 36 | You may want to customize a production grade deployment for SSL handling and SSO. 37 | But we hope this gives you a good starting point. 38 | 39 | See the [QUICKSTART.md](./docs/QUICKSTART.md) for an in-depth quick start guide. 40 | 41 | ### Prerequisites 42 | 43 | You have a working installation of pixi available. 44 | 45 | See https://pixi.sh/latest/ for installing. In short: 46 | 47 | - osx/linux: `curl -fsSL https://pixi.sh/install.sh | sh` 48 | - windows: `powershell -ExecutionPolicy ByPass -c "irm -useb https://pixi.sh/install.ps1 | iex"` 49 | 50 | Furthermore, you must have some kind of container runtime installed. 51 | For example: 52 | - Docker Desktop https://www.docker.com/products/docker-desktop/ 53 | - Rancher Desktop https://rancherdesktop.io/ 54 | - this uses nerdctl 55 | - all commands below should work with `nerdctl compose` instead of `docker compose` 56 | - Kubernetes 57 | - Podman 58 | - ... 59 | 60 | 61 | ### Applying the template 62 | #### Docker (simple) 63 | For now we assume you are using docker. 
64 | 65 | > NOTICE: Pixi will bootstrap most required tools 66 | 67 | ```bash 68 | git clone https://github.com/complexity-science-hub/llm-in-a-box-template.git 69 | cd llm-in-a-box-template 70 | pixi run tpl-init-cruft 71 | 72 | # alternatively: 73 | # pip install cruft jinja2-ospath 74 | # cruft create git@github.com:complexity-science-hub/llm-in-a-box-template.git 75 | ``` 76 | 77 | Ensure you have checked out vllm (afterwards): 78 | 79 | ``` 80 | git clone --branch v0.10.0 --depth 1 https://github.com/vllm-project/vllm.git services/model-server/vllm 81 | ``` 82 | 83 | Now set up your secrets in the `.env` file. 84 | See a template below. 85 | 86 | Fast mode: 87 | 88 | ```bash 89 | cd <> 90 | ./generate-env.sh 91 | ``` 92 | 93 | ``` 94 | ROOT_DOMAIN=llminabox.geoheil.com 95 | CLOUDFLARE_IPS=173.245.48.0/20,103.21.244.0/22,103.22.200.0/22,103.31.4.0/22,141.101.64.0/18,108.162.192.0/18,190.93.240.0/20,188.114.96.0/20,197.234.240.0/22,198.41.128.0/17,162.158.0.0/15,104.16.0.0/13,104.24.0.0/14,172.64.0.0/13,131.0.72.0/22 96 | LOCAL_IPS=127.0.0.1/32,10.0.0.0/8,192.168.0.0/16,172.16.0.0/12 97 | TZ=Europe/Vienna 98 | 99 | # openssl rand -hex 32 for each 100 | LLM_ROUTER_DB=llm_router_db 101 | LLM_ROUTER_DB_USER=llm_router_db_user 102 | LLM_ROUTER_DB_PASSWORD=somepassword 103 | LITELLM_MASTER_KEY=somepassword 104 | LITELLM_SALT_KEY=somepassword 105 | LITELLM_UI_USERNAME=admin 106 | LITELLM_UI_PASSWORD=somepassword 107 | 108 | ROUTER_OPENAI_API_KEY=sk-sometoken 109 | ROUTER_ANTHROPIC_API_KEY=sk-someothertoken 110 | 111 | CHAT_UI_DB=chat_ui_db 112 | CHAT_UI_DB_USER=chat_ui_db_user 113 | CHAT_UI_DB_PASSWORD=somepassword 114 | CHAT_UI_SECRET_KEY=somepassword 115 | 116 | QDRANT__SERVICE__API_KEY=somepassword 117 | ``` 118 | 119 | 120 | Finally, start the servies 121 | 122 | 123 | ```bash 124 | # cpu 125 | docker compose --profile llminabox --profile ollama-cpu --profile docling-cpu --profile vectordb-cpu up -d 126 | 127 | # gpu 128 | docker compose --profile llminabox 
--profile ollama-gpu --profile docling-gpu --profile vectordb-cpu up -d 129 | 130 | docker compose logs -f 131 | ``` 132 | 133 | finally in your browser navigate to: 134 | 135 | - connect to the model server (ollama via docker) 136 | - choose a suitable model from https://ollama.com 137 | - pull it `docker exec -it ollama ollama pull gemma3:4b` 138 | - verify it is here `docker exec -it ollama ollama ls` 139 | - Set up the model router available at http://llm.llminabox.geoheil.com/ui 140 | - log in with the credentials set up in the `.env` file from above 141 | - register the model: Ensure that []() 142 | ```bash 143 | - model_name: "gemma3:4b" 144 | litellm_params: 145 | model: "ollama_chat/gemma3:4b" 146 | api_base: "http://ollama:11434" 147 | ``` 148 | - possibly modify other model registrations 149 | - ensure you have API keys for all the models you want to use 150 | - Openai 151 | - Claude 152 | - Gemini 153 | - VertexAI: make sure to set the right google cloud project 154 | - stop and restart the model router 155 | - go to http://llm.llminabox.geoheil.com/ui 156 | - verify in the UI the model is detected 157 | - create a new API key (we will use that later in the chat UI) 158 | - name: `llminabox` 159 | - select the desired models (i.e. all for now) 160 | - possibly add some restrictions such as budget, rate limit, expiration, etc. 161 | - verify the desired models work in the llm playground http://llm.llminabox.geoheil.com/ui/?page=llm-playground 162 | - Lets make this now available for end users. We use Openwebui as a chat UI: http://chat.llminabox.geoheil.com 163 | - create a user (use some more sensible credentials) 164 | - admin 165 | - admin@example.com 166 | - test 167 | - mail delivery is not set up by default - you have to set up according to your own organization's needs. 
168 | - in the admin settings 169 | - ensure the right model connection is defined - see http://chat.llminabox.geoheil.com/admin/settings/connections 170 | - under openai compatible - create a new connection 171 | - connection URL: `http://llmrouter:4000/v1` 172 | - api key: the one you created above 173 | - ensure the default openai API is disabled - and all traffic goes through the model router 174 | - Litelm model router: llm.llminabox.geoheil.com 175 | 176 | #### Kubernetes (advanced) 177 | 178 | This is a more advanced setup. 179 | We use [fluxcd](https://fluxcd.io/) and and [k3s](https://k3s.io/). 180 | 181 | 182 | For details see [Advanced setup](./docs/setup-advanced-k3s-fluxcd.md). 183 | 184 | > TODO: This is a work in progress. And will be updated in the future. For now, only the docker-compose based setup is available. 185 | 186 | ## Contribution 187 | 188 | Feel free to contribute - issues & even better pull requests are welcome. 189 | 190 | ### Developing the template 191 | 192 | To render an instance of the project: 193 | ```bash 194 | pixi run render-dev 195 | ``` 196 | 197 | see [rendered-template/*](rendered-template) for an example of how the peoject might look like. 198 | 199 | ```bash 200 | # brings up the CPU template quickly 201 | pixi run start-template 202 | ``` 203 | -------------------------------------------------------------------------------- /docs/QUICKSTART.md: -------------------------------------------------------------------------------- 1 | # LLM in a Box - Quick Start Guide 2 | 3 | This guide provides a comprehensive step-by-step process to get the LLM in a Box stack up and running quickly with Docker Compose. 4 | 5 | ## Prerequisites 6 | 7 | ### 1. Install Pixi (Package Manager) 8 | 9 | Pixi is required to manage dependencies and tooling. 
10 | 11 | **macOS/Linux:** 12 | ```bash 13 | curl -fsSL https://pixi.sh/install.sh | sh 14 | ``` 15 | 16 | **Windows:** 17 | ```powershell 18 | powershell -ExecutionPolicy ByPass -c "irm -useb https://pixi.sh/install.ps1 | iex" 19 | ``` 20 | 21 | 📖 **Documentation:** https://pixi.sh/latest/ 22 | 23 | ### 2. Install Docker 24 | 25 | You need a container runtime. Choose one of: 26 | 27 | - **Docker Desktop** (Recommended for beginners) 28 | - Download: https://www.docker.com/products/docker-desktop/ 29 | - Includes Docker Compose 30 | 31 | - **Rancher Desktop** (Alternative) 32 | - Download: https://rancherdesktop.io/ 33 | - Open source alternative to Docker Desktop 34 | 35 | After installation, verify Docker is working: 36 | ```bash 37 | docker --version 38 | docker compose version 39 | ``` 40 | 41 | ## Initial Setup 42 | 43 | ### 1. Clone and Enter the Repository 44 | 45 | ```bash 46 | # If you haven't cloned yet 47 | git clone 48 | cd llm-in-a-box-template 49 | ``` 50 | 51 | ### 2. Run the initialization task 52 | 53 | ```bash 54 | pixi run tpl-init-cruft 55 | ``` 56 | 57 | When prompted answer the questions asked by `pixi run`. The defaults 58 | work for a first exploration. Note the value entered for `project_slug` 59 | and change to the directory created with that name, e.g. 60 | 61 | ```bash 62 | cd llm_in_a_box 63 | ``` 64 | 65 | ### 3. Set Up Environment Variables 66 | 67 | We've created an automated script to generate your `.env` file with secure values: 68 | 69 | ```bash 70 | # Generate .env from template 71 | ./generate-env.sh 72 | ``` 73 | 74 | This will create a `.env` file with: 75 | - Auto-generated secure passwords and keys 76 | - Pre-configured domain settings for `project.docker` 77 | - Automatically detected timezone 78 | - API keys from your environment (if OPENAI_API_KEY or ANTHROPIC_API_KEY are set) 79 | - Standard database naming conventions (litellm, openwebui) 80 | 81 | ### 4. 
Add Your API Keys 82 | 83 | You'll need to manually add API keys for the AI models you want to use: 84 | 85 | #### OpenAI API Key 86 | 1. Go to: https://platform.openai.com/api-keys 87 | 2. Click "Create new secret key" 88 | 3. Copy the key (starts with `sk-`) 89 | 4. Edit `.env` and replace `sk-CHANGEME_YOUR_OPENAI_KEY` with your key 90 | 91 | #### Anthropic (Claude) API Key 92 | 1. Go to: https://console.anthropic.com/settings/keys 93 | 2. Click "Create Key" 94 | 3. Copy the key (starts with `sk-ant-`) 95 | 4. Edit `.env` and replace `sk-CHANGEME_YOUR_ANTHROPIC_KEY` with your key 96 | 97 | #### self hosted models (Ollama) 98 | 99 | 1. connect to the ollama docker container (after executing the docker start commands you find later in the instructions) 100 | 2. connect to the container `docker exec -it ollama /bin/bash` 101 | 3. pull the desired models `ollama pull gemma2:2b` (or any other model you want to use) 102 | 103 | #### Other Providers (Optional) 104 | - **Google Gemini**: https://aistudio.google.com/apikey 105 | - **Vertex AI**: Requires GCP project setup 106 | 107 | ## Configure Local Domain 108 | 109 | To use `project.docker` instead of localhost, add this to your hosts file: 110 | 111 | **macOS/Linux:** 112 | ```bash 113 | echo "127.0.0.1 project.docker" | sudo tee -a /etc/hosts 114 | echo "127.0.0.1 llm.project.docker" | sudo tee -a /etc/hosts 115 | echo "127.0.0.1 chat.project.docker" | sudo tee -a /etc/hosts 116 | ``` 117 | 118 | **Windows (Run as Administrator):** 119 | ```powershell 120 | Add-Content C:\Windows\System32\drivers\etc\hosts "127.0.0.1 project.docker" 121 | Add-Content C:\Windows\System32\drivers\etc\hosts "127.0.0.1 llm.project.docker" 122 | Add-Content C:\Windows\System32\drivers\etc\hosts "127.0.0.1 chat.project.docker" 123 | ``` 124 | 125 | ## Start the Services 126 | 127 | ### Basic Setup (Recommended for Testing) 128 | 129 | The `llminabox` profile includes the core services needed to get started: 130 | 131 | ```bash 132 | 
docker compose --profile llminabox up -d 133 | ``` 134 | 135 | This starts: 136 | - **Traefik Proxy** (port 80) - Reverse proxy for routing requests 137 | - **LiteLLM Router** - Model routing and API management 138 | - **Open WebUI** - Chat interface 139 | - **PostgreSQL databases** - For LiteLLM and Open WebUI 140 | 141 | ### Extended Setup (Optional Services) 142 | 143 | If you want additional capabilities, you can add: 144 | 145 | #### Local Model Support (Ollama) 146 | ```bash 147 | docker compose --profile llminabox --profile ollama-cpu up -d 148 | ``` 149 | 150 | #### Document Processing (Docling) 151 | ```bash 152 | docker compose --profile llminabox --profile docling-cpu up -d 153 | ``` 154 | 155 | #### Vector Database (Qdrant) 156 | ```bash 157 | docker compose --profile llminabox --profile vectordb-cpu up -d 158 | ``` 159 | 160 | #### Full Extended Setup 161 | ```bash 162 | docker compose --profile llminabox --profile ollama-cpu --profile docling-cpu --profile vectordb-cpu up -d 163 | ``` 164 | 165 | ### GPU Setup (For Better Performance) 166 | 167 | ```bash 168 | docker compose --profile llminabox --profile ollama-gpu --profile docling-gpu --profile vectordb-cpu up -d 169 | ``` 170 | 171 | Monitor the logs: 172 | ```bash 173 | docker compose logs -f 174 | ``` 175 | 176 | ## Initial Configuration 177 | 178 | ### 1. Configure the Model Router (LiteLLM) 179 | 180 | 1. Open: http://llm.project.docker/ui 181 | 2. Login with credentials from your `.env`: 182 | - Username: `admin` (or what you set for `LITELLM_UI_USERNAME`) 183 | - Password: (check `LITELLM_UI_PASSWORD` in your `.env`) 184 | 185 | 3. Register external models (if you added API keys): 186 | - OpenAI models are auto-detected if API key is set 187 | - For Claude, add models like `claude-3-sonnet-20240229` 188 | 189 | 4. 
If you started Ollama, register your local models: 190 | - Click "Add Model" 191 | - Configure: 192 | ```yaml 193 | model_name: "gemma2:2b" 194 | litellm_params: 195 | model: "ollama_chat/gemma2:2b" 196 | api_base: "http://ollama:11434" 197 | ``` 198 | 199 | 5. Create an API key for the Chat UI: 200 | - Go to "Keys" section 201 | - Click "Create Key" 202 | - Name: `chat-ui-key` 203 | - Select all models you want to make available 204 | - Copy the generated key 205 | 206 | ### 2. Configure the Chat UI (Open WebUI) 207 | 208 | 1. Open: http://chat.project.docker 209 | 2. Create an admin account: 210 | - Username: `admin` 211 | - Email: `admin@example.com` 212 | - Password: (choose a secure password) 213 | 214 | 3. Configure model connection: 215 | - Go to Admin Settings → Connections 216 | - Add OpenAI-compatible connection: 217 | - URL: `http://llmrouter:4000/v1` 218 | - API Key: (paste the key from LiteLLM) 219 | - Disable the default OpenAI connection 220 | 221 | 4. Verify models are available: 222 | - Go to the chat interface 223 | - Check that your models appear in the model selector 224 | 225 | ### 3. Pull Local Models (If Using Ollama) 226 | 227 | If you started Ollama, download models to test with: 228 | 229 | ```bash 230 | # Pull a small, fast model for testing 231 | docker exec -it ollama ollama pull gemma2:2b 232 | 233 | # Or pull a larger, more capable model 234 | docker exec -it ollama ollama pull llama3.2:3b 235 | 236 | # Verify the model is downloaded 237 | docker exec -it ollama ollama list 238 | ``` 239 | 240 | ## Testing Your Setup 241 | 242 | 1. **Test via LiteLLM:** 243 | - Go to http://llm.project.docker/ui 244 | - Use the Playground to test models 245 | 246 | 2. **Test via Chat UI:** 247 | - Go to http://chat.project.docker 248 | - Start a new chat and select a model 249 | 250 | 3. **Test Ollama directly (if started):** 251 | ```bash 252 | docker exec -it ollama ollama run gemma2:2b "Hello, how are you?" 
253 | ``` 254 | 255 | ## Service URLs 256 | 257 | - **Chat Interface**: http://chat.project.docker 258 | - **Model Router UI**: http://llm.project.docker/ui 259 | - **Model Router API**: http://llm.project.docker/v1 260 | 261 | ## Troubleshooting 262 | 263 | ### Services not starting? 264 | ```bash 265 | # Check service status 266 | docker compose ps 267 | 268 | # View detailed logs 269 | docker compose logs [service-name] 270 | 271 | # Common services: llmrouter, chatui, routerdb, chatuidb 272 | ``` 273 | 274 | ### Can't access URLs? 275 | - Ensure hosts file is updated (see Configure Local Domain) 276 | - Check if port 80 is available: `lsof -i :80` (macOS/Linux) 277 | - Try accessing via localhost: http://localhost instead 278 | - Verify the proxy is running: `docker compose logs proxy` 279 | 280 | ### Models not showing up? 281 | 1. Ensure API keys are properly set in `.env` 282 | 2. If using Ollama, ensure models are pulled 283 | 3. Restart the model router after adding models: 284 | ```bash 285 | docker compose restart llmrouter 286 | ``` 287 | 288 | ### Database connection issues? 289 | - Wait a few moments for databases to initialize 290 | - Check database logs: `docker compose logs routerdb chatuidb` 291 | 292 | ## Next Steps 293 | 294 | 1. **Add local models**: Start Ollama and explore https://ollama.com/library 295 | 2. **Configure document extraction**: Start Docling for advanced RAG capabilities 296 | 3. **Set up vector search**: Start Qdrant for semantic search 297 | 4. 
**Production setup**: 298 | - Enable HTTPS with proper certificates 299 | - Configure authentication (Keycloak is included but disabled) 300 | - Set stronger passwords in `.env` 301 | 302 | ## Useful Commands 303 | 304 | ```bash 305 | # Stop all services 306 | docker compose down 307 | 308 | # Stop and remove all data 309 | docker compose down -v 310 | 311 | # Update services 312 | docker compose pull 313 | docker compose up -d 314 | 315 | # View resource usage 316 | docker stats 317 | 318 | # Execute commands in containers 319 | docker exec -it llmrouter /bin/bash 320 | docker exec -it chatui /bin/bash 321 | ``` 322 | 323 | ## Support 324 | 325 | - Check the main README for architectural details 326 | - Report issues at: https://github.com/complexity-science-hub/llm-in-a-box-template/issues 327 | - LiteLLM docs: https://docs.litellm.ai/ 328 | - Open WebUI docs: https://docs.openwebui.com/ 329 | - Ollama docs: https://ollama.com/ 330 | - docling docs: https://docling-project.github.io/docling/ -------------------------------------------------------------------------------- /{{cookiecutter.project_slug}}/docker-compose.yml: -------------------------------------------------------------------------------- 1 | # https://geshan.com.np/blog/2025/02/ollama-docker-compose/ 2 | services: 3 | proxy: 4 | image: traefik:v3.5 5 | container_name: proxy 6 | ports: 7 | - "80:80" 8 | #- "443:443" 9 | environment: 10 | # - CF_API_EMAIL=$CLOUDFLARE_EMAIL 11 | # - CF_DNS_API_TOKEN=$CLOUDFLARE_API_KEY 12 | - TZ=${TZ} 13 | - ROOT_DOMAIN=${ROOT_DOMAIN} 14 | profiles: 15 | - proxy 16 | - llminabox 17 | - auth 18 | - chat-ui 19 | volumes: 20 | - "/var/run/docker.sock:/var/run/docker.sock:ro" 21 | - ./services/proxy/rules:/rules 22 | - proxy_logs:/logs 23 | #- proxy_certs:/letsencrypt 24 | networks: 25 | - proxy_net 26 | restart: unless-stopped 27 | command: 28 | # TODO disable 29 | # - --log.level=DEBUG 30 | - "--providers.docker=true" 31 | - 
--providers.docker.endpoint=unix:///var/run/docker.sock # Use Docker Socket Proxy instead for improved security 32 | - "--providers.docker.exposedbydefault=false" 33 | - "--providers.docker.network=proxy_net" 34 | - "--entrypoints.web.address=:80" 35 | #- --accesslog.filepath=/logs/access.log 36 | - "--entrypoints.web.asdefault=true" 37 | # - "--entrypoints.websecure.address=:443" 38 | # - --entrypoints.websecure.http.tls.options=tls-opts@file 39 | # # Allow these IPs to set the X-Forwarded-* headers - Cloudflare IPs: https://www.cloudflare.com/ips/ 40 | # - --entrypoints.websecure.forwardedHeaders.trustedIPs=$CLOUDFLARE_IPS,$LOCAL_IPS 41 | 42 | - --providers.file.directory=/rules 43 | - --providers.file.watch=true # Only works on top level files in the rules folder 44 | #- "--providers.file.filename=/etc/traefik/dynamic_conf.toml" 45 | #- "--providers.file.filename=/rules/dynamic_conf.toml" 46 | # - --certificatesResolvers.dns-cloudflare.acme.email=$CF_API_EMAIL 47 | # - --certificatesResolvers.dns-cloudflare.acme.storage=/letsencrypt/acme.json 48 | # - --certificatesResolvers.dns-cloudflare.acme.dnsChallenge.provider=cloudflare 49 | # - --certificatesResolvers.dns-cloudflare.acme.dnsChallenge.resolvers=1.1.1.1:53,1.0.0.1:53 50 | # - --certificatesResolvers.dns-cloudflare.acme.dnsChallenge.delayBeforeCheck=90 # To delay DNS check and reduce LE hitrate 51 | # - --entryPoints.web.http.redirections.entrypoint.to=websecure 52 | # - --entryPoints.web.http.redirections.entrypoint.scheme=https 53 | # auth: 54 | # image: quay.io/keycloak/keycloak:26.2 55 | # depends_on: 56 | # authdb: 57 | # condition: service_healthy 58 | # container_name: keycloak 59 | # restart: unless-stopped 60 | # # Just-in-time build + runtime options 61 | # command: > 62 | # start 63 | # --db=postgres 64 | # --hostname auth.${ROOT_DOMAIN} 65 | # --http-enabled=true 66 | # --proxy-headers=xforwarded 67 | # --health-enabled=true 68 | # --metrics-enabled=true 69 | 70 | # environment: 71 | # 
KC_DB_URL: jdbc:postgresql://authdb:5432/${AUTH_DB} 72 | # KC_DB_USERNAME: ${AUTH_DB_USER} 73 | # KC_DB_PASSWORD: ${AUTH_DB_PASSWORD} 74 | # KC_LOG_LEVEL: INFO 75 | # KC_BOOTSTRAP_ADMIN_USERNAME: ${KC_BOOTSTRAP_ADMIN_USERNAME} 76 | # KC_BOOTSTRAP_ADMIN_PASSWORD: ${KC_BOOTSTRAP_ADMIN_PASSWORD} 77 | # #ports: 78 | # # - "8080:8080" # HTTP left open for the reverse-proxy on the same LAN 79 | # # - "9000:9000" # management port: /health and /metrics 80 | # profiles: 81 | # - auth 82 | # networks: 83 | # - proxy_net 84 | # - auth_net 85 | 86 | modelserverollama-cpu: 87 | image: ollama/ollama:0.12.11 88 | volumes: 89 | - ollama:/root/.ollama 90 | container_name: ollama 91 | restart: unless-stopped 92 | profiles: 93 | - ollama-cpu 94 | - llx 95 | networks: 96 | model_router_net: 97 | aliases: 98 | - ollama 99 | modelserverollama-gpu: 100 | image: ollama/ollama:0.12.11 101 | #ports: 102 | # - 11434:11434 103 | volumes: 104 | - ollama:/root/.ollama 105 | container_name: ollama 106 | restart: unless-stopped 107 | profiles: 108 | - ollama-gpu 109 | networks: 110 | model_router_net: 111 | aliases: 112 | - ollama 113 | deploy: 114 | resources: 115 | reservations: 116 | devices: 117 | - driver: nvidia 118 | count: all 119 | capabilities: [gpu] 120 | 121 | modelservervllm: 122 | build: 123 | # docker build -f docker/Dockerfile.cpu -t vllm-cpu-env --shm-size=4g . 
124 | # docker build -f services/model-server/vllm/docker/Dockerfile.cpu -t vllm-cpu-env --shm-size=4g services/model-server/vllm 125 | context: ./services/model-server/vllm 126 | # TODO select the right architecture/accelerator support here 127 | # TODO document & expose as easy to use docker compose profile flags 128 | # TODO add gpu claims here in case of GPU support 129 | dockerfile: docker/Dockerfile.arm 130 | target: build 131 | shm_size: "4gb" 132 | image: llminabox/modelserver:v0.10.0-cpu-arm 133 | container_name: modelserver 134 | hostname: modelserver 135 | environment: 136 | HF_HOME: /root/models 137 | HUGGING_FACE_HUB_TOKEN: ${HUGGING_FACE_HUB_TOKEN} 138 | # In case a enterprise artifact repository and huggingface mirror is used 139 | # HF_ENDPOINT: ${HF_ENDPOINT} 140 | VLLM_CPU_OMP_THREADS_BIND: "all" # or "0-3|4-7" 141 | VLLM_CPU_KVCACHE_SPACE: "16" 142 | # cpuset: "0-3" 143 | command: 144 | - "--model" 145 | - "meta-llama/Llama-3.2-1B-Instruct" 146 | # "microsoft/Phi-4-mini-instruct" 147 | - "--port" 148 | - "8000" 149 | # restart: unless-stopped 150 | ports: 151 | - "8000:8000" 152 | profiles: 153 | - model-server-cpu-arm 154 | volumes: 155 | - vllm:/root/models 156 | modelservervllm-gpu: 157 | image: vllm/vllm-openai:v0.11.0 158 | container_name: modelserver 159 | hostname: modelserver 160 | environment: 161 | HF_HOME: /root/.vllm 162 | HUGGING_FACE_HUB_TOKEN: ${HUGGING_FACE_HUB_TOKEN} 163 | # In case a enterprise artifact repository and huggingface mirror is used 164 | # HF_ENDPOINT: ${HF_ENDPOINT} 165 | VLLM_CPU_OMP_THREADS_BIND: "all" # or "0-3|4-7" 166 | VLLM_CPU_KVCACHE_SPACE: "16" 167 | # cpuset: "0-3" 168 | command: 169 | - "--model" 170 | - "meta-llama/Llama-3.2-1B-Instruct" 171 | # "microsoft/Phi-4-mini-instruct" 172 | - "--port" 173 | - "8000" 174 | # restart: unless-stopped 175 | ports: 176 | - "8000:8000" 177 | profiles: 178 | - model-server-gpu 179 | volumes: 180 | - vllm:/root/.vllm 181 | 182 | 183 | # authdb: 184 | # image: 
postgres:17.5-alpine3.22 185 | # container_name: authdb 186 | # restart: unless-stopped 187 | # environment: 188 | # POSTGRES_DB: ${AUTH_DB} 189 | # POSTGRES_USER: ${AUTH_DB_USER} 190 | # POSTGRES_PASSWORD: ${AUTH_DB_PASSWORD} 191 | # healthcheck: 192 | # test: [ "CMD-SHELL", "pg_isready -U $${POSTGRES_USER} -d $${POSTGRES_DB}" ] 193 | # interval: 5s 194 | # timeout: 10s 195 | # retries: 10 196 | # profiles: 197 | # - auth 198 | # volumes: 199 | # - llm_model_auth_db:/var/lib/postgresql/data 200 | # networks: 201 | # - auth_net 202 | routerdb: 203 | image: postgres:18.1-alpine3.22 204 | container_name: routerdb 205 | restart: unless-stopped 206 | environment: 207 | POSTGRES_DB: ${LLM_ROUTER_DB} 208 | POSTGRES_USER: ${LLM_ROUTER_DB_USER} 209 | POSTGRES_PASSWORD: ${LLM_ROUTER_DB_PASSWORD} 210 | healthcheck: 211 | test: [ "CMD-SHELL", "pg_isready -U $${POSTGRES_USER} -d $${POSTGRES_DB}" ] 212 | interval: 5s 213 | timeout: 10s 214 | retries: 10 215 | profiles: 216 | - model-router 217 | - llminabox 218 | - llx 219 | volumes: 220 | - llm_model_router_db:/var/lib/postgresql/data 221 | networks: 222 | - model_router_net 223 | 224 | llm_router: 225 | image: litellm/litellm:v1.79.1-stable 226 | platform: linux/amd64 227 | container_name: llmrouter 228 | restart: unless-stopped 229 | environment: 230 | - LITELLM_MASTER_KEY=sk-${LITELLM_MASTER_KEY} 231 | - LITELLM_SALT_KEY=${LITELLM_SALT_KEY} 232 | - STORE_MODEL_IN_DB="True" 233 | - DATABASE_URL=postgres://${LLM_ROUTER_DB_USER}:${LLM_ROUTER_DB_PASSWORD}@routerdb:5432/${LLM_ROUTER_DB} 234 | - OPENAI_API_KEY=${ROUTER_OPENAI_API_KEY} 235 | - ANTHROPIC_API_KEY=${ROUTER_ANTHROPIC_API_KEY} 236 | - LITELLM_MODE="PRODUCTION" 237 | - UI_USERNAME=${LITELLM_UI_USERNAME} 238 | - UI_PASSWORD=${LITELLM_UI_PASSWORD} 239 | profiles: 240 | - model-router 241 | - llminabox 242 | - llx 243 | networks: 244 | - model_router_net 245 | - llm_net 246 | - proxy_net 247 | expose: 248 | - "4000" 249 | depends_on: 250 | routerdb: 251 | condition: 
service_healthy 252 | #- ollama 253 | healthcheck: 254 | test: [ "CMD-SHELL", "wget --no-verbose --tries=1 http://localhost:4000/health/liveliness || exit 1" ] 255 | interval: 30s 256 | timeout: 10s 257 | retries: 3 258 | start_period: 40s 259 | #ports: 260 | # - "4000:4000" 261 | # :4000 is api; /ui -> UI 262 | volumes: 263 | - ./services/llm-router/litellm_config.yml:/app/config.yaml 264 | - ./services/llm-router/google_vertexai.json:/secrets/google_vertexai.json 265 | # --detailed_debug 266 | command: [ "--config", "/app/config.yaml", "--port", "4000", "--num_workers", "2" ] 267 | labels: 268 | - "traefik.enable=true" 269 | - "traefik.http.routers.litellm.rule=Host(`llm.${ROOT_DOMAIN}`)" 270 | - "traefik.http.services.litellm.loadbalancer.server.port=4000" 271 | # - "traefik.http.routers.litellm.tls.certresolver=dns-cloudflare" 272 | - "traefik.http.routers.litellm.middlewares=chain-no-auth@file" 273 | 274 | chatuidb: 275 | image: postgres:18.1-alpine3.22 276 | container_name: chatuidb 277 | restart: unless-stopped 278 | environment: 279 | POSTGRES_DB: ${CHAT_UI_DB} 280 | POSTGRES_USER: ${CHAT_UI_DB_USER} 281 | POSTGRES_PASSWORD: ${CHAT_UI_DB_PASSWORD} 282 | healthcheck: 283 | test: [ "CMD-SHELL", "pg_isready -U $${POSTGRES_USER} -d $${POSTGRES_DB}" ] 284 | interval: 5s 285 | timeout: 10s 286 | retries: 10 287 | profiles: 288 | - chat-ui 289 | - llminabox 290 | volumes: 291 | - llm_chat_ui_db:/var/lib/postgresql/data 292 | networks: 293 | - chatui_net 294 | 295 | docling-cpu: 296 | image: quay.io/docling-project/docling-serve-cpu:v1.1.0 297 | container_name: docling 298 | #ports: 299 | # - 5001:5001 300 | profiles: 301 | - docling-cpu 302 | networks: 303 | - chatui_net 304 | healthcheck: 305 | test: ["CMD", "curl", "-f", "http://localhost:5001/health"] 306 | interval: 15s 307 | retries: 5 308 | 309 | docling-gpu: 310 | image: quay.io/docling-project/docling-serve-cu124:v1.1.0 311 | container_name: docling 312 | #ports: 313 | # - 5001:5001 314 | profiles: 315 | - 
docling-gpu 316 | networks: 317 | - chatui_net 318 | healthcheck: 319 | test: ["CMD", "curl", "-f", "http://localhost:5001/health"] 320 | interval: 15s 321 | retries: 5 322 | deploy: 323 | resources: 324 | reservations: 325 | devices: 326 | - driver: nvidia 327 | count: all 328 | capabilities: [gpu] 329 | 330 | vectordb-qdrant-cpu: 331 | image: qdrant/qdrant:v1.15.5 332 | container_name: qdrant 333 | hostname: qdrant 334 | restart: unless-stopped 335 | environment: 336 | QDRANT__SERVICE__API_KEY: ${QDRANT__SERVICE__API_KEY} 337 | profiles: 338 | - vectordb-cpu 339 | networks: 340 | - chatui_net 341 | # configs: 342 | # - source: qdrant_config 343 | # target: /qdrant/config/production.yaml 344 | volumes: 345 | - qdrant:/qdrant/storage 346 | # vectordb-qdrant-cpu-healthcheck: 347 | # restart: always 348 | # image: curlimages/curl:latest 349 | # entrypoint: ["/bin/sh", "-c", "--", "while true; do sleep 30; done;"] 350 | # profiles: 351 | # - vectordb-cpu 352 | # depends_on: 353 | # - qdrant 354 | # healthcheck: 355 | # test: ["CMD", "curl", "-f", "http://qdrant:6333/readyz"] 356 | # interval: 10s 357 | # timeout: 2s 358 | # retries: 5 359 | llm_chat_ui: 360 | # TODO: create GPU variation as well for faster whisper and embeddings 361 | # TODO add a vector database elastic? qdrant? 
362 | image: ghcr.io/open-webui/open-webui:v0.6.36 363 | container_name: chatui 364 | volumes: 365 | - open-webui:/app/backend/data 366 | depends_on: 367 | chatuidb: 368 | condition: service_healthy 369 | llm_router: 370 | condition: service_healthy 371 | environment: 372 | WEBUI_SECRET_KEY: ${CHAT_UI_SECRET_KEY} 373 | CORS_ALLOW_ORIGIN: http://chat.${ROOT_DOMAIN} 374 | CONTENT_EXTRACTION_ENGINE: docling 375 | DOCLING_SERVER_URL: http://docling:5001 376 | DATABASE_URL: postgresql://${CHAT_UI_DB_USER}:${CHAT_UI_DB_PASSWORD}@chatuidb:5432/${CHAT_UI_DB} 377 | ENABLE_IMAGE_GENERATION: "True" 378 | ENABLE_CHANNELS: "True" 379 | IMAGE_GENERATION_ENGINE: "openai" 380 | IMAGE_SIZE: "1024x1024" 381 | IMAGE_GENERATION_MODEL: "dall-e-3" 382 | ENABLE_OLLAMA_API: "False" 383 | ENABLE_OPENAI_API: "True" 384 | #OPENAI_API_BASE_URLS: "https://api.openai.com/v1;http://llmrouter:4000/v1" 385 | #OPENAI_API_KEYS: "${ROUTER_OPENAI_API_KEY};${LLM_ROUTER_KEY_FOR_CHAT_UI}" 386 | IMAGES_OPENAI_API_KEY: "${ROUTER_OPENAI_API_KEY}" 387 | VECTOR_DB: qdrant 388 | QDRANT_API_KEY: ${QDRANT__SERVICE__API_KEY} 389 | QDRANT_URI: http://qdrant:6333 390 | ENABLE_QDRANT_MULTITENANCY_MODE: "True" 391 | # ENABLE_OPENAI_API, OPENAI_API_KEY, OPENAI_API_KEYS 392 | # VECTOR_DB, ELASTICSEARCH_URL 393 | # - 'OLLAMA_BASE_URL=http://ollama:11434' 394 | restart: unless-stopped 395 | profiles: 396 | - chat-ui 397 | - llminabox 398 | networks: 399 | - proxy_net 400 | - llm_net 401 | - chatui_net 402 | labels: 403 | - "traefik.enable=true" 404 | - "traefik.http.routers.chat.rule=Host(`chat.${ROOT_DOMAIN}`)" 405 | - "traefik.http.routers.chat.service=chat" 406 | - "traefik.http.services.chat.loadbalancer.server.port=8080" 407 | - "traefik.http.routers.chat.middlewares=chain-no-auth@file" 408 | 409 | networks: 410 | proxy_net: 411 | name: proxy_net 412 | # external: true 413 | model_router_net: 414 | auth_net: 415 | llm_net: 416 | chatui_net: 417 | 418 | volumes: 419 | ollama: {} 420 | vllm: {} 421 | 
llm_model_router_db: {} 422 | #llm_model_auth_db: {} 423 | llm_chat_ui_db: {} 424 | proxy_logs: {} 425 | proxy_certs: {} 426 | open-webui: {} 427 | qdrant: {} 428 | --------------------------------------------------------------------------------