├── .prettierignore
├── examples
    ├── agno
    │   ├── calculator-agent
    │   │   ├── __init__.py
    │   │   ├── requirements.txt
    │   │   ├── Dockerfile
    │   │   ├── .terraform.lock.hcl
    │   │   ├── main.tf
    │   │   ├── openwebui_pipe_function.py
    │   │   ├── agent.template.yaml
    │   │   └── index.mjs
    │   └── namespace.yaml
    ├── mcp-server
    │   ├── calculator
    │   │   ├── __init__.py
    │   │   ├── requirements.txt
    │   │   ├── Dockerfile
    │   │   ├── main.tf
    │   │   ├── mcp-server.template.yaml
    │   │   ├── .terraform.lock.hcl
    │   │   ├── server.py
    │   │   └── index.mjs
    │   └── namespace.yaml
    └── strands-agents
        ├── calculator-agent
        │   ├── __init__.py
        │   ├── requirements.txt
        │   ├── Dockerfile
        │   ├── .terraform.lock.hcl
        │   ├── main.tf
        │   ├── openwebui_pipe_function.py
        │   ├── agent.template.yaml
        │   └── index.mjs
        └── namespace.yaml
├── workshops
    ├── eks-genai-workshop
    │   ├── .env.workshop
    │   ├── static
    │   │   ├── aws-logo.png
    │   │   ├── images
    │   │   │   ├── module-1
    │   │   │   │   ├── flies.png
    │   │   │   │   ├── logs.png
    │   │   │   │   ├── models.png
    │   │   │   │   ├── vllm.png
    │   │   │   │   ├── sign-up.png
    │   │   │   │   ├── get-started.png
    │   │   │   │   ├── bedrock-review-submit.png
    │   │   │   │   ├── claude-with-rag-context.png
    │   │   │   │   ├── bedrock-model-access-next.png
    │   │   │   │   ├── bedrock-model-access-page.png
    │   │   │   │   ├── claude-without-rag-context.png
    │   │   │   │   ├── bedrock-select-claude-sonnet.png
    │   │   │   │   ├── openwebui-create-knowledge-base.png
    │   │   │   │   ├── openwebui-knowledge-workspace.png
    │   │   │   │   ├── openwebui-select-knowledge-base.png
    │   │   │   │   ├── model-comparison-claude-vs-llama.png
    │   │   │   │   └── claude-kubernetes-operators-response.png
    │   │   │   ├── module-2
    │   │   │   │   ├── snake.png
    │   │   │   │   ├── trace.png
    │   │   │   │   ├── traces.png
    │   │   │   │   ├── copy-key.png
    │   │   │   │   ├── langfuse.png
    │   │   │   │   ├── test-key.png
    │   │   │   │   ├── create-key.png
    │   │   │   │   ├── dashboard.png
    │   │   │   │   ├── generation.png
    │   │   │   │   ├── new-model.png
    │   │   │   │   ├── virtual-key.png
    │   │   │   │   ├── final-widget.png
    │   │   │   │   ├── go-to-project.png
    │   │   │   │   ├── back-dashboard.png
    │   │   │   │   ├── cost-dashboard.png
    │   │   │   │   ├── create-dashboard.png
    │   │   │   │   ├── final-dashboard.png
    │   │   │   │   ├── langfuse-login.png
    │   │   │   │   ├── select-test-key.png
    │   │   │   │   ├── specific-trace.png
    │   │   │   │   ├── value-rendered.png
    │   │   │   │   ├── latency-dashboard.png
    │   │   │   │   ├── litellm-login-page.png
    │   │   │   │   ├── cost-dashboard-long.png
    │   │   │   │   ├── litellm-test-interface.png
    │   │   │   │   ├── litellm-usage-analytics.png
    │   │   │   │   ├── litellm-api-landing-page.png
    │   │   │   │   └── litellm-models-dashboard.png
    │   │   │   ├── introduction
    │   │   │   │   ├── url.png
    │   │   │   │   ├── archi.png
    │   │   │   │   ├── open-aws-console.png
    │   │   │   │   ├── workshopstudio-event1.jpg
    │   │   │   │   ├── workshopstudio-event2.jpg
    │   │   │   │   └── workshopstudio-event3.jpg
    │   │   │   └── module-3
    │   │   │       ├── example1.png
    │   │   │       ├── example2.png
    │   │   │       ├── gen-ai-on-eks.png
    │   │   │       └── LoanBuddy-Observability.png
    │   │   ├── configuration
    │   │   │   └── index.en.md
    │   │   ├── code
    │   │   │   └── module3
    │   │   │       └── credit-validation
    │   │   │           ├── example1.png
    │   │   │           ├── example2.png
    │   │   │           ├── .gitignore
    │   │   │           ├── Dockerfile
    │   │   │           ├── .example.env
    │   │   │           ├── pyvenv.cfg
    │   │   │           ├── build-push-docker-image.sh
    │   │   │           └── requirements.txt
    │   │   └── iam-policy.json
    │   ├── config.workshop.json
    │   └── content
    │       └── introduction
    │           └── getting-started
    │               └── index.en.md
    └── README.md
├── .prettierrc
├── components
    ├── llm-model
    │   ├── tgi
    │   │   ├── namespace.yaml
    │   │   ├── secret.template.yaml
    │   │   ├── archive
    │   │   │   ├── pvc-neuron-cache.yaml
    │   │   │   ├── job-tgi-neuron-build.template.yaml
    │   │   │   └── main.tf
    │   │   ├── pvc-huggingface-cache.yaml
    │   │   ├── index.mjs
    │   │   ├── model-qwen3-8b.template.yaml
    │   │   ├── model-qwen3-8b-fp8.template.yaml
    │   │   └── model-deepseek-r1-qwen3-8b.template.yaml
    │   ├── vllm
    │   │   ├── namespace.yaml
    │   │   ├── secret.template.yaml
    │   │   ├── pvc-neuron-cache.yaml
    │   │   ├── pvc-huggingface-cache.yaml
    │   │   ├── archive
    │   │   │   ├── job-vllm-neuron-build.template.yaml
    │   │   │   └── main.tf
    │   │   ├── index.mjs
    │   │   ├── model-gemma3-27b-gptq.template.yaml
    │   │   ├── model-deepseek-r1-qwen3-8b.template.yaml
    │   │   ├── model-qwen3-30b-thinking-fp8.template.yaml
    │   │   ├── model-gpt-oss-20b.template.yaml
    │   │   ├── model-qwen3-30b-instruct-fp8.template.yaml
    │   │   ├── model-qwen3-32b-fp8.template.yaml
    │   │   ├── model-qwen3-coder-30b-fp8.template.yaml
    │   │   ├── model-gpt-oss-120b.template.yaml
    │   │   ├── model-qwen3-8b-neuron.template.yaml
    │   │   ├── model-magistral-24b-fp8.template.yaml
    │   │   └── model-deepseek-r1-qwen3-8b-neuron.template.yaml
    │   ├── ollama
    │   │   ├── namespace.yaml
    │   │   ├── service.yaml
    │   │   ├── pvc.yaml
    │   │   ├── configmap.template.yaml
    │   │   ├── ingress.template.yaml
    │   │   ├── deployment.template.yaml
    │   │   └── index.mjs
    │   └── sglang
    │       ├── namespace.yaml
    │       ├── secret.template.yaml
    │       ├── pvc.yaml
    │       ├── index.mjs
    │       ├── model-gpt-oss-20b.template.yaml
    │       ├── model-qwen3-32b-fp8.template.yaml
    │       ├── model-qwen3-30b-instruct-fp8.template.yaml
    │       ├── model-qwen3-30b-thinking-fp8.template.yaml
    │       └── model-qwen3-coder-30b-fp8.template.yaml
    ├── embedding-model
    │   └── tei
    │       ├── namespace.yaml
    │       ├── secret.template.yaml
    │       ├── pvc.yaml
    │       ├── index.mjs
    │       ├── model-qwen3-embedding-4b-bf16.template.yaml
    │       ├── model-qwen3-embedding-8b-bf16.template.yaml
    │       ├── model-qwen3-embedding-06b-bf16.template.yaml
    │       └── model-qwen3-embedding-06b-bf16-cpu.template.yaml
    ├── guardrail
    │   └── guardrails-ai
    │       ├── docker
    │       │   ├── requirements.txt
    │       │   ├── config.py
    │       │   ├── docker-entrypoint.sh
    │       │   ├── Dockerfile
    │       │   └── build-image.sh
    │       ├── GUARDRAILS_AI.md
    │       ├── app.template.yaml
    │       ├── index.mjs
    │       └── .terraform.lock.hcl
    ├── vector-database
    │   ├── milvus
    │   │   ├── secret.template.yaml
    │   │   ├── ingress.template.yaml
    │   │   ├── values.template.yaml
    │   │   └── main.tf
    │   ├── qdrant
    │   │   ├── secret.template.yaml
    │   │   ├── values.template.yaml
    │   │   └── ingress.template.yaml
    │   └── chroma
    │       ├── values.template.yaml
    │       └── index.mjs
    ├── o11y
    │   ├── phoenix
    │   │   ├── values.template.yaml
    │   │   └── index.mjs
    │   ├── mlflow
    │   │   ├── values.template.yaml
    │   │   ├── .terraform.lock.hcl
    │   │   ├── index.mjs
    │   │   └── main.tf
    │   └── langfuse
    │       ├── .terraform.lock.hcl
    │       ├── index.mjs
    │       └── main.tf
    ├── workflow-automation
    │   └── n8n
    │       ├── values.template.yaml
    │       └── index.mjs
    ├── gui-app
    │   └── openwebui
    │       ├── values.template.yaml
    │       └── index.mjs
    └── ai-gateway
        ├── kong
        │   ├── kong.template.yaml
        │   ├── KONG.md
        │   ├── index.mjs
        │   ├── examples
        │   │   └── kong.yaml
        │   └── values.template.yaml
        └── litellm
            ├── .terraform.lock.hcl
            └── main.tf
├── .vscode
    └── settings.json
├── assets
    ├── openwebui_functions.png
    ├── openwebui_embedding_model.png
    └── openwebui_embedding_calculator_agent.png
├── .gitleaks.toml
├── CODE_OF_CONDUCT.md
├── package.json
├── .gitignore
├── .env
├── docs
    └── INFRA_SETUP.md
├── terraform
    ├── eks.tf.template
    ├── vpc.tf
    ├── modules
    │   ├── eks-standard-mode
    │   │   └── variables.tf
    │   └── eks-auto-mode
    │       └── variables.tf
    ├── efs.tf
    ├── alb-acm.tf
    └── variables.tf
├── LICENSE
├── cli-menu.json
└── ecr-image-sync.sh

/.prettierignore:
--------------------------------------------------------------------------------
1 | *.template.yaml
2 | 
--------------------------------------------------------------------------------
/examples/agno/calculator-agent/__init__.py:
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/mcp-server/calculator/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/.env.workshop: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "printWidth": 120 3 | } 4 | -------------------------------------------------------------------------------- /examples/strands-agents/calculator-agent/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/mcp-server/calculator/requirements.txt: -------------------------------------------------------------------------------- 1 | fastmcp==2.13.0 -------------------------------------------------------------------------------- /workshops/README.md: -------------------------------------------------------------------------------- 1 | Host code/settings and md files for the workshops here 2 | -------------------------------------------------------------------------------- /examples/agno/namespace.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: agno 5 | -------------------------------------------------------------------------------- /components/llm-model/tgi/namespace.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: tgi 5 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "cSpell.words": [ 3 | "agentic", 4 | "configmap" 5 | ] 6 | } -------------------------------------------------------------------------------- /components/llm-model/vllm/namespace.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: vllm 5 | -------------------------------------------------------------------------------- /examples/mcp-server/namespace.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: mcp-server 5 | -------------------------------------------------------------------------------- /components/embedding-model/tei/namespace.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: tei 5 | -------------------------------------------------------------------------------- /components/guardrail/guardrails-ai/docker/requirements.txt: -------------------------------------------------------------------------------- 1 | guardrails-ai[api]>=0.5.0 2 | gunicorn[gthread]>=22.0.0,<23 -------------------------------------------------------------------------------- /components/llm-model/ollama/namespace.yaml: -------------------------------------------------------------------------------- 1 | 
apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: ollama 5 | -------------------------------------------------------------------------------- /components/llm-model/sglang/namespace.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: sglang 5 | -------------------------------------------------------------------------------- /examples/strands-agents/namespace.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: strands-agents 5 | -------------------------------------------------------------------------------- /assets/openwebui_functions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/assets/openwebui_functions.png -------------------------------------------------------------------------------- /assets/openwebui_embedding_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/assets/openwebui_embedding_model.png -------------------------------------------------------------------------------- /assets/openwebui_embedding_calculator_agent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/assets/openwebui_embedding_calculator_agent.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/aws-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/aws-logo.png -------------------------------------------------------------------------------- /.gitleaks.toml: -------------------------------------------------------------------------------- 1 | [allowlist] 2 | description = "Ignore specific test/dummy keys" 3 | regexes = [ 4 | '''LITELLM_API_KEY''', 5 | '''LANGFUSE_PUBLIC_KEY''', 6 | '''LANGFUSE_SECRET_KEY''' 7 | ] -------------------------------------------------------------------------------- /components/llm-model/tgi/secret.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | name: hf-token 5 | namespace: tgi 6 | type: Opaque 7 | stringData: 8 | token: {{{HF_TOKEN}}} -------------------------------------------------------------------------------- /components/embedding-model/tei/secret.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | name: hf-token 5 | namespace: tei 6 | type: Opaque 7 | stringData: 8 | token: {{{HF_TOKEN}}} -------------------------------------------------------------------------------- /components/llm-model/sglang/secret.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | name: hf-token 5 | namespace: sglang 6 | type: Opaque 7 | stringData: 8 | token: {{{HF_TOKEN}}} -------------------------------------------------------------------------------- /components/llm-model/vllm/secret.template.yaml: 
-------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | name: hf-token 5 | namespace: vllm 6 | type: Opaque 7 | stringData: 8 | token: {{{HF_TOKEN}}} 9 | -------------------------------------------------------------------------------- /components/vector-database/milvus/secret.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | name: basic-auth 5 | namespace: milvus 6 | type: Opaque 7 | stringData: 8 | auth: {{{AUTH}}} -------------------------------------------------------------------------------- /components/vector-database/qdrant/secret.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | name: basic-auth 5 | namespace: qdrant 6 | type: Opaque 7 | stringData: 8 | auth: {{{AUTH}}} -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-1/flies.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-1/flies.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-1/logs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-1/logs.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-1/models.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-1/models.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-1/vllm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-1/vllm.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/snake.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/snake.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/trace.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/trace.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/traces.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/traces.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/introduction/url.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/introduction/url.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-1/sign-up.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-1/sign-up.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/copy-key.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/copy-key.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/langfuse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/langfuse.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/test-key.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/test-key.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-3/example1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-3/example1.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-3/example2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-3/example2.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/introduction/archi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/introduction/archi.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-1/get-started.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-1/get-started.png 
-------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/create-key.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/create-key.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/dashboard.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/generation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/generation.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/new-model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/new-model.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/virtual-key.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/virtual-key.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/final-widget.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/final-widget.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/go-to-project.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/go-to-project.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-3/gen-ai-on-eks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-3/gen-ai-on-eks.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/back-dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/back-dashboard.png -------------------------------------------------------------------------------- 
/workshops/eks-genai-workshop/static/images/module-2/cost-dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/cost-dashboard.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/create-dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/create-dashboard.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/final-dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/final-dashboard.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/langfuse-login.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/langfuse-login.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/select-test-key.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/select-test-key.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/specific-trace.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/specific-trace.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/value-rendered.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/value-rendered.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/configuration/index.en.md: -------------------------------------------------------------------------------- 1 | --- 2 | title : "Configuration" 3 | weight : 20 4 | --- 5 | 6 | # Configuration 7 | Find out how to create and organize your content quickly and intuitively. 
8 | -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/latency-dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/latency-dashboard.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/litellm-login-page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/litellm-login-page.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/introduction/open-aws-console.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/introduction/open-aws-console.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-1/bedrock-review-submit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-1/bedrock-review-submit.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/cost-dashboard-long.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/cost-dashboard-long.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/code/module3/credit-validation/example1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/code/module3/credit-validation/example1.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/code/module3/credit-validation/example2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/code/module3/credit-validation/example2.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-1/claude-with-rag-context.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-1/claude-with-rag-context.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/litellm-test-interface.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/litellm-test-interface.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/litellm-usage-analytics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/litellm-usage-analytics.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-3/LoanBuddy-Observability.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-3/LoanBuddy-Observability.png -------------------------------------------------------------------------------- /examples/agno/calculator-agent/requirements.txt: -------------------------------------------------------------------------------- 1 | agno==1.7.0 2 | boto3==1.39.1 3 | openai==1.93.0 4 | mcp==1.10.0 5 | fastapi[standard]==0.115.13 6 | pydantic==2.11.7 7 | langfuse==3.0.5 8 | openlit==1.34.23 9 | SQLAlchemy==2.0.41 10 | -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/introduction/workshopstudio-event1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/introduction/workshopstudio-event1.jpg -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/introduction/workshopstudio-event2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/introduction/workshopstudio-event2.jpg -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/introduction/workshopstudio-event3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/introduction/workshopstudio-event3.jpg -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-1/bedrock-model-access-next.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-1/bedrock-model-access-next.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-1/bedrock-model-access-page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-1/bedrock-model-access-page.png -------------------------------------------------------------------------------- 
/workshops/eks-genai-workshop/static/images/module-1/claude-without-rag-context.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-1/claude-without-rag-context.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/litellm-api-landing-page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/litellm-api-landing-page.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/litellm-models-dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/litellm-models-dashboard.png -------------------------------------------------------------------------------- /components/llm-model/ollama/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: ollama 5 | namespace: ollama 6 | spec: 7 | selector: 8 | app: ollama 9 | ports: 10 | - name: http 11 | port: 11434 12 | -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-1/bedrock-select-claude-sonnet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-1/bedrock-select-claude-sonnet.png -------------------------------------------------------------------------------- /examples/strands-agents/calculator-agent/requirements.txt: -------------------------------------------------------------------------------- 1 | strands-agents==0.1.8 2 | strands-agents[otel]==0.1.8 3 | strands-agents-tools==0.1.6 4 | mcp==1.10.0 5 | fastapi[standard]==0.115.13 6 | pydantic==2.11.7 7 | litellm==1.73.0 8 | langfuse==3.0.5 -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-1/openwebui-create-knowledge-base.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-1/openwebui-create-knowledge-base.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-1/openwebui-knowledge-workspace.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-1/openwebui-knowledge-workspace.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-1/openwebui-select-knowledge-base.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-1/openwebui-select-knowledge-base.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-1/model-comparison-claude-vs-llama.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-1/model-comparison-claude-vs-llama.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-1/claude-kubernetes-operators-response.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-1/claude-kubernetes-operators-response.png -------------------------------------------------------------------------------- /components/guardrail/guardrails-ai/docker/config.py: -------------------------------------------------------------------------------- 1 | from guardrails.hub import DetectPII 2 | from guardrails import Guard 3 | 4 | guard = Guard() 5 | guard.name = "detect-pii" 6 | guard.use(DetectPII(pii_entities=["EMAIL_ADDRESS", "IP_ADDRESS"], on_fail="fix")) 7 | -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/code/module3/credit-validation/.gitignore: -------------------------------------------------------------------------------- 1 | # Python libraries and virtual environments 2 | venv/ 3 | env/ 4 | lib/ 5 | bin/ 6 | include/ 7 | __pycache__/ 8 | *.pyc 9 | *.pyo 10 | *.egg-info/ 11 | dist/ 12 | build/ 13 | .env 14 | -------------------------------------------------------------------------------- /components/vector-database/chroma/values.template.yaml: -------------------------------------------------------------------------------- 1 | chromadb: 2 | auth: 3 | enabled: false 4 | data: 5 | storageClass: ebs 6 | volumeSize: 10Gi 7 | 8 | resources: 9 | requests: 10 | cpu: 1 11 | memory: 2Gi 12 | limits: 13 | memory: 2Gi 14 | -------------------------------------------------------------------------------- /components/llm-model/ollama/pvc.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: PersistentVolumeClaim 3 | metadata: 4 | name: ollama-cache 5 | namespace: ollama 6 | spec: 7 | storageClassName: efs 8 | accessModes: 9 | - ReadWriteMany 10 | resources: 11 | requests: 12 | storage: 100Gi # Dummy 13 | -------------------------------------------------------------------------------- /examples/agno/calculator-agent/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.12-slim 2 | 3 | WORKDIR /app 4 | COPY requirements.txt . 5 | RUN pip install --no-cache-dir -r requirements.txt 6 | COPY __init__.py . 7 | COPY agent.py . 
8 | EXPOSE 80 9 | CMD ["fastapi", "run", "agent.py", "--proxy-headers", "--port", "80"] 10 | -------------------------------------------------------------------------------- /components/embedding-model/tei/pvc.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: PersistentVolumeClaim 3 | metadata: 4 | name: huggingface-cache 5 | namespace: tei 6 | spec: 7 | storageClassName: efs 8 | accessModes: 9 | - ReadWriteMany 10 | resources: 11 | requests: 12 | storage: 100Gi # Dummy 13 | -------------------------------------------------------------------------------- /components/llm-model/sglang/pvc.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: PersistentVolumeClaim 3 | metadata: 4 | name: huggingface-cache 5 | namespace: sglang 6 | spec: 7 | storageClassName: efs 8 | accessModes: 9 | - ReadWriteMany 10 | resources: 11 | requests: 12 | storage: 100Gi # Dummy 13 | -------------------------------------------------------------------------------- /components/llm-model/vllm/pvc-neuron-cache.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: PersistentVolumeClaim 3 | metadata: 4 | name: neuron-cache 5 | namespace: vllm 6 | spec: 7 | storageClassName: efs 8 | accessModes: 9 | - ReadWriteMany 10 | resources: 11 | requests: 12 | storage: 100Gi # Dummy 13 | -------------------------------------------------------------------------------- /examples/strands-agents/calculator-agent/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.12-slim 2 | 3 | WORKDIR /app 4 | COPY requirements.txt . 5 | RUN pip install --no-cache-dir -r requirements.txt 6 | COPY __init__.py . 7 | COPY agent.py . 
8 | EXPOSE 80 9 | CMD ["fastapi", "run", "agent.py", "--proxy-headers", "--port", "80"] 10 | -------------------------------------------------------------------------------- /components/llm-model/tgi/archive/pvc-neuron-cache.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: PersistentVolumeClaim 3 | metadata: 4 | name: neuron-cache 5 | namespace: tgi 6 | spec: 7 | storageClassName: efs 8 | accessModes: 9 | - ReadWriteMany 10 | resources: 11 | requests: 12 | storage: 100Gi # Dummy 13 | -------------------------------------------------------------------------------- /components/llm-model/tgi/pvc-huggingface-cache.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: PersistentVolumeClaim 3 | metadata: 4 | name: huggingface-cache 5 | namespace: tgi 6 | spec: 7 | storageClassName: efs 8 | accessModes: 9 | - ReadWriteMany 10 | resources: 11 | requests: 12 | storage: 100Gi # Dummy 13 | -------------------------------------------------------------------------------- /components/llm-model/vllm/pvc-huggingface-cache.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: PersistentVolumeClaim 3 | metadata: 4 | name: huggingface-cache 5 | namespace: vllm 6 | spec: 7 | storageClassName: efs 8 | accessModes: 9 | - ReadWriteMany 10 | resources: 11 | requests: 12 | storage: 100Gi # Dummy 13 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /examples/mcp-server/calculator/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.12-slim 2 | 3 | WORKDIR /app 4 | COPY requirements.txt . 5 | RUN pip install --no-cache-dir -r requirements.txt 6 | COPY __init__.py . 7 | COPY server.py . 
8 | EXPOSE 8000 9 | CMD ["fastmcp", "run", "server.py", "--transport", "http", "--host", "0.0.0.0", "--port", "8000"] 10 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "genai-on-eks-starter-kit", 3 | "version": "1.0.0", 4 | "type": "module", 5 | "dependencies": { 6 | "commander": "11.1.0", 7 | "dotenv": "16.5.0", 8 | "handlebars": "4.7.8", 9 | "inquirer": "12.6.3", 10 | "lodash": "4.17.21", 11 | "prettier": "3.5.3", 12 | "zx": "8.5.4" 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Terraform 2 | .terraform/ 3 | *.tfstate 4 | *.tfstate.* 5 | *.tfvars 6 | *.tfvars.json 7 | 8 | # Node.js 9 | node_modules/ 10 | 11 | # Python 12 | __pycache__/ 13 | 14 | # Project 15 | .DS_Store 16 | .kiro/ 17 | .backup/ 18 | .temp/ 19 | .kubeconfig 20 | .env.local 21 | config.local.json 22 | *.rendered.yaml 23 | test*.py 24 | test*.mjs 25 | terraform/eks.tf 26 | -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/iam-policy.json: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Effect": "Allow", 6 | "Action": [ 7 | "cloudformation:ListStacks", 8 | "cloudformation:DescribeStacks", 9 | "cloudformation:DescribeStackEvents", 10 | "cloudformation:DescribeStackResources", 11 | "cloudformation:GetTemplate" 12 | ], 13 | "Resource": "*" 14 | } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /components/llm-model/ollama/configmap.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: ollama-init-script 5 | namespace: ollama 6 | data: 7 | init-ollama.sh: | 8 | #!/bin/bash 9 | 10 | while ! /bin/ollama ps > /dev/null 2>&1; do 11 | sleep 5 12 | done 13 | 14 | models=({{{models}}}) 15 | 16 | for model in "${models[@]}"; do 17 | echo "Pulling model: $model" 18 | /bin/ollama pull "$model" 19 | done 20 | -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/code/module3/credit-validation/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.12-slim 2 | 3 | WORKDIR /app 4 | 5 | COPY requirements.txt . 6 | RUN pip install --no-cache-dir -r requirements.txt 7 | 8 | COPY credit-underwriting-agent.py . 9 | COPY mcp-address-validator.py . 10 | COPY mcp-image-processor.py . 11 | COPY mcp-income-employment-validator.py . 12 | 13 | COPY utils.py . 14 | COPY *.png . 
15 | 16 | EXPOSE 8080 17 | 18 | ENV PYTHONUNBUFFERED=1 19 | 20 | 21 | -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/code/module3/credit-validation/.example.env: -------------------------------------------------------------------------------- 1 | # Gateway Configuration 2 | GATEWAY_MODEL_ACCESS_KEY=your_model_access_key_here 3 | GATEWAY_URL=https://your-api-gateway-url.amazonaws.com 4 | 5 | # Langfuse Configuration (Optional - for tracing) 6 | LANGFUSE_URL=https://your-langfuse-instance.com 7 | LANGFUSE_PUBLIC_KEY=pk-lf-your-public-key 8 | LANGFUSE_SECRET_KEY=sk-lf-your-secret-key 9 | 10 | # S3 Configuration 11 | S3_BUCKET_NAME=your-s3-bucket-name 12 | -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/code/module3/credit-validation/pyvenv.cfg: -------------------------------------------------------------------------------- 1 | home = /opt/homebrew/opt/python@3.13/bin 2 | include-system-site-packages = false 3 | version = 3.13.3 4 | executable = /opt/homebrew/Cellar/python@3.13/3.13.3_1/Frameworks/Python.framework/Versions/3.13/bin/python3.13 5 | command = /opt/homebrew/opt/python@3.13/bin/python3.13 -m venv /Users/wangaws/Documents/Workshop/sample-genai-on-eks-starter-kit/workshops/module-3-building-genai-applications-DEPRECATED/code/credit-validation 6 | -------------------------------------------------------------------------------- /components/o11y/phoenix/values.template.yaml: -------------------------------------------------------------------------------- 1 | ingress: 2 | annotations: 3 | alb.ingress.kubernetes.io/target-type: ip 4 | {{#if DOMAIN}} 5 | alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}]' 6 | host: phoenix.{{{DOMAIN}}} 7 | {{/if}} 8 | 9 | resources: 10 | requests: 11 | cpu: 500m 12 | memory: 2Gi 13 | limits: 14 | memory: 2Gi 15 | 16 | postgresql: 17 | primary: 18 | resources: 19 | requests: 20 | cpu: 125m 21 | memory: 256Mi 22 | limits: 23 | memory: 256Mi 24 | -------------------------------------------------------------------------------- /components/workflow-automation/n8n/values.template.yaml: -------------------------------------------------------------------------------- 1 | ingress: 2 | enabled: true 3 | annotations: 4 | alb.ingress.kubernetes.io/target-type: ip 5 | {{#if DOMAIN}} 6 | alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}]' 7 | hosts: 8 | - host: n8n.{{{DOMAIN}}} 9 | {{else}} 10 | hosts: 11 | - host: 12 | {{/if}} 13 | paths: ["/"] 14 | tls: [] 15 | 16 | main: 17 | {{#unless DOMAIN}} 18 | extraEnv: 19 | N8N_SECURE_COOKIE: 20 | value: "false" 21 | {{/unless}} 22 | resources: 23 | requests: 24 | cpu: 1 25 | memory: 2Gi 26 | limits: 27 | memory: 2Gi 28 | -------------------------------------------------------------------------------- /components/llm-model/ollama/ingress.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: networking.k8s.io/v1 2 | kind: Ingress 3 | metadata: 4 | annotations: 5 | alb.ingress.kubernetes.io/target-type: ip 6 | {{#if DOMAIN}} 7 | alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}]' 8 | {{/if}} 9 | name: ollama 10 | namespace: ollama 11 | spec: 12 | rules: 13 | - http: 14 | paths: 15 | - backend: 16 | service: 17 | name: ollama 18 | port: 19 | name: http 20 | path: / 21 | pathType: Prefix 22 | {{#if DOMAIN}} 23 | host: ollama.{{{DOMAIN}}} 24 | {{/if}} 
-------------------------------------------------------------------------------- /components/vector-database/qdrant/values.template.yaml: -------------------------------------------------------------------------------- 1 | ingress: 2 | enabled: true 3 | annotations: 4 | external-dns.alpha.kubernetes.io/ingress-hostname-source: annotation-only 5 | nginx.ingress.kubernetes.io/auth-type: basic 6 | nginx.ingress.kubernetes.io/auth-secret: basic-auth 7 | nginx.ingress.kubernetes.io/auth-realm: "Authentication Required" 8 | ingressClassName: nginx 9 | hosts: 10 | - paths: 11 | - path: / 12 | pathType: Prefix 13 | servicePort: 6333 14 | {{#if DOMAIN}} 15 | host: qdrant.{{{DOMAIN}}} 16 | {{/if}} 17 | 18 | resources: 19 | requests: 20 | cpu: 1 21 | memory: 2Gi 22 | limits: 23 | memory: 2Gi -------------------------------------------------------------------------------- /.env: -------------------------------------------------------------------------------- 1 | REGION=us-west-2 2 | EKS_CLUSTER_NAME=genai-on-eks 3 | EKS_MODE=auto 4 | DOMAIN= 5 | HF_TOKEN= 6 | # LiteLLM 7 | LITELLM_UI_USERNAME=admin 8 | LITELLM_UI_PASSWORD=Pass@123 9 | LITELLM_API_KEY=sk-1234 10 | # Kong 11 | KONG_API_KEY=sk-1234 12 | KONG_API_KEY_HEADER=apikey 13 | # Langfuse 14 | LANGFUSE_USERNAME=admin@example.com 15 | LANGFUSE_PASSWORD=Pass@123 16 | LANGFUSE_PUBLIC_KEY=lf_pk_1234567890 17 | LANGFUSE_SECRET_KEY=lf_sk_1234567890 18 | # MLflow 19 | MLFLOW_USERNAME=admin 20 | MLFLOW_PASSWORD=Password@123 # Min 12 characters 21 | # Phoenix 22 | PHOENIX_API_KEY= 23 | # Qdrant 24 | QDRANT_USERNAME=admin 25 | QDRANT_PASSWORD=Pass@123 26 | # Milvus 27 | MILVUS_USERNAME=admin 28 | MILVUS_PASSWORD=Pass@123 29 | # Guardrails AI 30 | GUARDRAILS_AI_API_KEY= -------------------------------------------------------------------------------- /docs/INFRA_SETUP.md: -------------------------------------------------------------------------------- 1 | ## Infrastructure Setup 2 | 3 | Several AWS services and Kubernetes components are provisioned by the main Terraform code under the `terraform` folder.
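4 | 
5 | As a minimal sketch, assuming `eks.tf` has already been rendered from `eks.tf.template` (for example via the starter kit's CLI) with `EKS_MODE` set to `auto` or `standard`, the stack can be provisioned with the standard Terraform workflow:
6 | 
7 | ```bash
8 | # Hypothetical manual run; the kit's CLI may wrap these steps for you.
9 | cd terraform
10 | terraform init
11 | terraform plan   # review the planned resources first
12 | terraform apply
13 | ```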
14 | 15 | ### AWS Services 16 | 17 | - One VPC with private/public subnets and a single NAT gateway 18 | - One EKS Auto Mode cluster 19 | - One EFS file system for caching Hugging Face models and other artifacts 20 | - One ACM wildcard certificate for the provided domain 21 | 22 | ### Kubernetes Components 23 | 24 | - Set up Ingress to provision the shared ALB 25 | - Set up ExternalDNS to manage the DNS records for the public-facing services 26 | - Set up the Ingress NGINX Controller to use HTTP Basic authentication for some public-facing services 27 | - Set up the EFS CSI driver 28 | - Set up StorageClasses for EBS and EFS -------------------------------------------------------------------------------- /components/vector-database/qdrant/ingress.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: networking.k8s.io/v1 2 | kind: Ingress 3 | metadata: 4 | annotations: 5 | alb.ingress.kubernetes.io/target-type: ip 6 | {{#if DOMAIN}} 7 | alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}]' 8 | {{/if}} 9 | name: qdrant-alb 10 | namespace: ingress-nginx 11 | spec: 12 | ingressClassName: {{#if DOMAIN}}shared-{{/if}}internet-facing-alb 13 | rules: 14 | - http: 15 | paths: 16 | - path: / 17 | pathType: Prefix 18 | backend: 19 | service: 20 | name: ingress-nginx-controller 21 | port: 22 | number: 80 23 | {{#if DOMAIN}} 24 | host: qdrant.{{{DOMAIN}}} 25 | {{/if}} -------------------------------------------------------------------------------- /components/gui-app/openwebui/values.template.yaml: -------------------------------------------------------------------------------- 1 | nameOverride: openwebui 2 | ingress: 3 | enabled: true 4 | annotations: 5 | alb.ingress.kubernetes.io/target-type: ip 6 | {{#if DOMAIN}} 7 | alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}]' 8 | host: openwebui.{{{DOMAIN}}} 9 | {{else}} 10 | host: 11 | {{/if}} 12 | 13 | # openaiBaseApiUrl: https://litellm.{{{DOMAIN}}}/v1 14 | openaiBaseApiUrl: http://litellm.litellm:4000/v1 15 | extraEnvVars: 16 | - name: OPENAI_API_KEY 17 | value: {{{LITELLM_API_KEY}}} 18 | 19 | ollama: 20 | enabled: false 21 | pipelines: 22 | enabled: false 23 | 24 | persistence: 25 | enabled: true 26 | size: 100Gi 27 | 28 | resources: 29 | requests: 30 | cpu: 1 31 | memory: 2Gi 32 | limits: 33 | memory: 2Gi 34 | -------------------------------------------------------------------------------- /terraform/eks.tf.template: -------------------------------------------------------------------------------- 1 | module "eks_{{{EKS_MODE}}}_mode" { 2 | source = "./modules/eks-{{{EKS_MODE}}}-mode" 3 | 4 | name = var.name 5 | region = var.region 6 | eks_cluster_version = var.eks_cluster_version 7 | vpc_id = module.vpc.vpc_id 8 | vpc_cidr = var.vpc_cidr 9 | subnet_ids = module.vpc.private_subnets 10 | domain = var.domain 11 | efs_file_system_id = aws_efs_file_system.this.id 12 | gpu_nodepool_capacity_type = var.gpu_nodepool_capacity_type 13 | gpu_nodepool_instance_family = var.gpu_nodepool_instance_family 14 | enable_nginx = var.enable_nginx 15 | enable_lws = var.enable_lws 16 | } 17 | -------------------------------------------------------------------------------- /components/guardrail/guardrails-ai/docker/docker-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | echo "Configuring Guardrails AI..."
5 | 6 | # Check if GUARDRAILS_TOKEN is set 7 | if [ -z "$GUARDRAILS_TOKEN" ]; then 8 | echo "Error: GUARDRAILS_TOKEN environment variable is not set" 9 | exit 1 10 | fi 11 | 12 | # Configure guardrails with token from environment 13 | guardrails configure --disable-metrics --enable-remote-inferencing --token "$GUARDRAILS_TOKEN" 14 | 15 | # Install validators from hub 16 | echo "Installing validators from Guardrails Hub..." 17 | guardrails hub install hub://guardrails/detect_pii 18 | 19 | echo "Starting Guardrails API server..." 20 | # Start the application 21 | exec gunicorn --bind 0.0.0.0:8000 --timeout=90 --workers=4 'guardrails_api.app:create_app(None, "config.py")' 22 | -------------------------------------------------------------------------------- /components/guardrail/guardrails-ai/GUARDRAILS_AI.md: -------------------------------------------------------------------------------- 1 | # Guardrails AI 2 | 3 | References: 4 | 5 | - https://www.guardrailsai.com/docs/how_to_guides/hosting_with_docker 6 | - https://github.com/guardrails-ai/guardrails-lite-server 7 | - https://github.com/guardrails-ai/detect_pii 8 | 9 | ## Test 10 | 11 | - Via Open WebUI: 12 | 13 | ``` 14 | Validate this email address - genai-on-eks@example.com 15 | 16 | Validate this IP address - 50.0.10.1 17 | 18 | Validate this phone number - 829-456-7890 19 | ``` 20 | 21 | - Via curl to the Guardrails Server pod: 22 | 23 | ``` 24 | curl -X 'POST' \ 25 | 'http://localhost:8000/guards/detect-pii/validate' \ 26 | -H 'accept: application/json' \ 27 | -H 'Content-Type: application/json' \ 28 | -d '{ 29 | "llmOutput": "My email address is john.doe@example.com, my IP address is 192.168.1.1, and my phone number is 123-456-7890" 30 | }' 31 | ``` 32 | -------------------------------------------------------------------------------- /terraform/vpc.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | azs = slice(data.aws_availability_zones.available.names, 0, min(length(data.aws_availability_zones.available.names), 4)) 3 | } 4 | 5 | # AWS VPC 6 | module "vpc" { 7 | source = "terraform-aws-modules/vpc/aws" 8 | version = "5.21.0" 9 | 10 | name = var.name 11 | cidr = var.vpc_cidr 12 | 13 | azs = local.azs 14 | private_subnets = [for k, v in local.azs : cidrsubnet(var.vpc_cidr, 4, k)] 15 | public_subnets = [for k, v in local.azs : cidrsubnet(var.vpc_cidr, 8, k + 64)] 16 | # intra_subnets = [for k, v in local.azs : cidrsubnet(var.vpc_cidr, 8, k + 80)] 17 | 18 | enable_nat_gateway = true 19 | single_nat_gateway = true 20 | 21 | public_subnet_tags = { 22 | "kubernetes.io/role/elb" = 1 23 | } 24 | 25 | private_subnet_tags = { 26 | "kubernetes.io/role/internal-elb" = 1 27 | "karpenter.sh/discovery" = var.name 28 | } 29 | } 30 | 31 | -------------------------------------------------------------------------------- /components/ai-gateway/kong/kong.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: configuration.konghq.com/v1 2 | kind: KongClusterPlugin 3 | metadata: 4 | name: key-auth 5 | namespace: kong 6 | annotations: 7 | kubernetes.io/ingress.class: kong 8 | labels: 9 | global: "true" 10 | plugin: key-auth 11 | config: 12 | key_in_query: false 13 | key_names: ["{{{KONG_API_KEY_HEADER}}}"] 14 | --- 15 | apiVersion: v1 16 | kind: Secret 17 | metadata: 18 | name: default-credential 19 | namespace: kong 20 | annotations: 21 | kubernetes.io/ingress.class: kong 22 | labels: 23 | konghq.com/credential: key-auth 24 | type: Opaque 25 |
stringData: 26 | key: {{{KONG_API_KEY}}} 27 | --- 28 | apiVersion: configuration.konghq.com/v1 29 | kind: KongConsumer 30 | metadata: 31 | name: default 32 | namespace: kong 33 | annotations: 34 | kubernetes.io/ingress.class: kong 35 | username: default 36 | credentials: 37 | - default-credential 38 | -------------------------------------------------------------------------------- /examples/mcp-server/calculator/main.tf: -------------------------------------------------------------------------------- 1 | variable "region" { 2 | type = string 3 | default = "us-west-2" 4 | } 5 | variable "name" { 6 | type = string 7 | default = "genai-on-eks" 8 | } 9 | terraform { 10 | required_providers { 11 | aws = { 12 | source = "hashicorp/aws" 13 | version = "~> 5.96.0" 14 | } 15 | } 16 | } 17 | provider "aws" { 18 | region = var.region 19 | } 20 | locals { 21 | app = "calculator" 22 | namespace = "mcp-server" 23 | full_name = "${var.name}-${local.namespace}-${local.app}" 24 | } 25 | resource "aws_ecr_repository" "this" { 26 | name = local.full_name 27 | image_tag_mutability = "MUTABLE" 28 | force_delete = true 29 | 30 | image_scanning_configuration { 31 | scan_on_push = true 32 | } 33 | 34 | encryption_configuration { 35 | encryption_type = "KMS" 36 | } 37 | } 38 | output "ecr_repository_url" { 39 | value = aws_ecr_repository.this.repository_url 40 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT No Attribution 2 | 3 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so. 10 | 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 13 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 14 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 15 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 16 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
17 | 18 | -------------------------------------------------------------------------------- /examples/mcp-server/calculator/mcp-server.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: calculator 5 | namespace: mcp-server 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: calculator 11 | template: 12 | metadata: 13 | labels: 14 | app: calculator 15 | spec: 16 | {{#unless useBuildx}} 17 | nodeSelector: 18 | kubernetes.io/arch: {{{arch}}} 19 | {{/unless}} 20 | containers: 21 | - name: server 22 | image: {{{IMAGE}}} 23 | ports: 24 | - name: http 25 | containerPort: 8000 26 | resources: 27 | requests: 28 | cpu: 250m 29 | memory: 512Mi 30 | limits: 31 | memory: 512Mi 32 | --- 33 | apiVersion: v1 34 | kind: Service 35 | metadata: 36 | name: calculator 37 | namespace: mcp-server 38 | spec: 39 | selector: 40 | app: calculator 41 | ports: 42 | - name: http 43 | port: 8000 44 | 45 | -------------------------------------------------------------------------------- /terraform/modules/eks-standard-mode/variables.tf: -------------------------------------------------------------------------------- 1 | variable "name" { type = string } 2 | variable "region" { type = string } 3 | variable "eks_cluster_version" { type = string } 4 | variable "vpc_id" { type = string } 5 | variable "vpc_cidr" { type = string } 6 | variable "subnet_ids" { type = list(string) } 7 | variable "domain" { type = string } 8 | variable "efs_file_system_id" { type = string } 9 | variable "gpu_nodepool_capacity_type" { type = list(string) } 10 | variable "gpu_nodepool_instance_family" { type = list(string) } 11 | 12 | terraform { 13 | required_version = ">= 1.5" 14 | 15 | required_providers { 16 | aws = { 17 | source = "hashicorp/aws" 18 | version = "~> 6.15.0" 19 | } 20 | kubernetes = { 21 | source = "hashicorp/kubernetes" 22 | version = "~> 2.38.0" 23 | } 24 | helm = { 25 | source = "hashicorp/helm" 26 | version = "~> 2.17.0" 27 | } 28 | kubectl = { 29 | source = "alekc/kubectl" 30 | version = "~> 2.1.3" 31 | } 32 | local = { 33 | source = "hashicorp/local" 34 | version = "~> 2.5.3" 35 | } 36 | } 37 | } -------------------------------------------------------------------------------- /terraform/efs.tf: -------------------------------------------------------------------------------- 1 | # EFS Security Group 2 | resource "aws_security_group" "efs" { 3 | name = "${var.name}-efs-sg" 4 | description = "Security group for EFS" 5 | vpc_id = module.vpc.vpc_id 6 | 7 | ingress { 8 | description = "Allow NFS traffic from private subnets" 9 | from_port = 2049 10 | to_port = 2049 11 | protocol = "tcp" 12 | cidr_blocks = module.vpc.private_subnets_cidr_blocks 13 | } 14 | 15 | tags = { 16 | Name = "${var.name}-efs-sg" 17 | "karpenter.sh/discovery" = var.name 18 | } 19 | } 20 | 21 | resource "aws_efs_file_system" "this" { 22 | creation_token = "${var.name}-efs" 23 | encrypted = true 24 | throughput_mode = var.efs_throughput_mode 25 | 26 | lifecycle_policy { 27 | transition_to_ia = "AFTER_7_DAYS" 28 | } 29 | lifecycle_policy { 30 | transition_to_primary_storage_class = "AFTER_1_ACCESS" 31 | } 32 | tags = { 33 | Name = "${var.name}-efs" 34 | } 35 | } 36 | 37 | resource "aws_efs_mount_target" "this" { 38 | count = length(module.vpc.private_subnets) 39 | 40 | file_system_id = aws_efs_file_system.this.id 41 | subnet_id = module.vpc.private_subnets[count.index] 42 | security_groups = [aws_security_group.efs.id] 43 | } 
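# Note: a mount target is created in each private subnet so nodes in any AZ can
# mount the shared file system over NFS (TCP 2049, permitted by the EFS security
# group above).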
-------------------------------------------------------------------------------- /terraform/modules/eks-auto-mode/variables.tf: -------------------------------------------------------------------------------- 1 | variable "name" { type = string } 2 | variable "region" { type = string } 3 | variable "eks_cluster_version" { type = string } 4 | variable "vpc_id" { type = string } 5 | variable "vpc_cidr" { type = string } 6 | variable "subnet_ids" { type = list(string) } 7 | variable "domain" { type = string } 8 | variable "efs_file_system_id" { type = string } 9 | variable "gpu_nodepool_capacity_type" { type = list(string) } 10 | variable "gpu_nodepool_instance_family" { type = list(string) } 11 | variable "enable_nginx" { 12 | type = bool 13 | default = true 14 | } 15 | variable "enable_lws" { 16 | type = bool 17 | default = true 18 | } 19 | 20 | terraform { 21 | required_version = ">= 1.5" 22 | 23 | required_providers { 24 | aws = { 25 | source = "hashicorp/aws" 26 | version = "~> 6.15.0" 27 | } 28 | kubernetes = { 29 | source = "hashicorp/kubernetes" 30 | version = "~> 2.38.0" 31 | } 32 | helm = { 33 | source = "hashicorp/helm" 34 | version = "~> 2.17.0" 35 | } 36 | kubectl = { 37 | source = "alekc/kubectl" 38 | version = "~> 2.1.3" 39 | } 40 | local = { 41 | source = "hashicorp/local" 42 | version = "~> 2.5.3" 43 | } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /components/guardrail/guardrails-ai/docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM public.ecr.aws/docker/library/python:3.12-slim 2 | 3 | # Create app directory 4 | WORKDIR /app 5 | 6 | # print the version just to verify 7 | RUN python3 --version 8 | # start the virtual environment 9 | RUN python3 -m venv /opt/venv 10 | 11 | # Enable venv 12 | ENV PATH="/opt/venv/bin:$PATH" 13 | 14 | # Install some utilities; you may not need all of these 15 | RUN apt-get update 16 | RUN apt-get install -y git 17 | 18 | # Copy the requirements file 19 | COPY requirements*.txt . 20 | 21 | # Install app dependencies 22 | # If you use Poetry this step might be different 23 | RUN pip install -r requirements-lock.txt 24 | 25 | # Set the directory for nltk data 26 | ENV NLTK_DATA=/opt/nltk_data 27 | 28 | # Download punkt data 29 | RUN python -m nltk.downloader -d /opt/nltk_data punkt 30 | 31 | # Copy the rest over (including entrypoint script) 32 | # We use a .dockerignore to keep unwanted files excluded 33 | COPY . . 34 | 35 | # Make entrypoint script executable 36 | RUN chmod +x /app/docker-entrypoint.sh 37 | 38 | EXPOSE 8000 39 | 40 | # Use entrypoint script that will configure guardrails and install hub validators at runtime 41 | # The GUARDRAILS_TOKEN will be provided as an environment variable from Kubernetes 42 | ENTRYPOINT ["/app/docker-entrypoint.sh"] 43 | -------------------------------------------------------------------------------- /components/ai-gateway/kong/KONG.md: -------------------------------------------------------------------------------- 1 | # Kong AI Gateway OSS 2 | 3 | Currently, we only deploy Kong and set up the API key; we do not set up the routes or the integration with Open WebUI. 4 | 5 | Notes: 6 | 7 | - [AI Proxy plugin](https://developer.konghq.com/plugins/ai-proxy) currently does not provide a simple way to set up multiple models on the same URL path. See `components/ai-gateway/kong/examples/kong.yaml` for an example that sets a route for each model.
8 | 9 | - Alternatively, check [this example](https://developer.konghq.com/plugins/ai-proxy/examples/sdk-two-routes/) for setting up AI Proxy plugin routing based on matching different URL paths. 10 | 11 | - To integrate with Open WebUI, since the `/v1/models` path is not available, you will need to manually specify the models in the `Model IDs` field when creating the connection. 12 | 13 | - [Kong Manager OSS](https://github.com/Kong/kong-manager) currently does not provide an easy way to implement authentication without the licensed enterprise RBAC feature. The current workaround is to port-forward: 14 | 15 | ``` 16 | # Kong Manager 17 | kubectl -n kong port-forward svc/kong-kong-manager 8002:8002 18 | 19 | # Kong Admin API (also required since Kong Manager UI will connect directly to it from the browser) 20 | kubectl -n kong port-forward svc/kong-kong-admin 8001:8001 21 | ``` 22 | -------------------------------------------------------------------------------- /components/o11y/mlflow/values.template.yaml: -------------------------------------------------------------------------------- 1 | global: 2 | security: 3 | # -- Allow insecure images to use our ECR images. 4 | allowInsecureImages: true 5 | 6 | ingress: 7 | enabled: true 8 | className: {{#if DOMAIN}}shared-{{/if}}internet-facing-alb 9 | annotations: 10 | alb.ingress.kubernetes.io/target-type: ip 11 | alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}]' 12 | hosts: 13 | - paths: 14 | - path: / 15 | pathType: Prefix 16 | host: mlflow.{{{DOMAIN}}} 17 | 18 | extraEnvVars: 19 | MLFLOW_SERVER_ALLOWED_HOSTS: "localhost:*,10.*,mlflow.mlflow,mlflow.{{{DOMAIN}}}" 20 | # MLFLOW_LOGGING_LEVEL: "DEBUG" 21 | 22 | auth: 23 | enabled: true 24 | adminUsername: {{{MLFLOW_USERNAME}}} 25 | adminPassword: {{{MLFLOW_PASSWORD}}} 26 | 27 | postgresql: 28 | enabled: true 29 | image: 30 | registry: public.ecr.aws 31 | repository: agentic-ai-platforms-on-k8s/postgresql 32 | tag: 17.5.0-debian-12-r8 33 | pullPolicy: IfNotPresent 34 | auth: 35 | username: admin 36 | password: password123 37 | primary: 38 | resources: 39 | requests: 40 | cpu: 125m 41 | memory: 256Mi 42 | limits: 43 | memory: 256Mi 44 | 45 | artifactRoot: 46 | s3: 47 | enabled: true 48 | bucket: {{{MLFLOW_BUCKET_NAME}}} 49 | -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/config.workshop.json: -------------------------------------------------------------------------------- 1 | { 2 | "demo": { 3 | "components": [ 4 | { "category": "llm-model", "component": "vllm" }, 5 | { "category": "o11y", "component": "langfuse" }, 6 | { "category": "gui-app", "component": "openwebui" }, 7 | { "category": "ai-gateway", "component": "litellm" } 8 | ], 9 | "examples": [] 10 | }, 11 | "llm-model": { 12 | "vllm": { 13 | "models": [ 14 | { "name": "qwen3-8b-neuron", "deploy": true, "neuron": true, "compile": false }, 15 | { "name": "deepseek-r1-qwen3-8b-neuron", "deploy": true, "neuron": true, "compile": false } 16 | ] 17 | }, 18 | "sglang": { "models": [] }, 19 | "tgi": { "models": [] }, 20 | "ollama": { "models": [] } 21 | }, 22 | "embedding-model": { 23 | "tei": { "models": [] } 24 | }, 25 | "bedrock": { 26 | "llm": { "models": [{ "name": "claude-3.7-sonnet", "model": "us.anthropic.claude-3-7-sonnet-20250219-v1:0" }] }, 27 | "embedding": { "models": [] } 28 | }, 29 | "docker": { 30 | "useBuildx": false, 31 | "arch": "arm64" 32 | }, 33 | "terraform": { 34 | "vars": { 35 | "efs_throughput_mode": "elastic", 36 | "neuron_nodepool_capacity_type":
["on-demand"], 37 | "enable_nginx": false, 38 | "enable_lws": false 39 | } 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /terraform/alb-acm.tf: -------------------------------------------------------------------------------- 1 | resource "aws_acm_certificate" "wildcard" { 2 | count = var.domain != "" ? 1 : 0 3 | domain_name = "*.${var.domain}" 4 | validation_method = "DNS" 5 | subject_alternative_names = ["${var.domain}"] 6 | lifecycle { 7 | create_before_destroy = true 8 | } 9 | } 10 | 11 | data "aws_route53_zone" "selected" { 12 | count = var.domain != "" ? 1 : 0 13 | name = var.domain 14 | private_zone = false 15 | } 16 | 17 | resource "aws_route53_record" "validation" { 18 | for_each = var.domain != "" ? { 19 | for dvo in aws_acm_certificate.wildcard[0].domain_validation_options : dvo.domain_name => { 20 | name = dvo.resource_record_name 21 | record = dvo.resource_record_value 22 | type = dvo.resource_record_type 23 | } 24 | } : {} 25 | allow_overwrite = true 26 | name = each.value.name 27 | records = [each.value.record] 28 | ttl = 60 29 | type = each.value.type 30 | zone_id = data.aws_route53_zone.selected[0].zone_id 31 | } 32 | 33 | resource "aws_acm_certificate_validation" "wildcard" { 34 | count = var.domain != "" ? 1 : 0 35 | certificate_arn = aws_acm_certificate.wildcard[0].arn 36 | validation_record_fqdns = [for record in aws_route53_record.validation : record.fqdn] 37 | } 38 | -------------------------------------------------------------------------------- /components/o11y/phoenix/index.mjs: -------------------------------------------------------------------------------- 1 | import { fileURLToPath } from "url"; 2 | import path from "path"; 3 | import fs from "fs"; 4 | import handlebars from "handlebars"; 5 | import { $ } from "zx"; 6 | $.verbose = true; 7 | 8 | export const name = "Phoenix"; 9 | const __filename = fileURLToPath(import.meta.url); 10 | const DIR = path.dirname(__filename); 11 | let BASE_DIR; 12 | let config; 13 | let utils; 14 | 15 | export async function init(_BASE_DIR, _config, _utils) { 16 | BASE_DIR = _BASE_DIR; 17 | config = _config; 18 | utils = _utils; 19 | } 20 | 21 | export async function install() { 22 | // const requiredEnvVars = []; 23 | // utils.checkRequiredEnvVars(requiredEnvVars); 24 | 25 | const valuesTemplatePath = path.join(DIR, "values.template.yaml"); 26 | const valuesRenderedPath = path.join(DIR, "values.rendered.yaml"); 27 | const valuesTemplateString = fs.readFileSync(valuesTemplatePath, "utf8"); 28 | const valuesTemplate = handlebars.compile(valuesTemplateString); 29 | const valuesVars = { 30 | DOMAIN: process.env.DOMAIN, 31 | }; 32 | fs.writeFileSync(valuesRenderedPath, valuesTemplate(valuesVars)); 33 | await $`helm upgrade --install phoenix oci://registry-1.docker.io/arizephoenix/phoenix-helm --namespace phoenix --create-namespace -f ${valuesRenderedPath}`; 34 | } 35 | 36 | export async function uninstall() { 37 | await $`helm uninstall phoenix --namespace phoenix`; 38 | } 39 | -------------------------------------------------------------------------------- /components/workflow-automation/n8n/index.mjs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env zx 2 | 3 | import { fileURLToPath } from "url"; 4 | import path from "path"; 5 | import fs from "fs"; 6 | import handlebars from "handlebars"; 7 | import { $ } from "zx"; 8 | $.verbose = true; 9 | 10 | export const name = "n8n"; 11 | const __filename = fileURLToPath(import.meta.url); 12 
| const DIR = path.dirname(__filename); 13 | let BASE_DIR; 14 | let config; 15 | let utils; 16 | 17 | export async function init(_BASE_DIR, _config, _utils) { 18 | BASE_DIR = _BASE_DIR; 19 | config = _config; 20 | utils = _utils; 21 | } 22 | 23 | export async function install() { 24 | // const requiredEnvVars = []; 25 | // utils.checkRequiredEnvVars(requiredEnvVars); 26 | 27 | const valuesTemplatePath = path.join(DIR, "values.template.yaml"); 28 | const valuesRenderedPath = path.join(DIR, "values.rendered.yaml"); 29 | const valuesTemplateString = fs.readFileSync(valuesTemplatePath, "utf8"); 30 | const valuesTemplate = handlebars.compile(valuesTemplateString); 31 | const valuesVars = { 32 | DOMAIN: process.env.DOMAIN, 33 | }; 34 | fs.writeFileSync(valuesRenderedPath, valuesTemplate(valuesVars)); 35 | await $`helm upgrade --install n8n oci://8gears.container-registry.com/library/n8n --namespace n8n --create-namespace -f ${valuesRenderedPath}`; 36 | } 37 | 38 | export async function uninstall() { 39 | await $`helm uninstall n8n --namespace n8n`; 40 | } 41 | -------------------------------------------------------------------------------- /components/vector-database/chroma/index.mjs: -------------------------------------------------------------------------------- 1 | import { fileURLToPath } from "url"; 2 | import path from "path"; 3 | import fs from "fs"; 4 | import handlebars from "handlebars"; 5 | import { $ } from "zx"; 6 | $.verbose = true; 7 | 8 | export const name = "Chroma"; 9 | const __filename = fileURLToPath(import.meta.url); 10 | const DIR = path.dirname(__filename); 11 | let BASE_DIR; 12 | let config; 13 | let utils; 14 | 15 | export async function init(_BASE_DIR, _config, _utils) { 16 | BASE_DIR = _BASE_DIR; 17 | config = _config; 18 | utils = _utils; 19 | } 20 | 21 | export async function install() { 22 | // const requiredEnvVars = []; 23 | // utils.checkRequiredEnvVars(requiredEnvVars); 24 | 25 | await $`helm repo add chroma https://amikos-tech.github.io/chromadb-chart`; 26 | await $`helm repo update`; 27 | 28 | const valuesTemplatePath = path.join(DIR, "values.template.yaml"); 29 | const valuesRenderedPath = path.join(DIR, "values.rendered.yaml"); 30 | const valuesTemplateString = fs.readFileSync(valuesTemplatePath, "utf8"); 31 | const valuesTemplate = handlebars.compile(valuesTemplateString); 32 | const valuesVars = {}; 33 | fs.writeFileSync(valuesRenderedPath, valuesTemplate(valuesVars)); 34 | await $`helm upgrade --install chroma chroma/chromadb --namespace chroma --create-namespace -f ${valuesRenderedPath}`; 35 | } 36 | 37 | export async function uninstall() { 38 | await $`helm uninstall chroma --namespace chroma`; 39 | } 40 | -------------------------------------------------------------------------------- /components/guardrail/guardrails-ai/docker/build-image.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # Build and push multi-arch image to public ECR 5 | # Usage: ./build-image.sh [version] 6 | 7 | PUBLIC_ECR_REGISTRY="public.ecr.aws/agentic-ai-platforms-on-k8s" 8 | COMPONENT="guardrails-ai" 9 | VERSION=${1:-"latest"} 10 | 11 | echo "Building multi-arch image for component: ${COMPONENT}" 12 | echo "Version: ${VERSION}" 13 | echo "Registry: ${PUBLIC_ECR_REGISTRY}" 14 | 15 | # Get the directory where this script is located 16 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 17 | 18 | # Authenticate to public ECR (public ECR requires us-east-1 region) 19 | echo "Authenticating to public 
ECR..." 20 | aws ecr-public get-login-password --region us-east-1 | \ 21 | docker login --username AWS --password-stdin public.ecr.aws 22 | 23 | # Check if Dockerfile exists 24 | if [ ! -f "${SCRIPT_DIR}/Dockerfile" ]; then 25 | echo "Error: Dockerfile not found in ${SCRIPT_DIR}" 26 | exit 1 27 | fi 28 | 29 | # Build and push multi-arch image 30 | echo "Building and pushing multi-arch image..." 31 | docker buildx build \ 32 | --platform linux/amd64,linux/arm64 \ 33 | -t ${PUBLIC_ECR_REGISTRY}/${COMPONENT}:${VERSION} \ 34 | -t ${PUBLIC_ECR_REGISTRY}/${COMPONENT}:latest \ 35 | --push \ 36 | ${SCRIPT_DIR}/ 37 | 38 | echo "Successfully built and pushed ${PUBLIC_ECR_REGISTRY}/${COMPONENT}:${VERSION}" 39 | echo "Also tagged as: ${PUBLIC_ECR_REGISTRY}/${COMPONENT}:latest" 40 | -------------------------------------------------------------------------------- /components/llm-model/tgi/archive/job-tgi-neuron-build.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: Job 3 | metadata: 4 | name: tgi-neuron-build 5 | namespace: tgi 6 | spec: 7 | template: 8 | spec: 9 | restartPolicy: Never 10 | securityContext: 11 | seccompProfile: 12 | type: RuntimeDefault 13 | automountServiceAccountToken: false 14 | # nodeSelector: 15 | # {{{KARPENTER_PREFIX}}}/instance-family: inf2 16 | serviceAccountName: tgi-neuron-build 17 | containers: 18 | - name: kaniko 19 | image: gcr.io/kaniko-project/executor:latest 20 | args: 21 | - "--context=git://github.com/aonz/text-generation-inference.git" 22 | - "--dockerfile=Dockerfile.neuron" 23 | - "--destination={{{IMAGE}}}" 24 | # - "--verbosity=debug" 25 | env: 26 | - name: AWS_REGION 27 | value: us-west-2 28 | resources: 29 | requests: 30 | cpu: 24 #75% 31 | memory: 96Gi #75% 32 | aws.amazon.com/neuroncore: 2 33 | limits: 34 | aws.amazon.com/neuroncore: 2 35 | tolerations: 36 | - key: aws.amazon.com/neuron 37 | operator: Exists 38 | effect: NoSchedule 39 | --- 40 | apiVersion: v1 41 | kind: ServiceAccount 42 | metadata: 43 | name: tgi-neuron-build 44 | namespace: tgi 45 | automountServiceAccountToken: false 46 | -------------------------------------------------------------------------------- /components/guardrail/guardrails-ai/app.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: guardrails-ai 5 | --- 6 | apiVersion: v1 7 | kind: Secret 8 | metadata: 9 | name: guardrails-ai-secret 10 | namespace: guardrails-ai 11 | type: Opaque 12 | stringData: 13 | token: "{{{GUARDRAILS_TOKEN}}}" 14 | --- 15 | apiVersion: apps/v1 16 | kind: Deployment 17 | metadata: 18 | name: guard 19 | namespace: guardrails-ai 20 | spec: 21 | replicas: 1 22 | selector: 23 | matchLabels: 24 | app: guard 25 | template: 26 | metadata: 27 | labels: 28 | app: guard 29 | spec: 30 | {{#unless useBuildx}} 31 | nodeSelector: 32 | kubernetes.io/arch: {{{arch}}} 33 | {{/unless}} 34 | containers: 35 | - name: guard 36 | image: {{{IMAGE}}} 37 | env: 38 | - name: GUARDRAILS_TOKEN 39 | valueFrom: 40 | secretKeyRef: 41 | name: guardrails-ai-secret 42 | key: token 43 | ports: 44 | - name: http 45 | containerPort: 8000 46 | resources: 47 | requests: 48 | cpu: 1.5 49 | memory: 12Gi 50 | limits: 51 | memory: 12Gi 52 | --- 53 | apiVersion: v1 54 | kind: Service 55 | metadata: 56 | name: guard 57 | namespace: guardrails-ai 58 | spec: 59 | selector: 60 | app: guard 61 | ports: 62 | - name: http 63 | port: 8000 64 | 
-------------------------------------------------------------------------------- /components/o11y/mlflow/.terraform.lock.hcl: -------------------------------------------------------------------------------- 1 | # This file is maintained automatically by "terraform init". 2 | # Manual edits may be lost in future updates. 3 | 4 | provider "registry.terraform.io/hashicorp/aws" { 5 | version = "5.96.0" 6 | constraints = "~> 5.96.0" 7 | hashes = [ 8 | "h1:a/VEUu6BGQSPlUAzbN+zqaDCdi0QGh/VzBgo2gCran0=", 9 | "zh:3f7e734abb9d647c851f5cb987837d7c073c9cbf1f520a031027d827f93d3b68", 10 | "zh:5ca9400360a803a11cf432ca203be9f09da8fff9c96110a83c9029102b18c9d5", 11 | "zh:5d421f475d467af182a527b7a61d50105dc63394316edf1c775ef736f84b941c", 12 | "zh:68f2328e7f3e7666835d6815b39b46b08954a91204f82a6f648c928a0b09a744", 13 | "zh:6a4170e7e2764df2968d1df65efebda55273dfc36dc6741207afb5e4b7e85448", 14 | "zh:73f2a15bee21f7c92a071e2520216d0a40041aca52c0f6682e540da8ffcfada4", 15 | "zh:9843d6973aedfd4cbaafd7110420d0c4c1d7ef4a2eeff508294c3adcc3613145", 16 | "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425", 17 | "zh:9d1abd6be717c42f2a6257ee227d3e9548c31f01c976ed7b32b2745a63659a67", 18 | "zh:a70d642e323021d54a92f0daa81d096cb5067cb99ce116047a42eb1cb1d579a0", 19 | "zh:b9a2b293208d5a0449275fae463319e0998c841e0bcd4014594a49ba54bb70d6", 20 | "zh:ce0b0eb7ac24ff58c20efcb526c3f792a95be3617c795b45bbeea9f302903ae7", 21 | "zh:dbbf98b3cd8003833c472bdb89321c17a9bbdc1b785e7e3d75f8af924ee5a0e4", 22 | "zh:df86cf9311a4be8bb4a251196650653f97e01fbf5fe72deecc8f28a35a5352ae", 23 | "zh:f92992881afd9339f3e539fcd90cfc1e9ed1356b5e760bbcc804314c3cd6837f", 24 | ] 25 | } 26 | -------------------------------------------------------------------------------- /examples/agno/calculator-agent/.terraform.lock.hcl: -------------------------------------------------------------------------------- 1 | # This file is maintained automatically by "terraform init". 2 | # Manual edits may be lost in future updates. 
3 | 4 | provider "registry.terraform.io/hashicorp/aws" { 5 | version = "5.96.0" 6 | constraints = "~> 5.96.0" 7 | hashes = [ 8 | "h1:a/VEUu6BGQSPlUAzbN+zqaDCdi0QGh/VzBgo2gCran0=", 9 | "zh:3f7e734abb9d647c851f5cb987837d7c073c9cbf1f520a031027d827f93d3b68", 10 | "zh:5ca9400360a803a11cf432ca203be9f09da8fff9c96110a83c9029102b18c9d5", 11 | "zh:5d421f475d467af182a527b7a61d50105dc63394316edf1c775ef736f84b941c", 12 | "zh:68f2328e7f3e7666835d6815b39b46b08954a91204f82a6f648c928a0b09a744", 13 | "zh:6a4170e7e2764df2968d1df65efebda55273dfc36dc6741207afb5e4b7e85448", 14 | "zh:73f2a15bee21f7c92a071e2520216d0a40041aca52c0f6682e540da8ffcfada4", 15 | "zh:9843d6973aedfd4cbaafd7110420d0c4c1d7ef4a2eeff508294c3adcc3613145", 16 | "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425", 17 | "zh:9d1abd6be717c42f2a6257ee227d3e9548c31f01c976ed7b32b2745a63659a67", 18 | "zh:a70d642e323021d54a92f0daa81d096cb5067cb99ce116047a42eb1cb1d579a0", 19 | "zh:b9a2b293208d5a0449275fae463319e0998c841e0bcd4014594a49ba54bb70d6", 20 | "zh:ce0b0eb7ac24ff58c20efcb526c3f792a95be3617c795b45bbeea9f302903ae7", 21 | "zh:dbbf98b3cd8003833c472bdb89321c17a9bbdc1b785e7e3d75f8af924ee5a0e4", 22 | "zh:df86cf9311a4be8bb4a251196650653f97e01fbf5fe72deecc8f28a35a5352ae", 23 | "zh:f92992881afd9339f3e539fcd90cfc1e9ed1356b5e760bbcc804314c3cd6837f", 24 | ] 25 | } 26 | -------------------------------------------------------------------------------- /components/guardrail/guardrails-ai/index.mjs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env zx 2 | 3 | import { fileURLToPath } from "url"; 4 | import path from "path"; 5 | import fs from "fs"; 6 | import handlebars from "handlebars"; 7 | import { $, cd } from "zx"; 8 | $.verbose = true; 9 | 10 | export const name = "Guardrails AI"; 11 | const __filename = fileURLToPath(import.meta.url); 12 | const DIR = path.dirname(__filename); 13 | let BASE_DIR; 14 | let config; 15 | let utils; 16 | 17 | export async function init(_BASE_DIR, _config, _utils) { 18 | BASE_DIR = _BASE_DIR; 19 | config = _config; 20 | utils = _utils; 21 | } 22 | 23 | export async function install() { 24 | const requiredEnvVars = ["GUARDRAILS_AI_API_KEY"]; 25 | utils.checkRequiredEnvVars(requiredEnvVars); 26 | 27 | const { GUARDRAILS_AI_API_KEY } = process.env; 28 | 29 | // Use pre-built public ECR image instead of building locally 30 | const publicEcrImage = "public.ecr.aws/agentic-ai-platforms-on-k8s/guardrails-ai:latest"; 31 | 32 | const appTemplatePath = path.join(DIR, "app.template.yaml"); 33 | const appRenderedPath = path.join(DIR, "app.rendered.yaml"); 34 | const { arch } = config.docker; 35 | const appVars = { 36 | arch, 37 | IMAGE: publicEcrImage, 38 | GUARDRAILS_TOKEN: GUARDRAILS_AI_API_KEY, 39 | }; 40 | utils.renderTemplate(appTemplatePath, appRenderedPath, appVars); 41 | await $`kubectl apply -f ${DIR}/app.rendered.yaml`; 42 | } 43 | 44 | export async function uninstall() { 45 | await $`kubectl delete -f ${DIR}/app.rendered.yaml --ignore-not-found`; 46 | } 47 | -------------------------------------------------------------------------------- /components/vector-database/milvus/ingress.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: networking.k8s.io/v1 2 | kind: Ingress 3 | metadata: 4 | annotations: 5 | external-dns.alpha.kubernetes.io/ingress-hostname-source: annotation-only 6 | nginx.ingress.kubernetes.io/auth-type: basic 7 | nginx.ingress.kubernetes.io/auth-secret: basic-auth 8 | 
nginx.ingress.kubernetes.io/auth-realm: "Authentication Required" 9 | name: milvus 10 | namespace: milvus 11 | spec: 12 | ingressClassName: nginx 13 | rules: 14 | - http: 15 | paths: 16 | - path: / 17 | pathType: Prefix 18 | backend: 19 | service: 20 | name: milvus 21 | port: 22 | number: 9091 23 | {{#if DOMAIN}} 24 | host: milvus.{{{DOMAIN}}} 25 | {{/if}} 26 | --- 27 | apiVersion: networking.k8s.io/v1 28 | kind: Ingress 29 | metadata: 30 | annotations: 31 | alb.ingress.kubernetes.io/target-type: ip 32 | {{#if DOMAIN}} 33 | alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}]' 34 | {{/if}} 35 | name: milvus-alb 36 | namespace: ingress-nginx 37 | spec: 38 | ingressClassName: {{#if DOMAIN}}shared-{{/if}}internet-facing-alb 39 | rules: 40 | - http: 41 | paths: 42 | - path: / 43 | pathType: Prefix 44 | backend: 45 | service: 46 | name: ingress-nginx-controller 47 | port: 48 | number: 80 49 | {{#if DOMAIN}} 50 | host: milvus.{{{DOMAIN}}} 51 | {{/if}} -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/code/module3/credit-validation/build-push-docker-image.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # Remove any leftover builder instance 5 | docker buildx rm multiarch-builder || true 6 | 7 | # Create a new builder instance 8 | docker buildx create --name multiarch-builder --use 9 | 10 | # Build for both x86_64 and ARM64 11 | docker buildx build --platform linux/amd64,linux/arm64 -t your-registry/loan-buddy:latest --push . 12 | 13 | # Or build locally for testing 14 | # docker buildx build --platform linux/amd64,linux/arm64 -t loan-buddy:latest --load . 15 | 16 | # Reference only (not executed): environment variables for the loan-buddy 17 | # Deployment manifest, kept here for copy/paste into the pod spec. 18 | # - name: LANGFUSE_URL 19 | #   value: "http://langfuse-web.langfuse.svc.cluster.local:3000" 20 | # - name: LANGFUSE_PUBLIC_KEY 21 | #   value: "lf_pk_1234567890" 22 | # - name: LANGFUSE_SECRET_KEY 23 | #   value: "lf_sk_1234567890" 24 | # - name: GATEWAY_URL 25 | #   value: "http://litellm.litellm.svc.cluster.local:4000" 26 | # - name: GATEWAY_MODEL_ACCESS_KEY 27 | #   value: "sk-4qgicypE01dIhc5mPsBWDQ" 28 | # - name: S3_BUCKET_NAME 29 | #   value: "langfuse" 30 | # - name: S3_ENDPOINT_URL 31 | #   value: "http://langfuse-s3.langfuse.svc.cluster.local:9000" 32 | # - name: S3_ACCESS_KEY 33 | #   value: "minio" 34 | # - name: S3_SECRET_KEY 35 | #   value: "password123" 36 | # - name: MCP_ADDRESS_VALIDATOR 37 | #   value: "http://mcp-address-validator:8000" 38 | # - name: MCP_EMPLOYMENT_VALIDATOR 39 | #   value: "http://mcp-employment-validator:8000" 40 | # - name: MCP_IMAGE_PROCESSOR 41 | #   value: "http://mcp-image-processor:8000" 42 | -------------------------------------------------------------------------------- /components/o11y/langfuse/.terraform.lock.hcl: -------------------------------------------------------------------------------- 1 | # This file is maintained automatically by "terraform init". 2 | # Manual edits may be lost in future updates.
3 | 4 | provider "registry.terraform.io/hashicorp/aws" { 5 | version = "5.96.0" 6 | constraints = "~> 5.96.0" 7 | hashes = [ 8 | "h1:a/VEUu6BGQSPlUAzbN+zqaDCdi0QGh/VzBgo2gCran0=", 9 | "h1:hqoQJnKaTfzNge5oCELAs+jqiT0R0oygDYlG4pmy3yk=", 10 | "zh:3f7e734abb9d647c851f5cb987837d7c073c9cbf1f520a031027d827f93d3b68", 11 | "zh:5ca9400360a803a11cf432ca203be9f09da8fff9c96110a83c9029102b18c9d5", 12 | "zh:5d421f475d467af182a527b7a61d50105dc63394316edf1c775ef736f84b941c", 13 | "zh:68f2328e7f3e7666835d6815b39b46b08954a91204f82a6f648c928a0b09a744", 14 | "zh:6a4170e7e2764df2968d1df65efebda55273dfc36dc6741207afb5e4b7e85448", 15 | "zh:73f2a15bee21f7c92a071e2520216d0a40041aca52c0f6682e540da8ffcfada4", 16 | "zh:9843d6973aedfd4cbaafd7110420d0c4c1d7ef4a2eeff508294c3adcc3613145", 17 | "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425", 18 | "zh:9d1abd6be717c42f2a6257ee227d3e9548c31f01c976ed7b32b2745a63659a67", 19 | "zh:a70d642e323021d54a92f0daa81d096cb5067cb99ce116047a42eb1cb1d579a0", 20 | "zh:b9a2b293208d5a0449275fae463319e0998c841e0bcd4014594a49ba54bb70d6", 21 | "zh:ce0b0eb7ac24ff58c20efcb526c3f792a95be3617c795b45bbeea9f302903ae7", 22 | "zh:dbbf98b3cd8003833c472bdb89321c17a9bbdc1b785e7e3d75f8af924ee5a0e4", 23 | "zh:df86cf9311a4be8bb4a251196650653f97e01fbf5fe72deecc8f28a35a5352ae", 24 | "zh:f92992881afd9339f3e539fcd90cfc1e9ed1356b5e760bbcc804314c3cd6837f", 25 | ] 26 | } 27 | -------------------------------------------------------------------------------- /examples/mcp-server/calculator/.terraform.lock.hcl: -------------------------------------------------------------------------------- 1 | # This file is maintained automatically by "terraform init". 2 | # Manual edits may be lost in future updates. 3 | 4 | provider "registry.terraform.io/hashicorp/aws" { 5 | version = "5.96.0" 6 | constraints = "~> 5.96.0" 7 | hashes = [ 8 | "h1:a/VEUu6BGQSPlUAzbN+zqaDCdi0QGh/VzBgo2gCran0=", 9 | "h1:hqoQJnKaTfzNge5oCELAs+jqiT0R0oygDYlG4pmy3yk=", 10 | "zh:3f7e734abb9d647c851f5cb987837d7c073c9cbf1f520a031027d827f93d3b68", 11 | "zh:5ca9400360a803a11cf432ca203be9f09da8fff9c96110a83c9029102b18c9d5", 12 | "zh:5d421f475d467af182a527b7a61d50105dc63394316edf1c775ef736f84b941c", 13 | "zh:68f2328e7f3e7666835d6815b39b46b08954a91204f82a6f648c928a0b09a744", 14 | "zh:6a4170e7e2764df2968d1df65efebda55273dfc36dc6741207afb5e4b7e85448", 15 | "zh:73f2a15bee21f7c92a071e2520216d0a40041aca52c0f6682e540da8ffcfada4", 16 | "zh:9843d6973aedfd4cbaafd7110420d0c4c1d7ef4a2eeff508294c3adcc3613145", 17 | "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425", 18 | "zh:9d1abd6be717c42f2a6257ee227d3e9548c31f01c976ed7b32b2745a63659a67", 19 | "zh:a70d642e323021d54a92f0daa81d096cb5067cb99ce116047a42eb1cb1d579a0", 20 | "zh:b9a2b293208d5a0449275fae463319e0998c841e0bcd4014594a49ba54bb70d6", 21 | "zh:ce0b0eb7ac24ff58c20efcb526c3f792a95be3617c795b45bbeea9f302903ae7", 22 | "zh:dbbf98b3cd8003833c472bdb89321c17a9bbdc1b785e7e3d75f8af924ee5a0e4", 23 | "zh:df86cf9311a4be8bb4a251196650653f97e01fbf5fe72deecc8f28a35a5352ae", 24 | "zh:f92992881afd9339f3e539fcd90cfc1e9ed1356b5e760bbcc804314c3cd6837f", 25 | ] 26 | } 27 | -------------------------------------------------------------------------------- /components/guardrail/guardrails-ai/.terraform.lock.hcl: -------------------------------------------------------------------------------- 1 | # This file is maintained automatically by "terraform init". 2 | # Manual edits may be lost in future updates. 
3 | 4 | provider "registry.terraform.io/hashicorp/aws" { 5 | version = "5.96.0" 6 | constraints = "~> 5.96.0" 7 | hashes = [ 8 | "h1:a/VEUu6BGQSPlUAzbN+zqaDCdi0QGh/VzBgo2gCran0=", 9 | "h1:hqoQJnKaTfzNge5oCELAs+jqiT0R0oygDYlG4pmy3yk=", 10 | "zh:3f7e734abb9d647c851f5cb987837d7c073c9cbf1f520a031027d827f93d3b68", 11 | "zh:5ca9400360a803a11cf432ca203be9f09da8fff9c96110a83c9029102b18c9d5", 12 | "zh:5d421f475d467af182a527b7a61d50105dc63394316edf1c775ef736f84b941c", 13 | "zh:68f2328e7f3e7666835d6815b39b46b08954a91204f82a6f648c928a0b09a744", 14 | "zh:6a4170e7e2764df2968d1df65efebda55273dfc36dc6741207afb5e4b7e85448", 15 | "zh:73f2a15bee21f7c92a071e2520216d0a40041aca52c0f6682e540da8ffcfada4", 16 | "zh:9843d6973aedfd4cbaafd7110420d0c4c1d7ef4a2eeff508294c3adcc3613145", 17 | "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425", 18 | "zh:9d1abd6be717c42f2a6257ee227d3e9548c31f01c976ed7b32b2745a63659a67", 19 | "zh:a70d642e323021d54a92f0daa81d096cb5067cb99ce116047a42eb1cb1d579a0", 20 | "zh:b9a2b293208d5a0449275fae463319e0998c841e0bcd4014594a49ba54bb70d6", 21 | "zh:ce0b0eb7ac24ff58c20efcb526c3f792a95be3617c795b45bbeea9f302903ae7", 22 | "zh:dbbf98b3cd8003833c472bdb89321c17a9bbdc1b785e7e3d75f8af924ee5a0e4", 23 | "zh:df86cf9311a4be8bb4a251196650653f97e01fbf5fe72deecc8f28a35a5352ae", 24 | "zh:f92992881afd9339f3e539fcd90cfc1e9ed1356b5e760bbcc804314c3cd6837f", 25 | ] 26 | } 27 | -------------------------------------------------------------------------------- /components/gui-app/openwebui/index.mjs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env zx 2 | 3 | import { fileURLToPath } from "url"; 4 | import path from "path"; 5 | import fs from "fs"; 6 | import handlebars from "handlebars"; 7 | import { $ } from "zx"; 8 | $.verbose = true; 9 | 10 | export const name = "Open WebUI"; 11 | const __filename = fileURLToPath(import.meta.url); 12 | const DIR = path.dirname(__filename); 13 | let BASE_DIR; 14 | let config; 15 | let utils; 16 | 17 | export async function init(_BASE_DIR, _config, _utils) { 18 | BASE_DIR = _BASE_DIR; 19 | config = _config; 20 | utils = _utils; 21 | } 22 | 23 | export async function install() { 24 | const requiredEnvVars = ["LITELLM_API_KEY"]; 25 | utils.checkRequiredEnvVars(requiredEnvVars); 26 | 27 | await $`helm repo add open-webui https://open-webui.github.io/helm-charts`; 28 | await $`helm repo update`; 29 | 30 | const valuesTemplatePath = path.join(DIR, "values.template.yaml"); 31 | const valuesRenderedPath = path.join(DIR, "values.rendered.yaml"); 32 | const valuesTemplateString = fs.readFileSync(valuesTemplatePath, "utf8"); 33 | const valuesTemplate = handlebars.compile(valuesTemplateString); 34 | const valuesVars = { 35 | DOMAIN: process.env.DOMAIN, 36 | LITELLM_API_KEY: process.env.LITELLM_API_KEY, 37 | }; 38 | fs.writeFileSync(valuesRenderedPath, valuesTemplate(valuesVars)); 39 | await $`helm upgrade --install openwebui open-webui/open-webui --namespace openwebui --create-namespace -f ${valuesRenderedPath}`; 40 | } 41 | 42 | export async function uninstall() { 43 | await $`helm uninstall openwebui --namespace openwebui`; 44 | } 45 | -------------------------------------------------------------------------------- /components/ai-gateway/litellm/.terraform.lock.hcl: -------------------------------------------------------------------------------- 1 | # This file is maintained automatically by "terraform init". 2 | # Manual edits may be lost in future updates. 
3 | 4 | provider "registry.terraform.io/hashicorp/aws" { 5 | version = "5.96.0" 6 | constraints = ">= 5.30.0, ~> 5.96.0" 7 | hashes = [ 8 | "h1:a/VEUu6BGQSPlUAzbN+zqaDCdi0QGh/VzBgo2gCran0=", 9 | "h1:hqoQJnKaTfzNge5oCELAs+jqiT0R0oygDYlG4pmy3yk=", 10 | "zh:3f7e734abb9d647c851f5cb987837d7c073c9cbf1f520a031027d827f93d3b68", 11 | "zh:5ca9400360a803a11cf432ca203be9f09da8fff9c96110a83c9029102b18c9d5", 12 | "zh:5d421f475d467af182a527b7a61d50105dc63394316edf1c775ef736f84b941c", 13 | "zh:68f2328e7f3e7666835d6815b39b46b08954a91204f82a6f648c928a0b09a744", 14 | "zh:6a4170e7e2764df2968d1df65efebda55273dfc36dc6741207afb5e4b7e85448", 15 | "zh:73f2a15bee21f7c92a071e2520216d0a40041aca52c0f6682e540da8ffcfada4", 16 | "zh:9843d6973aedfd4cbaafd7110420d0c4c1d7ef4a2eeff508294c3adcc3613145", 17 | "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425", 18 | "zh:9d1abd6be717c42f2a6257ee227d3e9548c31f01c976ed7b32b2745a63659a67", 19 | "zh:a70d642e323021d54a92f0daa81d096cb5067cb99ce116047a42eb1cb1d579a0", 20 | "zh:b9a2b293208d5a0449275fae463319e0998c841e0bcd4014594a49ba54bb70d6", 21 | "zh:ce0b0eb7ac24ff58c20efcb526c3f792a95be3617c795b45bbeea9f302903ae7", 22 | "zh:dbbf98b3cd8003833c472bdb89321c17a9bbdc1b785e7e3d75f8af924ee5a0e4", 23 | "zh:df86cf9311a4be8bb4a251196650653f97e01fbf5fe72deecc8f28a35a5352ae", 24 | "zh:f92992881afd9339f3e539fcd90cfc1e9ed1356b5e760bbcc804314c3cd6837f", 25 | ] 26 | } 27 | -------------------------------------------------------------------------------- /components/llm-model/vllm/archive/job-vllm-neuron-build.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: Job 3 | metadata: 4 | name: vllm-neuron-build 5 | namespace: vllm 6 | spec: 7 | template: 8 | spec: 9 | restartPolicy: Never 10 | securityContext: 11 | seccompProfile: 12 | type: RuntimeDefault 13 | automountServiceAccountToken: false 14 | # nodeSelector: 15 | # {{{KARPENTER_PREFIX}}}/instance-family: inf2 16 | serviceAccountName: vllm-neuron-build 17 | containers: 18 | - name: kaniko 19 | image: gcr.io/kaniko-project/executor:latest 20 | args: 21 | # TODO: Switch back to AWS Neuron repo when the PR is merged. 22 | # https://github.com/aws-neuron/upstreaming-to-vllm/pull/25 23 | - "--context=git://github.com/aonz/upstreaming-to-vllm.git" 24 | - "--dockerfile=docker/Dockerfile.neuron" 25 | - "--destination={{{IMAGE}}}" 26 | # - "--verbosity=debug" 27 | env: 28 | - name: AWS_REGION 29 | value: us-west-2 30 | resources: 31 | requests: 32 | cpu: 24 #75% 33 | memory: 96Gi #90% 34 | aws.amazon.com/neuron: 1 35 | limits: 36 | aws.amazon.com/neuron: 1 37 | tolerations: 38 | - key: aws.amazon.com/neuron 39 | operator: Exists 40 | effect: NoSchedule 41 | --- 42 | apiVersion: v1 43 | kind: ServiceAccount 44 | metadata: 45 | name: vllm-neuron-build 46 | namespace: vllm 47 | automountServiceAccountToken: false 48 | -------------------------------------------------------------------------------- /examples/strands-agents/calculator-agent/.terraform.lock.hcl: -------------------------------------------------------------------------------- 1 | # This file is maintained automatically by "terraform init". 2 | # Manual edits may be lost in future updates. 
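# Archived approach: this Job builds the vLLM Neuron image in-cluster with kaniko
# and pushes it to the ECR repository defined in archive/main.tf; the
# vllm-neuron-build service account receives ECR push permissions via EKS Pod Identity.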
3 | 4 | provider "registry.terraform.io/hashicorp/aws" { 5 | version = "5.96.0" 6 | constraints = ">= 5.30.0, ~> 5.96.0" 7 | hashes = [ 8 | "h1:a/VEUu6BGQSPlUAzbN+zqaDCdi0QGh/VzBgo2gCran0=", 9 | "h1:hqoQJnKaTfzNge5oCELAs+jqiT0R0oygDYlG4pmy3yk=", 10 | "zh:3f7e734abb9d647c851f5cb987837d7c073c9cbf1f520a031027d827f93d3b68", 11 | "zh:5ca9400360a803a11cf432ca203be9f09da8fff9c96110a83c9029102b18c9d5", 12 | "zh:5d421f475d467af182a527b7a61d50105dc63394316edf1c775ef736f84b941c", 13 | "zh:68f2328e7f3e7666835d6815b39b46b08954a91204f82a6f648c928a0b09a744", 14 | "zh:6a4170e7e2764df2968d1df65efebda55273dfc36dc6741207afb5e4b7e85448", 15 | "zh:73f2a15bee21f7c92a071e2520216d0a40041aca52c0f6682e540da8ffcfada4", 16 | "zh:9843d6973aedfd4cbaafd7110420d0c4c1d7ef4a2eeff508294c3adcc3613145", 17 | "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425", 18 | "zh:9d1abd6be717c42f2a6257ee227d3e9548c31f01c976ed7b32b2745a63659a67", 19 | "zh:a70d642e323021d54a92f0daa81d096cb5067cb99ce116047a42eb1cb1d579a0", 20 | "zh:b9a2b293208d5a0449275fae463319e0998c841e0bcd4014594a49ba54bb70d6", 21 | "zh:ce0b0eb7ac24ff58c20efcb526c3f792a95be3617c795b45bbeea9f302903ae7", 22 | "zh:dbbf98b3cd8003833c472bdb89321c17a9bbdc1b785e7e3d75f8af924ee5a0e4", 23 | "zh:df86cf9311a4be8bb4a251196650653f97e01fbf5fe72deecc8f28a35a5352ae", 24 | "zh:f92992881afd9339f3e539fcd90cfc1e9ed1356b5e760bbcc804314c3cd6837f", 25 | ] 26 | } 27 | -------------------------------------------------------------------------------- /examples/agno/calculator-agent/main.tf: -------------------------------------------------------------------------------- 1 | variable "region" { 2 | type = string 3 | default = "us-west-2" 4 | } 5 | variable "name" { 6 | type = string 7 | default = "genai-on-eks" 8 | } 9 | terraform { 10 | required_providers { 11 | aws = { 12 | source = "hashicorp/aws" 13 | version = "~> 5.96.0" 14 | } 15 | } 16 | } 17 | provider "aws" { 18 | region = var.region 19 | } 20 | locals { 21 | app = "calculator-agent" 22 | namespace = "agno" 23 | full_name = "${var.name}-${local.namespace}-${local.app}" 24 | } 25 | resource "aws_ecr_repository" "this" { 26 | name = local.full_name 27 | image_tag_mutability = "MUTABLE" 28 | force_delete = true 29 | 30 | image_scanning_configuration { 31 | scan_on_push = true 32 | } 33 | 34 | encryption_configuration { 35 | encryption_type = "KMS" 36 | } 37 | } 38 | output "ecr_repository_url" { 39 | value = aws_ecr_repository.this.repository_url 40 | } 41 | 42 | module "pod_identity" { 43 | source = "terraform-aws-modules/eks-pod-identity/aws" 44 | version = "1.12.0" 45 | 46 | name = local.full_name 47 | use_name_prefix = false 48 | attach_custom_policy = true 49 | policy_statements = [ 50 | { 51 | sid = "Bedrock" 52 | actions = [ 53 | "bedrock:InvokeModel", 54 | "bedrock:InvokeModelWithResponseStream", 55 | ] 56 | resources = ["*"] 57 | } 58 | ] 59 | associations = { 60 | app = { 61 | service_account = local.app 62 | namespace = local.namespace 63 | cluster_name = var.name 64 | } 65 | } 66 | } -------------------------------------------------------------------------------- /examples/strands-agents/calculator-agent/main.tf: -------------------------------------------------------------------------------- 1 | variable "region" { 2 | type = string 3 | default = "us-west-2" 4 | } 5 | variable "name" { 6 | type = string 7 | default = "genai-on-eks" 8 | } 9 | terraform { 10 | required_providers { 11 | aws = { 12 | source = "hashicorp/aws" 13 | version = "~> 5.96.0" 14 | } 15 | } 16 | } 17 | provider "aws" { 18 | region = 
var.region 19 | } 20 | locals { 21 | app = "calculator-agent" 22 | namespace = "strands-agents" 23 | full_name = "${var.name}-${local.namespace}-${local.app}" 24 | } 25 | resource "aws_ecr_repository" "this" { 26 | name = local.full_name 27 | image_tag_mutability = "MUTABLE" 28 | force_delete = true 29 | 30 | image_scanning_configuration { 31 | scan_on_push = true 32 | } 33 | 34 | encryption_configuration { 35 | encryption_type = "KMS" 36 | } 37 | } 38 | output "ecr_repository_url" { 39 | value = aws_ecr_repository.this.repository_url 40 | } 41 | 42 | module "pod_identity" { 43 | source = "terraform-aws-modules/eks-pod-identity/aws" 44 | version = "1.12.0" 45 | 46 | name = local.full_name 47 | use_name_prefix = false 48 | attach_custom_policy = true 49 | policy_statements = [ 50 | { 51 | sid = "Bedrock" 52 | actions = [ 53 | "bedrock:InvokeModel", 54 | "bedrock:InvokeModelWithResponseStream", 55 | ] 56 | resources = ["*"] 57 | } 58 | ] 59 | associations = { 60 | app = { 61 | service_account = local.app 62 | namespace = local.namespace 63 | cluster_name = var.name 64 | } 65 | } 66 | } -------------------------------------------------------------------------------- /examples/agno/calculator-agent/openwebui_pipe_function.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from pydantic import BaseModel 3 | 4 | 5 | class Pipe: 6 | class Valves(BaseModel): 7 | AGENT_ENDPOINT: str = "http://calculator-agent.agno" 8 | 9 | def __init__(self): 10 | self.valves = self.Valves() 11 | 12 | def pipes(self): 13 | return [{"id": "agno-calculator-agent", "name": "Agno - Calculator Agent"}] 14 | 15 | def pipe(self, body: dict, __user__: dict): 16 | messages = body.get("messages", []) 17 | last_user_message = next( 18 | (m for m in reversed(messages) if m.get("role") == "user"), None 19 | ) 20 | 21 | if not last_user_message: 22 | return 23 | 24 | message = last_user_message["content"] 25 | if message.startswith("### Task"): 26 | print("Skip: ### Task") 27 | return 28 | 29 | print("Latest user message:", message) 30 | 31 | try: 32 | response = requests.post( 33 | url=self.valves.AGENT_ENDPOINT, 34 | json={"prompt": message}, 35 | headers={"Content-Type": "application/json"}, 36 | stream=True, 37 | timeout=60, 38 | ) 39 | response.raise_for_status() 40 | 41 | if body.get("stream", False): 42 | return self.stream_response(response) 43 | else: 44 | return response.text 45 | except Exception as e: 46 | return f"Error: {e}" 47 | 48 | def stream_response(self, response): 49 | for line in response.iter_lines(decode_unicode=True): 50 | if line: 51 | yield line + "\n" 52 | -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/code/module3/credit-validation/requirements.txt: -------------------------------------------------------------------------------- 1 | # # Credit Underwriting System Dependencies 2 | 3 | # # Environment variables 4 | # python-dotenv>=1.0.0 5 | 6 | # # Core MCP and LangChain dependencies 7 | # langchain-mcp-adapters>=0.1.0 8 | # langgraph>=0.2.0 9 | # langchain 10 | # langchain-openai>=0.2.0 11 | 12 | # # MCP Server framework 13 | # mcp>=1.0.0 14 | 15 | # # Web framework and server 16 | # fastapi>=0.104.0 17 | # uvicorn[standard]>=0.24.0 18 | 19 | # # HTTP client for testing 20 | # aiohttp>=3.9.0 21 | 22 | # # Monitoring and tracing 23 | # langfuse>=2.60.4 24 | 25 | # # Image processing and encoding 26 | # Pillow>=10.0.0 27 | 28 | # # AWS SDK for S3 operations 29 | # 
boto3>=1.34.0 30 | # botocore>=1.34.0 31 | 32 | # # Async context management 33 | # contextlib2>=21.6.0 34 | 35 | # # Additional utilities 36 | # python-multipart>=0.0.6 37 | # pydantic>=2.0.0 38 | 39 | # # Development and testing 40 | # pytest>=7.4.0 41 | # pytest-asyncio>=0.21.0 42 | 43 | 44 | # Credit Underwriting System Dependencies 45 | 46 | # Environment variables 47 | python-dotenv 48 | 49 | # Core MCP and LangChain dependencies 50 | langchain-mcp-adapters 51 | langgraph 52 | langchain 53 | langchain-openai 54 | 55 | # MCP Server framework 56 | mcp 57 | 58 | # fast-mcp>=2.8.1 59 | 60 | # Web framework and server 61 | fastapi 62 | uvicorn[standard] 63 | 64 | # HTTP client for testing 65 | aiohttp 66 | 67 | # Monitoring and tracing 68 | langfuse 69 | 70 | # Image processing and encoding 71 | Pillow 72 | 73 | # AWS SDK for S3 operations 74 | boto3 75 | botocore 76 | 77 | # Async context management 78 | contextlib2 79 | 80 | # Additional utilities 81 | python-multipart 82 | pydantic 83 | 84 | # Development and testing 85 | # pytest>=7.4.0 86 | # pytest-asyncio>=0.21.0 87 | -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/content/introduction/getting-started/index.en.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Getting Started" 3 | chapter: true 4 | weight: 12 5 | --- 6 | 7 | Before we dive into deploying Large Language Models and building intelligent agents, let's ensure you have access to the workshop environment. This workshop can be completed in two ways, depending on how you're participating. 8 | 9 | ## 🎯 Choose Your Path 10 | 11 | ::::tabs 12 | 13 | :::tab{label="AWS Event"} 14 | ### 🎪 Participating in an AWS Event 15 | 16 | **Your infrastructure is pre-deployed and ready!** 17 | 18 | The event organizers have already set up: 19 | - ✅ EKS cluster with all required components 20 | - ✅ GenAI platform stack (vLLM, LiteLLM, Langfuse) 21 | - ✅ Model storage and caching 22 | - ✅ Networking and security configurations 23 | 24 | [**Continue with AWS Event Setup →**](/introduction/getting-started/at-aws-event/) 25 | ::: 26 | 27 | :::tab{label="Own Account"} 28 | ### 💻 Using Your Own AWS Account 29 | 30 | Running this workshop independently in your personal or company AWS account gives you: 31 | - Full control over the environment 32 | - Ability to keep resources after the workshop 33 | - Opportunity to customize configurations 34 | - Deeper understanding of the infrastructure 35 | 36 | **You'll need to**: 37 | - ✅ Have an AWS account with appropriate permissions 38 | - ✅ Deploy the workshop infrastructure 39 | - ✅ Configure access to the EKS cluster 40 | - ✅ Verify all components are running 41 | 42 | ::alert[**Cost Warning**: This workshop uses GPU and Neuron instances which incur charges. Remember to clean up resources after completion!]{type="warning"} 43 | 44 | [**Continue with Own Account Setup →**](/introduction/getting-started/self-account/) 45 | ::: 46 | 47 | --- 48 | 49 | **Ready? 
Select your path above to continue!** 50 | -------------------------------------------------------------------------------- /terraform/variables.tf: -------------------------------------------------------------------------------- 1 | variable "name" { 2 | type = string 3 | default = "genai-on-eks" 4 | } 5 | variable "region" { 6 | type = string 7 | default = "us-west-2" 8 | } 9 | variable "vpc_cidr" { 10 | type = string 11 | default = "10.0.0.0/16" 12 | } 13 | variable "eks_cluster_version" { 14 | type = string 15 | default = "1.34" 16 | } 17 | variable "domain" { 18 | type = string 19 | default = "" 20 | } 21 | variable "efs_throughput_mode" { 22 | type = string 23 | default = "bursting" 24 | } 25 | variable "gpu_nodepool_capacity_type" { 26 | type = list(string) 27 | default = ["spot", "on-demand"] 28 | } 29 | 30 | variable "gpu_nodepool_instance_family" { 31 | type = list(string) 32 | default = ["g6e", "g6", "g5g", "p5en", "p5e", "p5", "p4de", "p4d"] 33 | } 34 | 35 | variable "enable_nginx" { 36 | type = bool 37 | default = true 38 | } 39 | 40 | variable "enable_lws" { 41 | type = bool 42 | default = true 43 | } 44 | 45 | locals { 46 | account_id = data.aws_caller_identity.current.account_id 47 | } 48 | 49 | data "aws_caller_identity" "current" {} 50 | 51 | data "aws_availability_zones" "available" {} 52 | 53 | terraform { 54 | required_version = ">= 1.5" 55 | 56 | required_providers { 57 | aws = { 58 | source = "hashicorp/aws" 59 | version = "~> 6.15.0" 60 | } 61 | kubernetes = { 62 | source = "hashicorp/kubernetes" 63 | version = "~> 2.38.0" 64 | } 65 | helm = { 66 | source = "hashicorp/helm" 67 | version = "~> 2.17.0" 68 | } 69 | kubectl = { 70 | source = "alekc/kubectl" 71 | version = "~> 2.1.3" 72 | } 73 | local = { 74 | source = "hashicorp/local" 75 | version = "~> 2.5.3" 76 | } 77 | } 78 | } 79 | 80 | provider "aws" { region = var.region } 81 | -------------------------------------------------------------------------------- /components/llm-model/tgi/archive/main.tf: -------------------------------------------------------------------------------- 1 | variable "region" { 2 | type = string 3 | default = "us-west-2" 4 | } 5 | variable "name" { 6 | type = string 7 | default = "genai-on-eks" 8 | } 9 | terraform { 10 | required_providers { 11 | aws = { 12 | source = "hashicorp/aws" 13 | version = "~> 5.96.0" 14 | } 15 | } 16 | } 17 | provider "aws" { 18 | region = var.region 19 | } 20 | locals { 21 | name = "tgi-neuron" 22 | } 23 | resource "aws_ecr_repository" "this" { 24 | name = "${var.name}-${local.name}" 25 | image_tag_mutability = "MUTABLE" 26 | force_delete = true 27 | 28 | image_scanning_configuration { 29 | scan_on_push = true 30 | } 31 | 32 | encryption_configuration { 33 | encryption_type = "KMS" 34 | } 35 | } 36 | output "ecr_repository_url" { 37 | value = aws_ecr_repository.this.repository_url 38 | } 39 | 40 | module "pod_identity" { 41 | source = "terraform-aws-modules/eks-pod-identity/aws" 42 | version = "1.12.0" 43 | 44 | name = "${var.name}-${var.region}-tgi-neuron-build" 45 | use_name_prefix = false 46 | attach_custom_policy = true 47 | policy_statements = [ 48 | { 49 | actions = [ 50 | "ecr:GetAuthorizationToken", 51 | ] 52 | resources = ["*"] 53 | }, 54 | { 55 | actions = [ 56 | "ecr:CompleteLayerUpload", 57 | "ecr:UploadLayerPart", 58 | "ecr:InitiateLayerUpload", 59 | "ecr:BatchCheckLayerAvailability", 60 | "ecr:PutImage", 61 | "ecr:BatchGetImage" 62 | ] 63 | resources = [aws_ecr_repository.this.arn] 64 | } 65 | ] 66 | associations = { 67 | litellm = { 68 |
service_account = "tgi-neuron-build" 69 | namespace = "tgi" 70 | cluster_name = var.name 71 | } 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /components/o11y/mlflow/index.mjs: -------------------------------------------------------------------------------- 1 | import { fileURLToPath } from "url"; 2 | import path from "path"; 3 | import fs from "fs"; 4 | import handlebars from "handlebars"; 5 | import { $ } from "zx"; 6 | $.verbose = true; 7 | 8 | export const name = "MLflow"; 9 | const __filename = fileURLToPath(import.meta.url); 10 | const DIR = path.dirname(__filename); 11 | let BASE_DIR; 12 | let config; 13 | let utils; 14 | 15 | export async function init(_BASE_DIR, _config, _utils) { 16 | BASE_DIR = _BASE_DIR; 17 | config = _config; 18 | utils = _utils; 19 | } 20 | 21 | export async function install() { 22 | const requiredEnvVars = ["MLFLOW_USERNAME", "MLFLOW_PASSWORD"]; 23 | utils.checkRequiredEnvVars(requiredEnvVars); 24 | 25 | await utils.terraform.apply(DIR); 26 | const mlflowBucketName = await utils.terraform.output(DIR, { outputName: "mlflow_bucket_name" }); 27 | 28 | const valuesTemplatePath = path.join(DIR, "values.template.yaml"); 29 | const valuesRenderedPath = path.join(DIR, "values.rendered.yaml"); 30 | const valuesTemplateString = fs.readFileSync(valuesTemplatePath, "utf8"); 31 | const valuesTemplate = handlebars.compile(valuesTemplateString); 32 | const valuesVars = { 33 | DOMAIN: process.env.DOMAIN, 34 | MLFLOW_USERNAME: process.env.MLFLOW_USERNAME, 35 | MLFLOW_PASSWORD: process.env.MLFLOW_PASSWORD, 36 | MLFLOW_BUCKET_NAME: mlflowBucketName, 37 | }; 38 | fs.writeFileSync(valuesRenderedPath, valuesTemplate(valuesVars)); 39 | await $`helm repo add community-charts https://community-charts.github.io/helm-charts`; 40 | await $`helm upgrade --install mlflow community-charts/mlflow --namespace mlflow --create-namespace -f ${valuesRenderedPath}`; 41 | } 42 | 43 | export async function uninstall() { 44 | await $`helm uninstall mlflow --namespace mlflow`; 45 | await utils.terraform.destroy(DIR); 46 | } 47 | -------------------------------------------------------------------------------- /components/llm-model/vllm/archive/main.tf: -------------------------------------------------------------------------------- 1 | variable "region" { 2 | type = string 3 | default = "us-west-2" 4 | } 5 | variable "name" { 6 | type = string 7 | default = "genai-on-eks" 8 | } 9 | terraform { 10 | required_providers { 11 | aws = { 12 | source = "hashicorp/aws" 13 | version = "~> 5.96.0" 14 | } 15 | } 16 | } 17 | provider "aws" { 18 | region = var.region 19 | } 20 | locals { 21 | name = "vllm-neuron" 22 | } 23 | resource "aws_ecr_repository" "this" { 24 | name = "${var.name}-${local.name}" 25 | image_tag_mutability = "MUTABLE" 26 | force_delete = true 27 | 28 | image_scanning_configuration { 29 | scan_on_push = true 30 | } 31 | 32 | encryption_configuration { 33 | encryption_type = "KMS" 34 | } 35 | } 36 | output "ecr_repository_url" { 37 | value = aws_ecr_repository.this.repository_url 38 | } 39 | 40 | module "pod_identity" { 41 | source = "terraform-aws-modules/eks-pod-identity/aws" 42 | version = "1.12.0" 43 | 44 | name = "${var.name}-${var.region}-vllm-neuron-build" 45 | use_name_prefix = false 46 | attach_custom_policy = true 47 | policy_statements = [ 48 | { 49 | actions = [ 50 | "ecr:GetAuthorizationToken", 51 | ] 52 | resources = ["*"] 53 | }, 54 | { 55 | actions = [ 56 | "ecr:CompleteLayerUpload", 57 | "ecr:UploadLayerPart", 58 | 
"ecr:InitiateLayerUpload", 59 | "ecr:BatchCheckLayerAvailability", 60 | "ecr:PutImage", 61 | "ecr:BatchGetImage" 62 | ] 63 | resources = [aws_ecr_repository.this.arn] 64 | } 65 | ] 66 | associations = { 67 | litellm = { 68 | service_account = "vllm-neuron-build" 69 | namespace = "vllm" 70 | cluster_name = var.name 71 | } 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /examples/strands-agents/calculator-agent/openwebui_pipe_function.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from pydantic import BaseModel 3 | 4 | 5 | class Pipe: 6 | class Valves(BaseModel): 7 | AGENT_ENDPOINT: str = "http://calculator-agent.strands-agents" 8 | 9 | def __init__(self): 10 | self.valves = self.Valves() 11 | 12 | def pipes(self): 13 | return [ 14 | { 15 | "id": "strands-agents-calculator-agent", 16 | "name": "Strands Agents - Calculator Agent", 17 | } 18 | ] 19 | 20 | def pipe(self, body: dict, __user__: dict): 21 | messages = body.get("messages", []) 22 | last_user_message = next( 23 | (m for m in reversed(messages) if m.get("role") == "user"), None 24 | ) 25 | 26 | if not last_user_message: 27 | return 28 | 29 | message = last_user_message["content"] 30 | if message.startswith("### Task"): 31 | print("Skip: ### Task") 32 | return 33 | 34 | print("Latest user message:", message) 35 | 36 | try: 37 | response = requests.post( 38 | url=self.valves.AGENT_ENDPOINT, 39 | json={"prompt": message}, 40 | headers={"Content-Type": "application/json"}, 41 | stream=True, 42 | timeout=60, 43 | ) 44 | response.raise_for_status() 45 | 46 | if body.get("stream", False): 47 | return self.stream_response(response) 48 | else: 49 | return response.text 50 | except Exception as e: 51 | return f"Error: {e}" 52 | 53 | def stream_response(self, response): 54 | for line in response.iter_lines(decode_unicode=True): 55 | if line: 56 | yield line + "\n" 57 | -------------------------------------------------------------------------------- /components/llm-model/sglang/index.mjs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env zx 2 | 3 | import { fileURLToPath } from "url"; 4 | import path from "path"; 5 | import fs from "fs"; 6 | import handlebars from "handlebars"; 7 | import { $ } from "zx"; 8 | $.verbose = true; 9 | 10 | export const name = "SGLang"; 11 | const __filename = fileURLToPath(import.meta.url); 12 | const DIR = path.dirname(__filename); 13 | let BASE_DIR; 14 | let config; 15 | let utils; 16 | 17 | export async function init(_BASE_DIR, _config, _utils) { 18 | BASE_DIR = _BASE_DIR; 19 | config = _config; 20 | utils = _utils; 21 | } 22 | 23 | export async function install() { 24 | const requiredEnvVars = ["HF_TOKEN"]; 25 | utils.checkRequiredEnvVars(requiredEnvVars); 26 | 27 | await $`kubectl apply -f ${path.join(DIR, "namespace.yaml")}`; 28 | await $`kubectl apply -f ${path.join(DIR, "pvc.yaml")}`; 29 | const secretTemplatePath = path.join(DIR, "secret.template.yaml"); 30 | const secretRenderedPath = path.join(DIR, "secret.rendered.yaml"); 31 | const secretTemplateString = fs.readFileSync(secretTemplatePath, "utf8"); 32 | const secretTemplate = handlebars.compile(secretTemplateString); 33 | const secretVars = { 34 | HF_TOKEN: process.env.HF_TOKEN, 35 | }; 36 | fs.writeFileSync(secretRenderedPath, secretTemplate(secretVars)); 37 | await $`kubectl apply -f ${secretRenderedPath}`; 38 | const { models } = config["llm-model"]["sglang"]; 39 | await 
utils.model.addModels(models, "llm-model", "sglang"); 40 | } 41 | 42 | export async function uninstall() { 43 | const { models } = config["llm-model"]["sglang"]; 44 | await utils.model.removeAllModels(models, "llm-model", "sglang"); 45 | await $`kubectl delete -f ${path.join(DIR, "secret.rendered.yaml")} --ignore-not-found`; 46 | await $`kubectl delete -f ${path.join(DIR, "pvc.yaml")} --ignore-not-found`; 47 | await $`kubectl delete -f ${path.join(DIR, "namespace.yaml")} --ignore-not-found`; 48 | } 49 | -------------------------------------------------------------------------------- /components/llm-model/ollama/deployment.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: ollama 5 | namespace: ollama 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: ollama 11 | template: 12 | metadata: 13 | labels: 14 | app: ollama 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | {{{KARPENTER_PREFIX}}}/instance-family: g6e 22 | containers: 23 | - name: ollama 24 | image: ollama/ollama:0.10.1 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | lifecycle: 34 | postStart: 35 | exec: 36 | command: ["/scripts/init-ollama.sh"] 37 | ports: 38 | - name: http 39 | containerPort: 11434 40 | resources: 41 | requests: 42 | cpu: 3 #75% 43 | memory: 24Gi #75% 44 | nvidia.com/gpu: 1 45 | limits: 46 | nvidia.com/gpu: 1 47 | volumeMounts: 48 | - name: ollama-init-script 49 | mountPath: /scripts 50 | - name: ollama-cache 51 | mountPath: /root/.ollama 52 | volumes: 53 | - name: ollama-init-script 54 | configMap: 55 | name: ollama-init-script 56 | defaultMode: 0755 57 | - name: ollama-cache 58 | persistentVolumeClaim: 59 | claimName: ollama-cache 60 | tolerations: 61 | - key: nvidia.com/gpu 62 | operator: Exists 63 | effect: NoSchedule 64 | -------------------------------------------------------------------------------- /examples/mcp-server/calculator/server.py: -------------------------------------------------------------------------------- 1 | from fastmcp import FastMCP 2 | 3 | mcp = FastMCP("Calculator") 4 | 5 | 6 | # Define a simple addition tool 7 | @mcp.tool(description="Add two numbers together") 8 | def add(x: int, y: int) -> int: 9 | """Add two numbers and return the result. 10 | 11 | Args: 12 | x: First number 13 | y: Second number 14 | 15 | Returns: 16 | The sum of x and y 17 | """ 18 | print("Calling add tool.\n") 19 | return x + y 20 | 21 | 22 | # Define a subtraction tool 23 | @mcp.tool(description="Subtract one number from another") 24 | def subtract(x: int, y: int) -> int: 25 | """Subtract y from x and return the result. 26 | 27 | Args: 28 | x: Number to subtract from 29 | y: Number to subtract 30 | 31 | Returns: 32 | The difference (x - y) 33 | """ 34 | print("Calling subtract tool.\n") 35 | return x - y 36 | 37 | 38 | # Define a multiplication tool 39 | @mcp.tool(description="Multiply two numbers together") 40 | def multiply(x: int, y: int) -> int: 41 | """Multiply two numbers and return the result. 
42 | 43 | Args: 44 | x: First number 45 | y: Second number 46 | 47 | Returns: 48 | The product of x and y 49 | """ 50 | print("Calling multiply tool.\n") 51 | return x * y 52 | 53 | 54 | # Define a division tool 55 | @mcp.tool(description="Divide one number by another") 56 | def divide(x: float, y: float) -> float: 57 | """Divide x by y and return the result. 58 | 59 | Args: 60 | x: Numerator 61 | y: Denominator (must not be zero) 62 | 63 | Returns: 64 | The quotient (x / y) 65 | 66 | Raises: 67 | ValueError: If y is zero 68 | """ 69 | print("Calling divide tool.\n") 70 | if y == 0: 71 | raise ValueError("Cannot divide by zero") 72 | return x / y 73 | 74 | 75 | if __name__ == "__main__": 76 | mcp.run() 77 | -------------------------------------------------------------------------------- /components/embedding-model/tei/index.mjs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env zx 2 | 3 | import { fileURLToPath } from "url"; 4 | import path from "path"; 5 | import fs from "fs"; 6 | import handlebars from "handlebars"; 7 | import { $ } from "zx"; 8 | $.verbose = true; 9 | 10 | export const name = "Text Embedding Inference (TEI)"; 11 | const __filename = fileURLToPath(import.meta.url); 12 | const DIR = path.dirname(__filename); 13 | let BASE_DIR; 14 | let config; 15 | let utils; 16 | 17 | export async function init(_BASE_DIR, _config, _utils) { 18 | BASE_DIR = _BASE_DIR; 19 | config = _config; 20 | utils = _utils; 21 | } 22 | 23 | export async function install() { 24 | const requiredEnvVars = ["HF_TOKEN"]; 25 | utils.checkRequiredEnvVars(requiredEnvVars); 26 | 27 | await $`kubectl apply -f ${path.join(DIR, "namespace.yaml")}`; 28 | await $`kubectl apply -f ${path.join(DIR, "pvc.yaml")}`; 29 | const secretTemplatePath = path.join(DIR, "secret.template.yaml"); 30 | const secretRenderedPath = path.join(DIR, "secret.rendered.yaml"); 31 | const secretTemplateString = fs.readFileSync(secretTemplatePath, "utf8"); 32 | const secretTemplate = handlebars.compile(secretTemplateString); 33 | const secretVars = { 34 | HF_TOKEN: process.env.HF_TOKEN, 35 | }; 36 | fs.writeFileSync(secretRenderedPath, secretTemplate(secretVars)); 37 | await $`kubectl apply -f ${secretRenderedPath}`; 38 | const { models } = config["embedding-model"]["tei"]; 39 | await utils.model.addModels(models, "embedding-model", "tei"); 40 | } 41 | 42 | export async function uninstall() { 43 | const { models } = config["embedding-model"]["tei"]; 44 | await utils.model.removeAllModels(models, "embedding-model", "tei"); 45 | await $`kubectl delete -f ${path.join(DIR, "secret.rendered.yaml")} --ignore-not-found`; 46 | await $`kubectl delete -f ${path.join(DIR, "pvc.yaml")} --ignore-not-found`; 47 | await $`kubectl delete -f ${path.join(DIR, "namespace.yaml")} --ignore-not-found`; 48 | } 49 | -------------------------------------------------------------------------------- /components/llm-model/tgi/index.mjs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env zx 2 | 3 | import { fileURLToPath } from "url"; 4 | import path from "path"; 5 | import fs from "fs"; 6 | import handlebars from "handlebars"; 7 | import { $, cd } from "zx"; 8 | $.verbose = true; 9 | 10 | export const name = "TGI"; 11 | const __filename = fileURLToPath(import.meta.url); 12 | const DIR = path.dirname(__filename); 13 | let BASE_DIR; 14 | let config; 15 | let utils; 16 | 17 | export async function init(_BASE_DIR, _config, _utils) { 18 | BASE_DIR = _BASE_DIR; 19 | config = 
_config; 20 | utils = _utils; 21 | } 22 | 23 | export async function install() { 24 | const requiredEnvVars = ["HF_TOKEN"]; 25 | utils.checkRequiredEnvVars(requiredEnvVars); 26 | 27 | await $`kubectl apply -f ${path.join(DIR, "namespace.yaml")}`; 28 | await $`kubectl apply -f ${path.join(DIR, "pvc-huggingface-cache.yaml")}`; 29 | await $`kubectl apply -f ${path.join(DIR, "pvc-neuron-cache.yaml")}`; 30 | const secretTemplatePath = path.join(DIR, "secret.template.yaml"); 31 | const secretRenderedPath = path.join(DIR, "secret.rendered.yaml"); 32 | const secretTemplateString = fs.readFileSync(secretTemplatePath, "utf8"); 33 | const secretTemplate = handlebars.compile(secretTemplateString); 34 | const secretVars = { 35 | HF_TOKEN: process.env.HF_TOKEN, 36 | }; 37 | fs.writeFileSync(secretRenderedPath, secretTemplate(secretVars)); 38 | await $`kubectl apply -f ${secretRenderedPath}`; 39 | const { models } = config["llm-model"]["tgi"]; await utils.model.addModels(models, "llm-model", "tgi"); 40 | } 41 | 42 | export async function uninstall() { 43 | const { models } = config["llm-model"]["tgi"]; 44 | await utils.model.removeAllModels(models, "llm-model", "tgi"); 45 | await $`kubectl delete -f ${path.join(DIR, "secret.rendered.yaml")} --ignore-not-found`; 46 | await $`kubectl delete -f ${path.join(DIR, "pvc-huggingface-cache.yaml")} --ignore-not-found`; 47 | await $`kubectl delete -f ${path.join(DIR, "pvc-neuron-cache.yaml")} --ignore-not-found`; 48 | await $`kubectl delete -f ${path.join(DIR, "namespace.yaml")} --ignore-not-found`; 49 | } 50 | -------------------------------------------------------------------------------- /examples/mcp-server/calculator/index.mjs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env zx 2 | 3 | import { fileURLToPath } from "url"; 4 | import path from "path"; 5 | import fs from "fs"; 6 | import handlebars from "handlebars"; 7 | import { $, cd } from "zx"; 8 | $.verbose = true; 9 | 10 | export const name = "Calculator MCP Server"; 11 | const __filename = fileURLToPath(import.meta.url); 12 | const DIR = path.dirname(__filename); 13 | let BASE_DIR; 14 | let config; 15 | let utils; 16 | 17 | export async function init(_BASE_DIR, _config, _utils) { 18 | BASE_DIR = _BASE_DIR; 19 | config = _config; 20 | utils = _utils; 21 | } 22 | 23 | export async function install() { 24 | const { REGION } = process.env; 25 | await utils.terraform.apply(DIR); 26 | const ecrRepoUrl = await utils.terraform.output(DIR, { outputName: "ecr_repository_url" }); 27 | cd(DIR); 28 | await $`aws ecr get-login-password --region ${REGION} | docker login --username AWS --password-stdin ${ecrRepoUrl.split("/")[0]}`; 29 | const { useBuildx, arch } = config.docker; 30 | if (useBuildx) { 31 | await $`docker buildx build --platform linux/amd64,linux/arm64 -t ${ecrRepoUrl}:latest --push .`; 32 | } else { 33 | await $`docker build -t ${ecrRepoUrl}:latest .`; 34 | await $`docker push ${ecrRepoUrl}:latest`; 35 | } 36 | await $`kubectl apply -f ${path.join(DIR, "..", "namespace.yaml")}`; 37 | const mcpServerTemplatePath = path.join(DIR, "mcp-server.template.yaml"); 38 | const mcpServerRenderedPath = path.join(DIR, "mcp-server.rendered.yaml"); 39 | const mcpServerTemplateString = fs.readFileSync(mcpServerTemplatePath, "utf8"); 40 | const mcpServerTemplate = handlebars.compile(mcpServerTemplateString); 41 | const mcpServerVars = { 42 | useBuildx, 43 | arch, 44 | IMAGE: `${ecrRepoUrl}:latest`, 45 | }; 46 | fs.writeFileSync(mcpServerRenderedPath, mcpServerTemplate(mcpServerVars)); 47 | await $`kubectl apply -f
${DIR}/mcp-server.rendered.yaml`; 48 | } 49 | 50 | export async function uninstall() { 51 | await $`kubectl delete -f ${DIR}/mcp-server.rendered.yaml --ignore-not-found`; 52 | await utils.terraform.destroy(DIR); 53 | } 54 | -------------------------------------------------------------------------------- /components/llm-model/vllm/index.mjs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env zx 2 | 3 | import { fileURLToPath } from "url"; 4 | import path from "path"; 5 | import fs from "fs"; 6 | import handlebars from "handlebars"; 7 | import { $, cd } from "zx"; 8 | $.verbose = true; 9 | 10 | export const name = "vLLM"; 11 | const __filename = fileURLToPath(import.meta.url); 12 | const DIR = path.dirname(__filename); 13 | let BASE_DIR; 14 | let config; 15 | let utils; 16 | 17 | export async function init(_BASE_DIR, _config, _utils) { 18 | BASE_DIR = _BASE_DIR; 19 | config = _config; 20 | utils = _utils; 21 | } 22 | 23 | export async function install() { 24 | const requiredEnvVars = ["HF_TOKEN"]; 25 | utils.checkRequiredEnvVars(requiredEnvVars); 26 | 27 | await $`kubectl apply -f ${path.join(DIR, "namespace.yaml")}`; 28 | await $`kubectl apply -f ${path.join(DIR, "pvc-huggingface-cache.yaml")}`; 29 | await $`kubectl apply -f ${path.join(DIR, "pvc-neuron-cache.yaml")}`; 30 | const secretTemplatePath = path.join(DIR, "secret.template.yaml"); 31 | const secretRenderedPath = path.join(DIR, "secret.rendered.yaml"); 32 | const secretTemplateString = fs.readFileSync(secretTemplatePath, "utf8"); 33 | const secretTemplate = handlebars.compile(secretTemplateString); 34 | const secretVars = { 35 | HF_TOKEN: process.env.HF_TOKEN, 36 | }; 37 | fs.writeFileSync(secretRenderedPath, secretTemplate(secretVars)); 38 | await $`kubectl apply -f ${secretRenderedPath}`; 39 | const { models } = config["llm-model"]["vllm"]; 40 | await utils.model.addModels(models, "llm-model", "vllm"); 41 | } 42 | 43 | export async function uninstall() { 44 | const { models } = config["llm-model"]["vllm"]; 45 | await utils.model.removeAllModels(models, "llm-model", "vllm"); 46 | await $`kubectl delete -f ${path.join(DIR, "secret.rendered.yaml")} --ignore-not-found`; 47 | await $`kubectl delete -f ${path.join(DIR, "pvc-huggingface-cache.yaml")} --ignore-not-found`; 48 | await $`kubectl delete -f ${path.join(DIR, "pvc-neuron-cache.yaml")} --ignore-not-found`; 49 | await $`kubectl delete -f ${path.join(DIR, "namespace.yaml")} --ignore-not-found`; 50 | } 51 | -------------------------------------------------------------------------------- /components/o11y/langfuse/index.mjs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env zx 2 | 3 | import { fileURLToPath } from "url"; 4 | import path from "path"; 5 | import fs from "fs"; 6 | import handlebars from "handlebars"; 7 | import { $ } from "zx"; 8 | $.verbose = true; 9 | 10 | export const name = "Langfuse"; 11 | const __filename = fileURLToPath(import.meta.url); 12 | const DIR = path.dirname(__filename); 13 | let BASE_DIR; 14 | let config; 15 | let utils; 16 | 17 | export async function init(_BASE_DIR, _config, _utils) { 18 | BASE_DIR = _BASE_DIR; 19 | config = _config; 20 | utils = _utils; 21 | } 22 | 23 | export async function install() { 24 | const requiredEnvVars = ["LANGFUSE_USERNAME", "LANGFUSE_PASSWORD", "LANGFUSE_PUBLIC_KEY", "LANGFUSE_SECRET_KEY"]; 25 | utils.checkRequiredEnvVars(requiredEnvVars); 26 | 27 | await utils.terraform.apply(DIR); 28 | const tfOutput = await 
utils.terraform.output(DIR, {}); 29 | const langfuseBucketName = tfOutput.langfuse_bucket_name.value; 30 | 31 | await $`helm repo add langfuse https://langfuse.github.io/langfuse-k8s`; 32 | await $`helm repo update`; 33 | 34 | const valuesTemplatePath = path.join(DIR, "values.template.yaml"); 35 | const valuesRenderedPath = path.join(DIR, "values.rendered.yaml"); 36 | const valuesTemplateString = fs.readFileSync(valuesTemplatePath, "utf8"); 37 | const valuesTemplate = handlebars.compile(valuesTemplateString); 38 | const valuesVars = { 39 | DOMAIN: process.env.DOMAIN, 40 | LANGFUSE_USERNAME: process.env.LANGFUSE_USERNAME, 41 | LANGFUSE_PASSWORD: process.env.LANGFUSE_PASSWORD, 42 | LANGFUSE_PUBLIC_KEY: process.env.LANGFUSE_PUBLIC_KEY, 43 | LANGFUSE_SECRET_KEY: process.env.LANGFUSE_SECRET_KEY, 44 | LANGFUSE_BUCKET_NAME: langfuseBucketName, 45 | AWS_REGION: process.env.AWS_REGION, 46 | }; 47 | fs.writeFileSync(valuesRenderedPath, valuesTemplate(valuesVars)); 48 | await $`helm upgrade --install langfuse langfuse/langfuse --namespace langfuse --create-namespace -f ${valuesRenderedPath}`; 49 | } 50 | 51 | export async function uninstall() { 52 | await $`helm uninstall langfuse --namespace langfuse`; 53 | await utils.terraform.destroy(DIR); 54 | } 55 | -------------------------------------------------------------------------------- /cli-menu.json: -------------------------------------------------------------------------------- 1 | { 2 | "componentCategories": [ 3 | { 4 | "dir": "ai-gateway", 5 | "name": "AI Gateway", 6 | "components": [ 7 | { "dir": "litellm", "name": "LiteLLM" }, 8 | { "dir": "kong", "name": "Kong" } 9 | ] 10 | }, 11 | { 12 | "dir": "llm-model", 13 | "name": "LLM Model", 14 | "components": [ 15 | { "dir": "vllm", "name": "vLLM" }, 16 | { "dir": "sglang", "name": "SGLang" }, 17 | { "dir": "tgi", "name": "TGI" }, 18 | { "dir": "ollama", "name": "Ollama" } 19 | ] 20 | }, 21 | { 22 | "dir": "embedding-model", 23 | "name": "Embedding Model", 24 | "components": [{ "dir": "tei", "name": "Text Embedding Inference (TEI)" }] 25 | }, 26 | { "dir": "guardrail", "name": "Guardrail", "components": [{ "dir": "guardrails-ai", "name": "Guardrails AI" }] }, 27 | { 28 | "dir": "o11y", 29 | "name": "Observability", 30 | "components": [ 31 | { "dir": "langfuse", "name": "Langfuse" }, 32 | { "dir": "mlflow", "name": "MLflow" }, 33 | { "dir": "phoenix", "name": "Phoenix" } 34 | ] 35 | }, 36 | { "dir": "gui-app", "name": "GUI App", "components": [{ "dir": "openwebui", "name": "Open WebUI" }] }, 37 | { 38 | "dir": "vector-database", 39 | "name": "Vector Database", 40 | "components": [ 41 | { "dir": "qdrant", "name": "Qdrant" }, 42 | { "dir": "chroma", "name": "Chroma" }, 43 | { "dir": "milvus", "name": "Milvus" } 44 | ] 45 | }, 46 | { "dir": "workflow-automation", "name": "Workflow Automation", "components": [{ "dir": "n8n", "name": "n8n" }] } 47 | ], 48 | "exampleCategories": [ 49 | { "dir": "mcp-server", "name": "MCP Server", "examples": [{ "dir": "calculator", "name": "Calculator" }] }, 50 | { 51 | "dir": "strands-agents", 52 | "name": "Strands Agents", 53 | "examples": [{ "dir": "calculator-agent", "name": "Calculator Agent" }] 54 | }, 55 | { 56 | "dir": "agno", 57 | "name": "Agno", 58 | "examples": [{ "dir": "calculator-agent", "name": "Calculator Agent" }] 59 | } 60 | ] 61 | } 62 | -------------------------------------------------------------------------------- /components/vector-database/milvus/values.template.yaml: -------------------------------------------------------------------------------- 1 
| # https://github.com/Milvus-io/Milvus/issues/40267 2 | # Ingress exposes the service port, not the 9091 web UI port 3 | # ingress: 4 | # enabled: true 5 | # annotations: 6 | # external-dns.alpha.kubernetes.io/ingress-hostname-source: annotation-only 7 | # nginx.ingress.kubernetes.io/auth-type: basic 8 | # nginx.ingress.kubernetes.io/auth-secret: basic-auth 9 | # nginx.ingress.kubernetes.io/auth-realm: "Authentication Required" 10 | # ingressClassName: nginx 11 | # rules: 12 | # - host: milvus.{{{DOMAIN}}} 13 | # path: / 14 | # pathType: Prefix 15 | 16 | cluster: 17 | enabled: false 18 | 19 | serviceAccount: 20 | create: true 21 | 22 | etcd: 23 | replicaCount: 1 24 | resources: 25 | requests: 26 | cpu: 200m 27 | memory: 256Mi 28 | limits: 29 | memory: 256Mi 30 | 31 | minio: 32 | enabled: false 33 | externalS3: 34 | enabled: true 35 | host: "s3.{{AWS_REGION}}.amazonaws.com" 36 | port: "443" 37 | useSSL: true 38 | bucketName: "{{MILVUS_BUCKET_NAME}}" 39 | rootPath: "milvus" 40 | useIAM: true 41 | cloudProvider: "aws" 42 | region: "{{AWS_REGION}}" 43 | 44 | pulsarv3: 45 | components: 46 | autorecovery: false 47 | zookeeper: 48 | replicaCount: 1 49 | resources: 50 | requests: 51 | cpu: 200m 52 | memory: 256Mi 53 | limits: 54 | memory: 256Mi 55 | broker: 56 | replicaCount: 1 57 | resources: 58 | requests: 59 | cpu: 500m 60 | memory: 2Gi 61 | limits: 62 | memory: 2Gi 63 | configData: 64 | autoSkipNonRecoverableData: "true" 65 | managedLedgerDefaultEnsembleSize: "1" 66 | managedLedgerDefaultWriteQuorum: "1" 67 | managedLedgerDefaultAckQuorum: "1" 68 | proxy: 69 | replicaCount: 1 70 | resources: 71 | requests: 72 | cpu: 500m 73 | memory: 1Gi 74 | limits: 75 | memory: 1Gi 76 | bookkeeper: 77 | replicaCount: 1 78 | resources: 79 | requests: 80 | cpu: 500m 81 | memory: 2Gi 82 | limits: 83 | memory: 2Gi 84 | -------------------------------------------------------------------------------- /components/llm-model/tgi/model-qwen3-8b.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: qwen3-8b 5 | namespace: tgi 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: qwen3-8b 11 | template: 12 | metadata: 13 | labels: 14 | app: qwen3-8b 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | {{{KARPENTER_PREFIX}}}/instance-family: g6e 22 | containers: 23 | - name: tgi 24 | image: ghcr.io/huggingface/text-generation-inference:3.3.4 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | args: 34 | - --model-id=Qwen/Qwen3-8B 35 | - --trust-remote-code 36 | - --huggingface-hub-cache=/root/.cache/huggingface/hub 37 | env: 38 | - name: HF_TOKEN 39 | valueFrom: 40 | secretKeyRef: 41 | name: hf-token 42 | key: token 43 | ports: 44 | - name: http 45 | containerPort: 80 46 | resources: 47 | requests: 48 | cpu: 3 #75% 49 | memory: 24Gi #75% 50 | nvidia.com/gpu: 1 51 | limits: 52 | nvidia.com/gpu: 1 53 | volumeMounts: 54 | - name: huggingface-cache 55 | mountPath: /root/.cache/huggingface 56 | volumes: 57 | - name: huggingface-cache 58 | persistentVolumeClaim: 59 | claimName: huggingface-cache 60 | tolerations: 61 | - key: nvidia.com/gpu 62 | operator: Exists 63 | effect: NoSchedule 64 | --- 65 | apiVersion: v1 66 | kind: Service 67 | metadata: 68 | name: qwen3-8b 69 | namespace: tgi
70 | spec: 71 | selector: 72 | app: qwen3-8b 73 | ports: 74 | - name: http 75 | port: 80 76 | -------------------------------------------------------------------------------- /components/ai-gateway/kong/index.mjs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env zx 2 | 3 | import { fileURLToPath } from "url"; 4 | import path from "path"; 5 | import fs from "fs"; 6 | import handlebars from "handlebars"; 7 | import { $ } from "zx"; 8 | $.verbose = true; 9 | 10 | export const name = "Kong"; 11 | const __filename = fileURLToPath(import.meta.url); 12 | const DIR = path.dirname(__filename); 13 | let BASE_DIR; 14 | let config; 15 | let utils; 16 | 17 | export async function init(_BASE_DIR, _config, _utils) { 18 | BASE_DIR = _BASE_DIR; 19 | config = _config; 20 | utils = _utils; 21 | } 22 | 23 | export async function install() { 24 | const requiredEnvVars = ["KONG_API_KEY", "KONG_API_KEY_HEADER"]; 25 | utils.checkRequiredEnvVars(requiredEnvVars); 26 | const { DOMAIN, KONG_API_KEY, KONG_API_KEY_HEADER } = process.env; 27 | 28 | const valuesTemplatePath = path.join(DIR, "values.template.yaml"); 29 | const valuesRenderedPath = path.join(DIR, "values.rendered.yaml"); 30 | const valuesVars = { 31 | DOMAIN, 32 | }; 33 | utils.renderTemplate(valuesTemplatePath, valuesRenderedPath, valuesVars); 34 | await $`helm repo add kong https://charts.konghq.com`; 35 | await $`helm upgrade --install kong kong/kong --namespace kong --create-namespace -f ${valuesRenderedPath}`; 36 | 37 | const kongTemplatePath = path.join(DIR, "kong.template.yaml"); 38 | const kongRenderedPath = path.join(DIR, "kong.rendered.yaml"); 39 | const kongVars = { 40 | DOMAIN, 41 | KONG_API_KEY, 42 | KONG_API_KEY_HEADER, 43 | }; 44 | utils.renderTemplate(kongTemplatePath, kongRenderedPath, kongVars); 45 | await $`kubectl apply -f ${kongRenderedPath}`; 46 | } 47 | 48 | export async function uninstall() { 49 | const { DOMAIN, KONG_API_KEY, KONG_API_KEY_HEADER } = process.env; 50 | const kongTemplatePath = path.join(DIR, "kong.template.yaml"); 51 | const kongRenderedPath = path.join(DIR, "kong.rendered.yaml"); 52 | const kongVars = { 53 | DOMAIN, 54 | KONG_API_KEY, 55 | KONG_API_KEY_HEADER, 56 | }; 57 | utils.renderTemplate(kongTemplatePath, kongRenderedPath, kongVars); 58 | await $`kubectl delete -f ${kongRenderedPath} --ignore-not-found`; 59 | await $`helm uninstall kong --namespace kong`; 60 | } 61 | -------------------------------------------------------------------------------- /components/ai-gateway/kong/examples/kong.yaml: -------------------------------------------------------------------------------- 1 | # Chat Completions 2 | apiVersion: configuration.konghq.com/v1 3 | kind: KongPlugin 4 | metadata: 5 | name: ai-proxy-qwen3-30b-instruct-fp8-chat 6 | namespace: vllm 7 | annotations: 8 | kubernetes.io/ingress.class: kong 9 | plugin: ai-proxy 10 | config: 11 | logging: 12 | log_statistics: true 13 | model: 14 | name: qwen3-30b-instruct-fp8 15 | options: 16 | upstream_url: http://qwen3-30b-instruct-fp8.vllm:8000/v1/chat/completions 17 | provider: openai 18 | route_type: llm/v1/chat 19 | --- 20 | apiVersion: networking.k8s.io/v1 21 | kind: Ingress 22 | metadata: 23 | name: kong-qwen3-30b-instruct-fp8-chat 24 | namespace: vllm 25 | annotations: 26 | konghq.com/plugins: ai-proxy-qwen3-30b-instruct-fp8-chat 27 | spec: 28 | ingressClassName: kong 29 | rules: 30 | - http: 31 | paths: 32 | - path: /v1/chat/completions 33 | pathType: Prefix 34 | backend: 35 | service: 36 | name: 
qwen3-30b-instruct-fp8 37 | port: 38 | number: 8000 39 | --- 40 | # Completions 41 | apiVersion: configuration.konghq.com/v1 42 | kind: KongPlugin 43 | metadata: 44 | name: ai-proxy-qwen3-30b-instruct-fp8-completions 45 | namespace: vllm 46 | annotations: 47 | kubernetes.io/ingress.class: kong 48 | plugin: ai-proxy 49 | config: 50 | logging: 51 | log_statistics: true 52 | model: 53 | name: qwen3-30b-instruct-fp8 54 | options: 55 | upstream_url: http://qwen3-30b-instruct-fp8.vllm:8000/v1/completions 56 | provider: openai 57 | route_type: llm/v1/completions 58 | --- 59 | apiVersion: networking.k8s.io/v1 60 | kind: Ingress 61 | metadata: 62 | name: kong-qwen3-30b-instruct-fp8-completions 63 | namespace: vllm 64 | annotations: 65 | konghq.com/plugins: ai-proxy-qwen3-30b-instruct-fp8-completions 66 | spec: 67 | ingressClassName: kong 68 | rules: 69 | - http: 70 | paths: 71 | - path: /v1/completions 72 | pathType: Prefix 73 | backend: 74 | service: 75 | name: qwen3-30b-instruct-fp8 76 | port: 77 | number: 8000 78 | -------------------------------------------------------------------------------- /examples/agno/calculator-agent/agent.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: calculator-agent 5 | namespace: agno 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: calculator-agent 11 | template: 12 | metadata: 13 | labels: 14 | app: calculator-agent 15 | spec: 16 | serviceAccountName: calculator-agent 17 | automountServiceAccountToken: false 18 | {{#unless useBuildx}} 19 | nodeSelector: 20 | kubernetes.io/arch: {{{arch}}} 21 | {{/unless}} 22 | containers: 23 | - name: agent 24 | image: {{{IMAGE}}} 25 | env: 26 | - name: USE_BEDROCK 27 | value: "{{{USE_BEDROCK}}}" 28 | {{#if USE_BEDROCK}} 29 | - name: BEDROCK_MODEL 30 | value: {{{BEDROCK_MODEL}}} 31 | {{else}} 32 | - name: LITELLM_BASE_URL 33 | value: {{{LITELLM_BASE_URL}}} 34 | - name: LITELLM_API_KEY 35 | value: {{{LITELLM_API_KEY}}} 36 | - name: LITELLM_MODEL_NAME 37 | value: {{{LITELLM_MODEL_NAME}}} 38 | {{/if}} 39 | - name: USE_MCP_TOOLS 40 | value: "{{{USE_MCP_TOOLS}}}" 41 | {{#if LANGFUSE_HOST}} 42 | - name: LANGFUSE_HOST 43 | value: {{{LANGFUSE_HOST}}} 44 | - name: LANGFUSE_PUBLIC_KEY 45 | value: {{{LANGFUSE_PUBLIC_KEY}}} 46 | - name: LANGFUSE_SECRET_KEY 47 | value: {{{LANGFUSE_SECRET_KEY}}} 48 | {{/if}} 49 | ports: 50 | - name: http 51 | containerPort: 80 52 | resources: 53 | requests: 54 | cpu: 250m 55 | memory: 512Mi 56 | limits: 57 | memory: 512Mi 58 | --- 59 | apiVersion: v1 60 | kind: Service 61 | metadata: 62 | name: calculator-agent 63 | namespace: agno 64 | spec: 65 | selector: 66 | app: calculator-agent 67 | ports: 68 | - name: http 69 | port: 80 70 | --- 71 | apiVersion: v1 72 | kind: ServiceAccount 73 | metadata: 74 | name: calculator-agent 75 | namespace: agno 76 | automountServiceAccountToken: false 77 | -------------------------------------------------------------------------------- /components/llm-model/tgi/model-qwen3-8b-fp8.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: qwen3-8b-fp8 5 | namespace: tgi 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: qwen3-8b-fp8 11 | template: 12 | metadata: 13 | labels: 14 | app: qwen3-8b-fp8 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | 
nodeSelector: 21 | {{{KARPENTER_PREFIX}}}/instance-family: g6e 22 | containers: 23 | - name: tgi 24 | image: ghcr.io/huggingface/text-generation-inference:3.3.4 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | args: 34 | - --model-id=Qwen/Qwen3-8B-FP8 35 | - --trust-remote-code 36 | - --huggingface-hub-cache=/root/.cache/huggingface/hub 37 | env: 38 | - name: HF_TOKEN 39 | valueFrom: 40 | secretKeyRef: 41 | name: hf-token 42 | key: token 43 | - name: CUDA_GRAPHS 44 | value: "0" 45 | ports: 46 | - name: http 47 | containerPort: 80 48 | resources: 49 | requests: 50 | cpu: 3 #75% 51 | memory: 24Gi #75% 52 | nvidia.com/gpu: 1 53 | limits: 54 | nvidia.com/gpu: 1 55 | volumeMounts: 56 | - name: huggingface-cache 57 | mountPath: /root/.cache/huggingface 58 | volumes: 59 | - name: huggingface-cache 60 | persistentVolumeClaim: 61 | claimName: huggingface-cache 62 | tolerations: 63 | - key: nvidia.com/gpu 64 | operator: Exists 65 | effect: NoSchedule 66 | --- 67 | apiVersion: v1 68 | kind: Service 69 | metadata: 70 | name: qwen3-8b-fp8 71 | namespace: tgi 72 | spec: 73 | selector: 74 | app: qwen3-8b-fp8 75 | ports: 76 | - name: http 77 | port: 80 78 | -------------------------------------------------------------------------------- /components/llm-model/tgi/model-deepseek-r1-qwen3-8b.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: deepseek-r1-qwen3-8b 5 | namespace: tgi 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: deepseek-r1-qwen3-8b 11 | template: 12 | metadata: 13 | labels: 14 | app: deepseek-r1-qwen3-8b 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | {{{KARPENTER_PREFIX}}}/instance-family: g6e 22 | containers: 23 | - name: tgi 24 | image: ghcr.io/huggingface/text-generation-inference:3.3.4 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | args: 34 | - --model-id=deepseek-ai/DeepSeek-R1-0528-Qwen3-8B 35 | - --trust-remote-code 36 | - --huggingface-hub-cache=/root/.cache/huggingface/hub 37 | env: 38 | - name: HF_TOKEN 39 | valueFrom: 40 | secretKeyRef: 41 | name: hf-token 42 | key: token 43 | # - name: CUDA_GRAPHS 44 | # value: "0" 45 | ports: 46 | - name: http 47 | containerPort: 80 48 | resources: 49 | requests: 50 | cpu: 3 #75% 51 | memory: 24Gi #75% 52 | nvidia.com/gpu: 1 53 | limits: 54 | nvidia.com/gpu: 1 55 | volumeMounts: 56 | - name: huggingface-cache 57 | mountPath: /root/.cache/huggingface 58 | volumes: 59 | - name: huggingface-cache 60 | persistentVolumeClaim: 61 | claimName: huggingface-cache 62 | tolerations: 63 | - key: nvidia.com/gpu 64 | operator: Exists 65 | effect: NoSchedule 66 | --- 67 | apiVersion: v1 68 | kind: Service 69 | metadata: 70 | name: deepseek-r1-qwen3-8b 71 | namespace: tgi 72 | spec: 73 | selector: 74 | app: deepseek-r1-qwen3-8b 75 | ports: 76 | - name: http 77 | port: 80 78 | -------------------------------------------------------------------------------- /components/llm-model/vllm/model-gemma3-27b-gptq.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | 
metadata: 4 | name: gemma3-27b-gptq 5 | namespace: vllm 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: gemma3-27b-gptq 11 | template: 12 | metadata: 13 | labels: 14 | app: gemma3-27b-gptq 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | {{{KARPENTER_PREFIX}}}/instance-family: g6e 22 | containers: 23 | - name: vllm 24 | image: vllm/vllm-openai:v0.10.2 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | command: ["vllm", "serve"] 34 | args: 35 | - ISTA-DASLab/gemma-3-27b-it-GPTQ-4b-128g 36 | - --served-model-name=gemma3-27b-gptq 37 | - --trust-remote-code 38 | - --gpu-memory-utilization=0.90 39 | - --max-model-len=32768 # 32K 40 | env: 41 | - name: HUGGING_FACE_HUB_TOKEN 42 | valueFrom: 43 | secretKeyRef: 44 | name: hf-token 45 | key: token 46 | ports: 47 | - name: http 48 | containerPort: 8000 49 | resources: 50 | requests: 51 | cpu: 3 #75% 52 | memory: 24Gi #75% 53 | nvidia.com/gpu: 1 54 | limits: 55 | nvidia.com/gpu: 1 56 | volumeMounts: 57 | - name: huggingface-cache 58 | mountPath: /root/.cache/huggingface 59 | volumes: 60 | - name: huggingface-cache 61 | persistentVolumeClaim: 62 | claimName: huggingface-cache 63 | tolerations: 64 | - key: nvidia.com/gpu 65 | operator: Exists 66 | effect: NoSchedule 67 | --- 68 | apiVersion: v1 69 | kind: Service 70 | metadata: 71 | name: gemma3-27b-gptq 72 | namespace: vllm 73 | spec: 74 | selector: 75 | app: gemma3-27b-gptq 76 | ports: 77 | - name: http 78 | port: 8000 79 | -------------------------------------------------------------------------------- /examples/strands-agents/calculator-agent/agent.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: calculator-agent 5 | namespace: strands-agents 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: calculator-agent 11 | template: 12 | metadata: 13 | labels: 14 | app: calculator-agent 15 | spec: 16 | serviceAccountName: calculator-agent 17 | automountServiceAccountToken: false 18 | {{#unless useBuildx}} 19 | nodeSelector: 20 | kubernetes.io/arch: {{{arch}}} 21 | {{/unless}} 22 | containers: 23 | - name: agent 24 | image: {{{IMAGE}}} 25 | env: 26 | - name: USE_BEDROCK 27 | value: "{{{USE_BEDROCK}}}" 28 | {{#if USE_BEDROCK}} 29 | - name: BEDROCK_MODEL 30 | value: {{{BEDROCK_MODEL}}} 31 | {{else}} 32 | - name: LITELLM_BASE_URL 33 | value: {{{LITELLM_BASE_URL}}} 34 | - name: LITELLM_API_KEY 35 | value: {{{LITELLM_API_KEY}}} 36 | - name: LITELLM_MODEL_NAME 37 | value: {{{LITELLM_MODEL_NAME}}} 38 | {{/if}} 39 | - name: USE_MCP_TOOLS 40 | value: "{{{USE_MCP_TOOLS}}}" 41 | - name: USE_MCP_GATEWAY 42 | value: "{{{USE_MCP_GATEWAY}}}" 43 | {{#if LANGFUSE_HOST}} 44 | - name: LANGFUSE_HOST 45 | value: {{{LANGFUSE_HOST}}} 46 | - name: LANGFUSE_PUBLIC_KEY 47 | value: {{{LANGFUSE_PUBLIC_KEY}}} 48 | - name: LANGFUSE_SECRET_KEY 49 | value: {{{LANGFUSE_SECRET_KEY}}} 50 | {{/if}} 51 | ports: 52 | - name: http 53 | containerPort: 80 54 | resources: 55 | requests: 56 | cpu: 250m 57 | memory: 512Mi 58 | limits: 59 | memory: 512Mi 60 | --- 61 | apiVersion: v1 62 | kind: Service 63 | metadata: 64 | name: calculator-agent 65 | namespace: strands-agents 66 | spec: 67 | selector: 68 | app: calculator-agent 69 | ports: 70 | - name: http 71 | port: 80 72 
| --- 73 | apiVersion: v1 74 | kind: ServiceAccount 75 | metadata: 76 | name: calculator-agent 77 | namespace: strands-agents 78 | automountServiceAccountToken: false 79 | -------------------------------------------------------------------------------- /ecr-image-sync.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # ECR Image Sync Script 4 | # Usage: ./ecr-image-sync.sh 5 | 6 | set -e 7 | 8 | # List of images to sync (add your images here) 9 | IMAGES=( 10 | "bitnamilegacy/redis:8.2.1-debian-12-r0" 11 | "bitnamilegacy/postgresql:17.5.0-debian-12-r8" 12 | "bitnamilegacy/clickhouse:25.2.1-debian-12-r0" 13 | "bitnamilegacy/valkey:8.0.2-debian-12-r2" 14 | "bitnamilegacy/zookeeper:3.9.3-debian-12-r8" 15 | "bitnamilegacy/minio:2024.12.18-debian-12-r1" 16 | ) 17 | 18 | # Prompt for AWS configuration 19 | read -p "Enter AWS Region: " AWS_REGION 20 | read -p "Enter AWS Account ID: " AWS_ACCOUNT_ID 21 | read -p "Enter Public ECR Registry Alias: " ECR_REGISTRY_ALIAS 22 | 23 | echo "Configuration:" 24 | echo " AWS Region: $AWS_REGION" 25 | echo " AWS Account ID: $AWS_ACCOUNT_ID" 26 | echo " ECR Registry Alias: $ECR_REGISTRY_ALIAS" 27 | echo "" 28 | 29 | # Login to public ECR (always uses us-east-1 for public ECR) 30 | echo "Logging into public ECR..." 31 | aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws 32 | 33 | # Function to process each image 34 | process_image() { 35 | local image="$1" 36 | local repo_name=$(echo "$image" | cut -d':' -f1 | sed 's/.*\///') 37 | local tag=$(echo "$image" | cut -d':' -f2) 38 | 39 | echo "Processing: $image" 40 | echo " Repository: $repo_name" 41 | echo " Tag: $tag" 42 | 43 | # Check if ECR repository exists, create if not 44 | if ! aws ecr-public describe-repositories --repository-names "$repo_name" --region us-east-1 > /dev/null 2>&1; then 45 | echo " Creating ECR repository: $repo_name" 46 | aws ecr-public create-repository --repository-name "$repo_name" --region us-east-1 47 | else 48 | echo " ECR repository exists: $repo_name" 49 | fi 50 | 51 | # Use buildx imagetools to copy multi-arch image with manifest list 52 | local ecr_image="public.ecr.aws/$ECR_REGISTRY_ALIAS/$repo_name:$tag" 53 | echo " Copying multi-arch image to: $ecr_image" 54 | docker buildx imagetools create --tag "$ecr_image" "$image" 55 | 56 | echo " ✓ Completed: $image" 57 | echo "" 58 | } 59 | 60 | # Process all images in the list 61 | echo "Processing ${#IMAGES[@]} images..." 62 | echo "" 63 | 64 | for image in "${IMAGES[@]}"; do 65 | process_image "$image" 66 | done 67 | 68 | echo "All images processed successfully!"
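# Example session (illustrative values only; the region, account ID, and
# registry alias below are placeholders, not defaults shipped with this script):
#   ./ecr-image-sync.sh
#   Enter AWS Region: us-west-2
#   Enter AWS Account ID: 123456789012
#   Enter Public ECR Registry Alias: my-alias
# A synced image is then pullable as, for example:
#   docker pull public.ecr.aws/my-alias/redis:8.2.1-debian-12-r0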
69 | -------------------------------------------------------------------------------- /components/o11y/mlflow/main.tf: -------------------------------------------------------------------------------- 1 | variable "region" { 2 | type = string 3 | default = "us-west-2" 4 | } 5 | variable "name" { 6 | type = string 7 | default = "genai-on-eks" 8 | } 9 | terraform { 10 | required_providers { 11 | aws = { 12 | source = "hashicorp/aws" 13 | version = "~> 5.96.0" 14 | } 15 | } 16 | } 17 | provider "aws" { 18 | region = var.region 19 | } 20 | 21 | # S3 Bucket for MLflow 22 | resource "aws_s3_bucket" "mlflow" { 23 | bucket_prefix = "${var.name}-bucket-mlflow-" 24 | force_destroy = true 25 | } 26 | 27 | # Block public access to the S3 bucket 28 | resource "aws_s3_bucket_public_access_block" "mlflow" { 29 | bucket = aws_s3_bucket.mlflow.id 30 | 31 | block_public_acls = true 32 | block_public_policy = true 33 | ignore_public_acls = true 34 | restrict_public_buckets = true 35 | } 36 | 37 | # Enable server-side encryption for the S3 bucket 38 | resource "aws_s3_bucket_server_side_encryption_configuration" "mlflow" { 39 | bucket = aws_s3_bucket.mlflow.id 40 | 41 | rule { 42 | apply_server_side_encryption_by_default { 43 | sse_algorithm = "AES256" 44 | } 45 | } 46 | } 47 | 48 | output "mlflow_bucket_name" { 49 | value = aws_s3_bucket.mlflow.id 50 | } 51 | 52 | resource "aws_iam_role" "mlflow_s3_access" { 53 | name = "${var.name}-${var.region}-mlflow-s3-access" 54 | assume_role_policy = jsonencode({ 55 | Version = "2012-10-17" 56 | Statement = [{ 57 | Effect = "Allow" 58 | Principal = { 59 | Service = "pods.eks.amazonaws.com" 60 | } 61 | Action = ["sts:AssumeRole", "sts:TagSession"] 62 | }] 63 | }) 64 | } 65 | 66 | resource "aws_iam_role_policy" "mlflow_s3_access" { 67 | role = aws_iam_role.mlflow_s3_access.name 68 | policy = jsonencode({ 69 | Version = "2012-10-17" 70 | Statement = [{ 71 | Effect = "Allow" 72 | Action = [ 73 | "s3:GetObject", 74 | "s3:PutObject", 75 | "s3:DeleteObject", 76 | "s3:ListBucket" 77 | ] 78 | Resource = [ 79 | "arn:aws:s3:::${var.name}-bucket-mlflow-*", 80 | "arn:aws:s3:::${var.name}-bucket-mlflow-*/*" 81 | ] 82 | }] 83 | }) 84 | } 85 | 86 | resource "aws_eks_pod_identity_association" "mlflow_s3" { 87 | cluster_name = var.name 88 | namespace = "mlflow" 89 | service_account = "mlflow" 90 | role_arn = aws_iam_role.mlflow_s3_access.arn 91 | } 92 | -------------------------------------------------------------------------------- /components/embedding-model/tei/model-qwen3-embedding-4b-bf16.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: qwen3-embedding-4b-bf16 5 | namespace: tei 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: qwen3-embedding-4b-bf16 11 | template: 12 | metadata: 13 | labels: 14 | app: qwen3-embedding-4b-bf16 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | {{{KARPENTER_PREFIX}}}/instance-family: g6 22 | containers: 23 | - name: tei 24 | image: ghcr.io/huggingface/text-embeddings-inference:1.8 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | args: 34 | - --model-id=Qwen/Qwen3-Embedding-4B 35 | - --huggingface-hub-cache=/root/.cache/huggingface/hub 36 | env: 37 | - name: HF_TOKEN 38 | valueFrom: 39 | 
secretKeyRef: 40 | name: hf-token 41 | key: token 42 | ports: 43 | - name: http 44 | containerPort: 80 45 | resources: 46 | requests: 47 | cpu: 3 #75% 48 | memory: 24Gi #75% 49 | nvidia.com/gpu: 1 50 | limits: 51 | nvidia.com/gpu: 1 52 | volumeMounts: 53 | - name: huggingface-cache 54 | mountPath: /root/.cache/huggingface 55 | - name: shm 56 | mountPath: /dev/shm 57 | volumes: 58 | - name: huggingface-cache 59 | persistentVolumeClaim: 60 | claimName: huggingface-cache 61 | - name: shm 62 | emptyDir: 63 | medium: Memory 64 | sizeLimit: 10Gi 65 | tolerations: 66 | - key: nvidia.com/gpu 67 | operator: Exists 68 | effect: NoSchedule 69 | --- 70 | apiVersion: v1 71 | kind: Service 72 | metadata: 73 | name: qwen3-embedding-4b-bf16 74 | namespace: tei 75 | spec: 76 | selector: 77 | app: qwen3-embedding-4b-bf16 78 | ports: 79 | - name: http 80 | port: 80 81 | -------------------------------------------------------------------------------- /components/embedding-model/tei/model-qwen3-embedding-8b-bf16.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: qwen3-embedding-8b-bf16 5 | namespace: tei 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: qwen3-embedding-8b-bf16 11 | template: 12 | metadata: 13 | labels: 14 | app: qwen3-embedding-8b-bf16 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | {{{KARPENTER_PREFIX}}}/instance-family: g6 22 | containers: 23 | - name: tei 24 | image: ghcr.io/huggingface/text-embeddings-inference:1.8 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | args: 34 | - --model-id=Qwen/Qwen3-Embedding-8B 35 | - --huggingface-hub-cache=/root/.cache/huggingface/hub 36 | env: 37 | - name: HF_TOKEN 38 | valueFrom: 39 | secretKeyRef: 40 | name: hf-token 41 | key: token 42 | ports: 43 | - name: http 44 | containerPort: 80 45 | resources: 46 | requests: 47 | cpu: 3 #75% 48 | memory: 12Gi #75% 49 | nvidia.com/gpu: 1 50 | limits: 51 | nvidia.com/gpu: 1 52 | volumeMounts: 53 | - name: huggingface-cache 54 | mountPath: /root/.cache/huggingface 55 | - name: shm 56 | mountPath: /dev/shm 57 | volumes: 58 | - name: huggingface-cache 59 | persistentVolumeClaim: 60 | claimName: huggingface-cache 61 | - name: shm 62 | emptyDir: 63 | medium: Memory 64 | sizeLimit: 10Gi 65 | tolerations: 66 | - key: nvidia.com/gpu 67 | operator: Exists 68 | effect: NoSchedule 69 | --- 70 | apiVersion: v1 71 | kind: Service 72 | metadata: 73 | name: qwen3-embedding-8b-bf16 74 | namespace: tei 75 | spec: 76 | selector: 77 | app: qwen3-embedding-8b-bf16 78 | ports: 79 | - name: http 80 | port: 80 81 | -------------------------------------------------------------------------------- /components/embedding-model/tei/model-qwen3-embedding-06b-bf16.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: qwen3-embedding-06b-bf16 5 | namespace: tei 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: qwen3-embedding-06b-bf16 11 | template: 12 | metadata: 13 | labels: 14 | app: qwen3-embedding-06b-bf16 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | 
{{{KARPENTER_PREFIX}}}/instance-family: g6 22 | containers: 23 | - name: tei 24 | image: ghcr.io/huggingface/text-embeddings-inference:1.8 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | args: 34 | - --model-id=Qwen/Qwen3-Embedding-0.6B 35 | - --huggingface-hub-cache=/root/.cache/huggingface/hub 36 | env: 37 | - name: HF_TOKEN 38 | valueFrom: 39 | secretKeyRef: 40 | name: hf-token 41 | key: token 42 | ports: 43 | - name: http 44 | containerPort: 80 45 | resources: 46 | requests: 47 | cpu: 3 #75% 48 | memory: 12Gi #75% 49 | nvidia.com/gpu: 1 50 | limits: 51 | nvidia.com/gpu: 1 52 | volumeMounts: 53 | - name: huggingface-cache 54 | mountPath: /root/.cache/huggingface 55 | - name: shm 56 | mountPath: /dev/shm 57 | volumes: 58 | - name: huggingface-cache 59 | persistentVolumeClaim: 60 | claimName: huggingface-cache 61 | - name: shm 62 | emptyDir: 63 | medium: Memory 64 | sizeLimit: 10Gi 65 | tolerations: 66 | - key: nvidia.com/gpu 67 | operator: Exists 68 | effect: NoSchedule 69 | --- 70 | apiVersion: v1 71 | kind: Service 72 | metadata: 73 | name: qwen3-embedding-06b-bf16 74 | namespace: tei 75 | spec: 76 | selector: 77 | app: qwen3-embedding-06b-bf16 78 | ports: 79 | - name: http 80 | port: 80 81 | -------------------------------------------------------------------------------- /components/vector-database/milvus/main.tf: -------------------------------------------------------------------------------- 1 | variable "region" { 2 | type = string 3 | default = "us-west-2" 4 | } 5 | variable "name" { 6 | type = string 7 | default = "genai-on-eks" 8 | } 9 | terraform { 10 | required_providers { 11 | aws = { 12 | source = "hashicorp/aws" 13 | version = "~> 5.96.0" 14 | } 15 | } 16 | } 17 | provider "aws" { 18 | region = var.region 19 | } 20 | 21 | # S3 Bucket for Milvus 22 | resource "aws_s3_bucket" "milvus" { 23 | bucket_prefix = "${var.name}-bucket-milvus-" 24 | force_destroy = true 25 | } 26 | 27 | # Block public access to the S3 bucket 28 | resource "aws_s3_bucket_public_access_block" "milvus" { 29 | bucket = aws_s3_bucket.milvus.id 30 | 31 | block_public_acls = true 32 | block_public_policy = true 33 | ignore_public_acls = true 34 | restrict_public_buckets = true 35 | } 36 | 37 | # Enable server-side encryption for the S3 bucket 38 | resource "aws_s3_bucket_server_side_encryption_configuration" "milvus" { 39 | bucket = aws_s3_bucket.milvus.id 40 | 41 | rule { 42 | apply_server_side_encryption_by_default { 43 | sse_algorithm = "AES256" 44 | } 45 | } 46 | } 47 | 48 | output "milvus_bucket_name" { 49 | value = aws_s3_bucket.milvus.id 50 | } 51 | 52 | resource "aws_iam_role" "milvus_s3_access" { 53 | name = "${var.name}-${var.region}-milvus-s3-access" 54 | assume_role_policy = jsonencode({ 55 | Version = "2012-10-17" 56 | Statement = [{ 57 | Effect = "Allow" 58 | Principal = { 59 | Service = "pods.eks.amazonaws.com" 60 | } 61 | Action = ["sts:AssumeRole", "sts:TagSession"] 62 | }] 63 | }) 64 | } 65 | 66 | resource "aws_iam_role_policy" "milvus_s3_access" { 67 | role = aws_iam_role.milvus_s3_access.name 68 | policy = jsonencode({ 69 | Version = "2012-10-17" 70 | Statement = [{ 71 | Effect = "Allow" 72 | Action = [ 73 | "s3:GetObject", 74 | "s3:PutObject", 75 | "s3:DeleteObject", 76 | "s3:ListBucket" 77 | ] 78 | Resource = [ 79 | "arn:aws:s3:::${var.name}-bucket-milvus-*", 80 | "arn:aws:s3:::${var.name}-bucket-milvus-*/*" 81 | ] 82 | }] 83 | }) 84 | } 85 | 86 
| resource "aws_eks_pod_identity_association" "milvus_s3" { 87 | cluster_name = var.name 88 | namespace = "milvus" 89 | service_account = "milvus" 90 | role_arn = aws_iam_role.milvus_s3_access.arn 91 | } 92 | -------------------------------------------------------------------------------- /components/ai-gateway/kong/values.template.yaml: -------------------------------------------------------------------------------- 1 | proxy: 2 | enabled: true 3 | type: ClusterIP 4 | ingress: 5 | enabled: true 6 | hostname: kong.{{{DOMAIN}}} 7 | path: / 8 | pathType: Prefix 9 | ingressClassName: {{#if DOMAIN}}shared-{{/if}}internet-facing-alb 10 | annotations: 11 | alb.ingress.kubernetes.io/target-type: ip 12 | alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}]' 13 | http: 14 | enabled: true 15 | tls: 16 | enabled: false 17 | 18 | # Note. Kong Enterprise RBAC is required to securely expose Kong Manager via Ingress. 19 | # Since Kong Manager call Kong Admin API directly from the browser, we cannot just put them behind Nginx Ingress. 20 | # Using: 21 | # kubectl -n kong port-forward svc/kong-kong-admin 8001:800 22 | # kubectl -n kong port-forward svc/kong-kong-manager 8002:8002 23 | manager: 24 | enabled: true 25 | type: ClusterIP 26 | # ingress: 27 | # enabled: true 28 | # hostname: kong-manager.{{{DOMAIN}}} 29 | # path: / 30 | # pathType: Prefix 31 | # ingressClassName: {{#if DOMAIN}}shared-{{/if}}internet-facing-alb 32 | # annotations: 33 | # alb.ingress.kubernetes.io/target-type: ip 34 | # alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}]' 35 | http: 36 | enabled: true 37 | tls: 38 | enabled: false 39 | 40 | admin: 41 | enabled: true 42 | type: ClusterIP 43 | # ingress: 44 | # enabled: true 45 | # hostname: kong-admin.{{{DOMAIN}}} 46 | # path: / 47 | # pathType: Prefix 48 | # ingressClassName: {{#if DOMAIN}}shared-{{/if}}internet-facing-alb 49 | # annotations: 50 | # alb.ingress.kubernetes.io/target-type: ip 51 | # alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}]' 52 | http: 53 | enabled: true 54 | tls: 55 | enabled: false 56 | 57 | postgresql: 58 | enabled: true 59 | auth: 60 | postgresPassword: Pass@123 61 | password: Pass@123 62 | image: 63 | registry: public.ecr.aws 64 | repository: agentic-ai-platforms-on-k8s/postgresql 65 | tag: 17.5.0-debian-12-r8 66 | 67 | env: 68 | database: postgres 69 | # admin_gui_url: https://kong-manager.{{{DOMAIN}}} 70 | # admin_gui_api_url: https://kong-admin.{{{DOMAIN}}} 71 | # admin_gui_session_conf: '{"secret":"secret","storage":"kong","cookie_secure":false}' 72 | # password: {{{KONG_MANAGER_PASSWORD}}} 73 | 74 | # enterprise: 75 | # rbac: 76 | # enabled: true 77 | # admin_gui_auth: basic-auth -------------------------------------------------------------------------------- /components/llm-model/vllm/model-deepseek-r1-qwen3-8b.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: deepseek-r1-qwen3-8b 5 | namespace: vllm 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: deepseek-r1-qwen3-8b 11 | template: 12 | metadata: 13 | labels: 14 | app: deepseek-r1-qwen3-8b 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | {{{KARPENTER_PREFIX}}}/instance-family: g6e 22 | containers: 23 | - name: vllm 24 | image: vllm/vllm-openai:v0.10.2 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | 
capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | command: ["vllm", "serve"] 34 | args: 35 | - deepseek-ai/DeepSeek-R1-0528-Qwen3-8B 36 | - --served-model-name=deepseek-r1-qwen3-8b 37 | - --trust-remote-code 38 | - --gpu-memory-utilization=0.90 39 | - --max-model-len=32768 # 32K 40 | # DeepSeek-R1 specific 41 | - --reasoning-parser=deepseek_r1 42 | env: 43 | - name: HUGGING_FACE_HUB_TOKEN 44 | valueFrom: 45 | secretKeyRef: 46 | name: hf-token 47 | key: token 48 | ports: 49 | - name: http 50 | containerPort: 8000 51 | resources: 52 | requests: 53 | cpu: 3 #75% 54 | memory: 24Gi #75% 55 | nvidia.com/gpu: 1 56 | limits: 57 | nvidia.com/gpu: 1 58 | volumeMounts: 59 | - name: huggingface-cache 60 | mountPath: /root/.cache/huggingface 61 | volumes: 62 | - name: huggingface-cache 63 | persistentVolumeClaim: 64 | claimName: huggingface-cache 65 | tolerations: 66 | - key: nvidia.com/gpu 67 | operator: Exists 68 | effect: NoSchedule 69 | --- 70 | apiVersion: v1 71 | kind: Service 72 | metadata: 73 | name: deepseek-r1-qwen3-8b 74 | namespace: vllm 75 | spec: 76 | selector: 77 | app: deepseek-r1-qwen3-8b 78 | ports: 79 | - name: http 80 | port: 8000 81 | -------------------------------------------------------------------------------- /components/llm-model/vllm/model-qwen3-30b-thinking-fp8.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: qwen3-30b-thinking-fp8 5 | namespace: vllm 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: qwen3-30b-thinking-fp8 11 | template: 12 | metadata: 13 | labels: 14 | app: qwen3-30b-thinking-fp8 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | {{{KARPENTER_PREFIX}}}/instance-family: g6e 22 | containers: 23 | - name: vllm 24 | image: vllm/vllm-openai:v0.10.2 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | command: ["vllm", "serve"] 34 | args: 35 | - Qwen/Qwen3-30B-A3B-Thinking-2507-FP8 36 | - --served-model-name=qwen3-30b-thinking-fp8 37 | - --trust-remote-code 38 | - --gpu-memory-utilization=0.90 39 | - --max-model-len=32768 # 32K 40 | # Qwen3 specific 41 | - --reasoning-parser=qwen3 42 | env: 43 | - name: HUGGING_FACE_HUB_TOKEN 44 | valueFrom: 45 | secretKeyRef: 46 | name: hf-token 47 | key: token 48 | ports: 49 | - name: http 50 | containerPort: 8000 51 | resources: 52 | requests: 53 | cpu: 3 #75% 54 | memory: 24Gi #75% 55 | nvidia.com/gpu: 1 56 | limits: 57 | nvidia.com/gpu: 1 58 | volumeMounts: 59 | - name: huggingface-cache 60 | mountPath: /root/.cache/huggingface 61 | volumes: 62 | - name: huggingface-cache 63 | persistentVolumeClaim: 64 | claimName: huggingface-cache 65 | tolerations: 66 | - key: nvidia.com/gpu 67 | operator: Exists 68 | effect: NoSchedule 69 | --- 70 | apiVersion: v1 71 | kind: Service 72 | metadata: 73 | name: qwen3-30b-thinking-fp8 74 | namespace: vllm 75 | spec: 76 | selector: 77 | app: qwen3-30b-thinking-fp8 78 | ports: 79 | - name: http 80 | port: 8000 81 | -------------------------------------------------------------------------------- /components/llm-model/vllm/model-gpt-oss-20b.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | 
name: gpt-oss-20b 5 | namespace: vllm 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: gpt-oss-20b 11 | template: 12 | metadata: 13 | labels: 14 | app: gpt-oss-20b 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | {{{KARPENTER_PREFIX}}}/instance-family: g6e 22 | containers: 23 | - name: vllm 24 | image: vllm/vllm-openai:gptoss 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | command: ["vllm", "serve"] 34 | args: 35 | - openai/gpt-oss-20b 36 | - --served-model-name=gpt-oss-20b 37 | - --trust-remote-code 38 | - --gpu-memory-utilization=0.90 39 | - --max-model-len=32768 # 32K 40 | # - --max-model-len=131072 # 128K 41 | env: 42 | - name: HUGGING_FACE_HUB_TOKEN 43 | valueFrom: 44 | secretKeyRef: 45 | name: hf-token 46 | key: token 47 | - name: VLLM_ATTENTION_BACKEND 48 | value: TRITON_ATTN_VLLM_V1 49 | resources: 50 | requests: 51 | cpu: 3 #75% 52 | memory: 24Gi #75% 53 | nvidia.com/gpu: 1 54 | limits: 55 | nvidia.com/gpu: 1 56 | volumeMounts: 57 | - name: huggingface-cache 58 | mountPath: /root/.cache/huggingface 59 | - name: shm 60 | mountPath: /dev/shm 61 | volumes: 62 | - name: huggingface-cache 63 | persistentVolumeClaim: 64 | claimName: huggingface-cache 65 | - name: shm 66 | emptyDir: 67 | medium: Memory 68 | tolerations: 69 | - key: nvidia.com/gpu 70 | operator: Exists 71 | effect: NoSchedule 72 | --- 73 | apiVersion: v1 74 | kind: Service 75 | metadata: 76 | name: gpt-oss-20b 77 | namespace: vllm 78 | spec: 79 | selector: 80 | app: gpt-oss-20b 81 | ports: 82 | - name: http 83 | port: 8000 84 | -------------------------------------------------------------------------------- /components/llm-model/vllm/model-qwen3-30b-instruct-fp8.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: qwen3-30b-instruct-fp8 5 | namespace: vllm 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: qwen3-30b-instruct-fp8 11 | template: 12 | metadata: 13 | labels: 14 | app: qwen3-30b-instruct-fp8 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | {{{KARPENTER_PREFIX}}}/instance-family: g6e 22 | containers: 23 | - name: vllm 24 | image: vllm/vllm-openai:v0.10.2 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | command: ["vllm", "serve"] 34 | args: 35 | - Qwen/Qwen3-30B-A3B-Instruct-2507-FP8 36 | - --served-model-name=qwen3-30b-instruct-fp8 37 | - --trust-remote-code 38 | - --gpu-memory-utilization=0.90 39 | - --max-model-len=32768 # 32K 40 | # Qwen3 specific 41 | - --enable-auto-tool-choice 42 | - --tool-call-parser=hermes 43 | env: 44 | - name: HUGGING_FACE_HUB_TOKEN 45 | valueFrom: 46 | secretKeyRef: 47 | name: hf-token 48 | key: token 49 | ports: 50 | - name: http 51 | containerPort: 8000 52 | resources: 53 | requests: 54 | cpu: 3 #75% 55 | memory: 24Gi #75% 56 | nvidia.com/gpu: 1 57 | limits: 58 | nvidia.com/gpu: 1 59 | volumeMounts: 60 | - name: huggingface-cache 61 | mountPath: /root/.cache/huggingface 62 | volumes: 63 | - name: huggingface-cache 64 | persistentVolumeClaim: 65 | claimName: 
huggingface-cache 66 | tolerations: 67 | - key: nvidia.com/gpu 68 | operator: Exists 69 | effect: NoSchedule 70 | --- 71 | apiVersion: v1 72 | kind: Service 73 | metadata: 74 | name: qwen3-30b-instruct-fp8 75 | namespace: vllm 76 | spec: 77 | selector: 78 | app: qwen3-30b-instruct-fp8 79 | ports: 80 | - name: http 81 | port: 8000 82 | -------------------------------------------------------------------------------- /components/llm-model/sglang/model-gpt-oss-20b.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: gpt-oss-20b 5 | namespace: sglang 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: gpt-oss-20b 11 | template: 12 | metadata: 13 | labels: 14 | app: gpt-oss-20b 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | {{{KARPENTER_PREFIX}}}/instance-family: g6e 22 | containers: 23 | - name: sglang 24 | image: docker.io/lmsysorg/sglang:v0.5.0rc1-cu126 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | command: ["python3", "-m", "sglang.launch_server"] 34 | args: 35 | - --model-path=openai/gpt-oss-20b 36 | - --host=0.0.0.0 37 | - --port=30000 38 | - --trust-remote-code 39 | - --mem-fraction-static=0.90 40 | - --context-length=32768 # 32K 41 | env: 42 | - name: HF_TOKEN 43 | valueFrom: 44 | secretKeyRef: 45 | name: hf-token 46 | key: token 47 | ports: 48 | - name: http 49 | containerPort: 30000 50 | resources: 51 | requests: 52 | cpu: 3 #75% 53 | memory: 24Gi #75% 54 | nvidia.com/gpu: 1 55 | limits: 56 | nvidia.com/gpu: 1 57 | volumeMounts: 58 | - name: huggingface-cache 59 | mountPath: /root/.cache/huggingface 60 | - name: shm 61 | mountPath: /dev/shm 62 | volumes: 63 | - name: huggingface-cache 64 | persistentVolumeClaim: 65 | claimName: huggingface-cache 66 | - name: shm 67 | emptyDir: 68 | medium: Memory 69 | sizeLimit: 10Gi 70 | tolerations: 71 | - key: nvidia.com/gpu 72 | operator: Exists 73 | effect: NoSchedule 74 | --- 75 | apiVersion: v1 76 | kind: Service 77 | metadata: 78 | name: gpt-oss-20b 79 | namespace: sglang 80 | spec: 81 | selector: 82 | app: gpt-oss-20b 83 | ports: 84 | - name: http 85 | port: 30000 86 | -------------------------------------------------------------------------------- /components/o11y/langfuse/main.tf: -------------------------------------------------------------------------------- 1 | variable "region" { 2 | type = string 3 | default = "us-west-2" 4 | } 5 | variable "name" { 6 | type = string 7 | default = "genai-on-eks" 8 | } 9 | terraform { 10 | required_providers { 11 | aws = { 12 | source = "hashicorp/aws" 13 | version = "~> 5.96.0" 14 | } 15 | } 16 | } 17 | provider "aws" { 18 | region = var.region 19 | } 20 | 21 | # S3 Bucket for Langfuse 22 | resource "aws_s3_bucket" "langfuse" { 23 | bucket_prefix = "${var.name}-bucket-langfuse-" 24 | force_destroy = true 25 | } 26 | 27 | # Block public access to the S3 bucket 28 | resource "aws_s3_bucket_public_access_block" "langfuse" { 29 | bucket = aws_s3_bucket.langfuse.id 30 | 31 | block_public_acls = true 32 | block_public_policy = true 33 | ignore_public_acls = true 34 | restrict_public_buckets = true 35 | } 36 | 37 | # Enable server-side encryption for the S3 bucket 38 | resource "aws_s3_bucket_server_side_encryption_configuration" "langfuse" { 39 | 
bucket = aws_s3_bucket.langfuse.id 40 | 41 | rule { 42 | apply_server_side_encryption_by_default { 43 | sse_algorithm = "AES256" 44 | } 45 | } 46 | } 47 | 48 | output "langfuse_bucket_name" { 49 | value = aws_s3_bucket.langfuse.id 50 | } 51 | 52 | output "langfuse_s3_role_arn" { 53 | value = aws_iam_role.langfuse_s3_access.arn 54 | } 55 | 56 | resource "aws_iam_role" "langfuse_s3_access" { 57 | name = "${var.name}-${var.region}-langfuse-s3-access" 58 | assume_role_policy = jsonencode({ 59 | Version = "2012-10-17" 60 | Statement = [{ 61 | Effect = "Allow" 62 | Principal = { 63 | Service = "pods.eks.amazonaws.com" 64 | } 65 | Action = ["sts:AssumeRole", "sts:TagSession"] 66 | }] 67 | }) 68 | } 69 | 70 | resource "aws_iam_role_policy" "langfuse_s3_access" { 71 | role = aws_iam_role.langfuse_s3_access.name 72 | policy = jsonencode({ 73 | Version = "2012-10-17" 74 | Statement = [{ 75 | Effect = "Allow" 76 | Action = [ 77 | "s3:GetObject", 78 | "s3:PutObject", 79 | "s3:DeleteObject", 80 | "s3:ListBucket" 81 | ] 82 | Resource = [ 83 | "arn:aws:s3:::${var.name}-bucket-langfuse-*", 84 | "arn:aws:s3:::${var.name}-bucket-langfuse-*/*" 85 | ] 86 | }] 87 | }) 88 | } 89 | 90 | resource "aws_eks_pod_identity_association" "langfuse_s3" { 91 | cluster_name = var.name 92 | namespace = "langfuse" 93 | service_account = "langfuse" 94 | role_arn = aws_iam_role.langfuse_s3_access.arn 95 | } 96 | -------------------------------------------------------------------------------- /components/llm-model/vllm/model-qwen3-32b-fp8.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: qwen3-32b-fp8 5 | namespace: vllm 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: qwen3-32b-fp8 11 | template: 12 | metadata: 13 | labels: 14 | app: qwen3-32b-fp8 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | {{{KARPENTER_PREFIX}}}/instance-family: g6e 22 | containers: 23 | - name: vllm 24 | image: vllm/vllm-openai:v0.10.2 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | command: ["vllm", "serve"] 34 | args: 35 | - Qwen/Qwen3-32B-FP8 36 | - --served-model-name=qwen3-32b-fp8 37 | - --trust-remote-code 38 | # - --gpu-memory-utilization=0.90 39 | - --gpu-memory-utilization=0.95 40 | - --max-model-len=32768 # 32K 41 | # - --max-model-len=16384 # 16K 42 | # Qwen3 specific 43 | - --enable-auto-tool-choice 44 | - --tool-call-parser=hermes 45 | - --reasoning-parser=qwen3 46 | env: 47 | - name: HUGGING_FACE_HUB_TOKEN 48 | valueFrom: 49 | secretKeyRef: 50 | name: hf-token 51 | key: token 52 | ports: 53 | - name: http 54 | containerPort: 8000 55 | resources: 56 | requests: 57 | cpu: 3 #75% 58 | memory: 24Gi #75% 59 | nvidia.com/gpu: 1 60 | limits: 61 | nvidia.com/gpu: 1 62 | volumeMounts: 63 | - name: huggingface-cache 64 | mountPath: /root/.cache/huggingface 65 | volumes: 66 | - name: huggingface-cache 67 | persistentVolumeClaim: 68 | claimName: huggingface-cache 69 | tolerations: 70 | - key: nvidia.com/gpu 71 | operator: Exists 72 | effect: NoSchedule 73 | --- 74 | apiVersion: v1 75 | kind: Service 76 | metadata: 77 | name: qwen3-32b-fp8 78 | namespace: vllm 79 | spec: 80 | selector: 81 | app: qwen3-32b-fp8 82 | ports: 83 | - name: http 84 | port: 8000 85 | 
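A quick smoke test for any of these vLLM Deployments once the pod is Ready: port-forward the Service and call the OpenAI-compatible API. A minimal sketch against the qwen3-32b-fp8 manifest above (namespace, Service name, port, and model name are all taken from that file):

kubectl -n vllm port-forward svc/qwen3-32b-fp8 8000:8000 &
# vLLM exposes the OpenAI-compatible chat endpoint; "model" must match --served-model-name
curl -s http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "qwen3-32b-fp8", "messages": [{"role": "user", "content": "Say hello"}]}'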
-------------------------------------------------------------------------------- /components/llm-model/vllm/model-qwen3-coder-30b-fp8.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: qwen3-coder-30b-fp8 5 | namespace: vllm 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: qwen3-coder-30b-fp8 11 | template: 12 | metadata: 13 | labels: 14 | app: qwen3-coder-30b-fp8 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | {{{KARPENTER_PREFIX}}}/instance-family: g6e 22 | containers: 23 | - name: vllm 24 | image: vllm/vllm-openai:v0.10.2 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | command: ["vllm", "serve"] 34 | args: 35 | - Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8 36 | - --served-model-name=qwen3-coder-30b-fp8 37 | - --trust-remote-code 38 | # - --gpu-memory-utilization=0.90 39 | - --gpu-memory-utilization=0.95 40 | # - --max-model-len=32768 # 32K 41 | - --max-model-len=131072 # 128K 42 | # Qwen3 specific 43 | - --enable-auto-tool-choice 44 | - --tool-call-parser=qwen3_coder 45 | env: 46 | - name: HUGGING_FACE_HUB_TOKEN 47 | valueFrom: 48 | secretKeyRef: 49 | name: hf-token 50 | key: token 51 | ports: 52 | - name: http 53 | containerPort: 8000 54 | resources: 55 | requests: 56 | cpu: 3 #75% 57 | memory: 24Gi #75% 58 | nvidia.com/gpu: 1 59 | limits: 60 | nvidia.com/gpu: 1 61 | volumeMounts: 62 | - name: huggingface-cache 63 | mountPath: /root/.cache/huggingface 64 | volumes: 65 | - name: huggingface-cache 66 | persistentVolumeClaim: 67 | claimName: huggingface-cache 68 | tolerations: 69 | - key: nvidia.com/gpu 70 | operator: Exists 71 | effect: NoSchedule 72 | --- 73 | apiVersion: v1 74 | kind: Service 75 | metadata: 76 | name: qwen3-coder-30b-fp8 77 | namespace: vllm 78 | spec: 79 | selector: 80 | app: qwen3-coder-30b-fp8 81 | ports: 82 | - name: http 83 | port: 8000 84 | -------------------------------------------------------------------------------- /examples/agno/calculator-agent/index.mjs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env zx 2 | 3 | import { fileURLToPath } from "url"; 4 | import path from "path"; 5 | import fs from "fs"; 6 | import handlebars from "handlebars"; 7 | import { $, cd } from "zx"; 8 | $.verbose = true; 9 | 10 | export const name = "Agno - Calculator Agent"; 11 | const __filename = fileURLToPath(import.meta.url); 12 | const DIR = path.dirname(__filename); 13 | let BASE_DIR; 14 | let config; 15 | let utils; 16 | 17 | export async function init(_BASE_DIR, _config, _utils) { 18 | BASE_DIR = _BASE_DIR; 19 | config = _config; 20 | utils = _utils; 21 | } 22 | 23 | export async function install() { 24 | const { REGION } = process.env; 25 | await utils.terraform.apply(DIR); 26 | const ecrRepoUrl = await utils.terraform.output(DIR, { outputName: "ecr_repository_url" }); 27 | cd(DIR); 28 | await $`aws ecr get-login-password --region ${REGION} | docker login --username AWS --password-stdin ${ecrRepoUrl.split("/")[0]}`; 29 | const { useBuildx, arch } = config.docker; 30 | if (useBuildx) { 31 | await $`docker buildx build --platform linux/amd64,linux/arm64 -t ${ecrRepoUrl}:latest --push .`; 32 | } else { 33 | await $`docker build -t ${ecrRepoUrl}:latest .`; 34 | 
await $`docker push ${ecrRepoUrl}:latest`; 35 | } 36 | await $`kubectl apply -f ${path.join(DIR, "..", "namespace.yaml")}`; 37 | const agentTemplatePath = path.join(DIR, "agent.template.yaml"); 38 | const agentRenderedPath = path.join(DIR, "agent.rendered.yaml"); 39 | const agentTemplateString = fs.readFileSync(agentTemplatePath, "utf8"); 40 | const agentTemplate = handlebars.compile(agentTemplateString); 41 | const { LITELLM_API_KEY } = process.env; 42 | const agentVars = { 43 | useBuildx, 44 | arch, 45 | IMAGE: `${ecrRepoUrl}:latest`, 46 | ...config["examples"]["agno"]["calculator-agent"].env, 47 | LITELLM_BASE_URL: `http://litellm.litellm:4000/v1`, 48 | LITELLM_API_KEY: LITELLM_API_KEY, 49 | }; 50 | const result = await $`kubectl get pod -n langfuse -l app=web --ignore-not-found`; 51 | if (result.stdout.includes("langfuse")) { 52 | agentVars.LANGFUSE_HOST = "http://langfuse-web.langfuse:3000"; 53 | agentVars.LANGFUSE_PUBLIC_KEY = process.env.LANGFUSE_PUBLIC_KEY; 54 | agentVars.LANGFUSE_SECRET_KEY = process.env.LANGFUSE_SECRET_KEY; 55 | } 56 | fs.writeFileSync(agentRenderedPath, agentTemplate(agentVars)); 57 | await $`kubectl apply -f ${DIR}/agent.rendered.yaml`; 58 | } 59 | 60 | export async function uninstall() { 61 | await $`kubectl delete -f ${DIR}/agent.rendered.yaml --ignore-not-found`; 62 | await utils.terraform.destroy(DIR); 63 | } 64 | -------------------------------------------------------------------------------- /examples/strands-agents/calculator-agent/index.mjs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env zx 2 | 3 | import { fileURLToPath } from "url"; 4 | import path from "path"; 5 | import fs from "fs"; 6 | import handlebars from "handlebars"; 7 | import { $, cd } from "zx"; 8 | $.verbose = true; 9 | 10 | export const name = "Calculator Agent"; 11 | const __filename = fileURLToPath(import.meta.url); 12 | const DIR = path.dirname(__filename); 13 | let BASE_DIR; 14 | let config; 15 | let utils; 16 | 17 | export async function init(_BASE_DIR, _config, _utils) { 18 | BASE_DIR = _BASE_DIR; 19 | config = _config; 20 | utils = _utils; 21 | } 22 | 23 | export async function install() { 24 | const { REGION } = process.env; 25 | await utils.terraform.apply(DIR); 26 | const ecrRepoUrl = await utils.terraform.output(DIR, { outputName: "ecr_repository_url" }); 27 | cd(DIR); 28 | await $`aws ecr get-login-password --region ${REGION} | docker login --username AWS --password-stdin ${ecrRepoUrl.split("/")[0]}`; 29 | const { useBuildx, arch } = config.docker; 30 | if (useBuildx) { 31 | await $`docker buildx build --platform linux/amd64,linux/arm64 -t ${ecrRepoUrl}:latest --push .`; 32 | } else { 33 | await $`docker build -t ${ecrRepoUrl}:latest .`; 34 | await $`docker push ${ecrRepoUrl}:latest`; 35 | } 36 | await $`kubectl apply -f ${path.join(DIR, "..", "namespace.yaml")}`; 37 | const agentTemplatePath = path.join(DIR, "agent.template.yaml"); 38 | const agentRenderedPath = path.join(DIR, "agent.rendered.yaml"); 39 | const agentTemplateString = fs.readFileSync(agentTemplatePath, "utf8"); 40 | const agentTemplate = handlebars.compile(agentTemplateString); 41 | const { LITELLM_API_KEY } = process.env; 42 | const agentVars = { 43 | useBuildx, 44 | arch, 45 | IMAGE: `${ecrRepoUrl}:latest`, 46 | ...config["examples"]["strands-agents"]["calculator-agent"].env, 47 | LITELLM_BASE_URL: `http://litellm.litellm:4000`, 48 | LITELLM_API_KEY: LITELLM_API_KEY, 49 | }; 50 | const result = await $`kubectl get pod -n langfuse -l app=web 
--ignore-not-found`; 51 | if (result.stdout.includes("langfuse")) { 52 | agentVars.LANGFUSE_HOST = "http://langfuse-web.langfuse:3000"; 53 | agentVars.LANGFUSE_PUBLIC_KEY = process.env.LANGFUSE_PUBLIC_KEY; 54 | agentVars.LANGFUSE_SECRET_KEY = process.env.LANGFUSE_SECRET_KEY; 55 | } 56 | fs.writeFileSync(agentRenderedPath, agentTemplate(agentVars)); 57 | await $`kubectl apply -f ${DIR}/agent.rendered.yaml`; 58 | } 59 | 60 | export async function uninstall() { 61 | await $`kubectl delete -f ${DIR}/agent.rendered.yaml --ignore-not-found`; 62 | await utils.terraform.destroy(DIR); 63 | } 64 | -------------------------------------------------------------------------------- /components/llm-model/vllm/model-gpt-oss-120b.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: gpt-oss-120b 5 | namespace: vllm 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: gpt-oss-120b 11 | template: 12 | metadata: 13 | labels: 14 | app: gpt-oss-120b 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | # {{{KARPENTER_PREFIX}}}/instance-family: g6e 22 | {{{KARPENTER_PREFIX}}}/instance-category: p 23 | {{{KARPENTER_PREFIX}}}/instance-generation: "5" 24 | containers: 25 | - name: vllm 26 | image: vllm/vllm-openai:gptoss 27 | imagePullPolicy: IfNotPresent 28 | securityContext: 29 | allowPrivilegeEscalation: false 30 | capabilities: 31 | drop: 32 | - NET_RAW 33 | seccompProfile: 34 | type: RuntimeDefault 35 | command: ["vllm", "serve"] 36 | args: 37 | - openai/gpt-oss-120b 38 | - --served-model-name=gpt-oss-120b 39 | - --trust-remote-code 40 | - --gpu-memory-utilization=0.90 41 | - --max-model-len=32768 # 32K 42 | # - --max-model-len=131072 # 128K 43 | - --tensor-parallel-size=8 44 | - --async-scheduling 45 | env: 46 | - name: HUGGING_FACE_HUB_TOKEN 47 | valueFrom: 48 | secretKeyRef: 49 | name: hf-token 50 | key: token 51 | ports: 52 | - name: http 53 | containerPort: 8000 54 | resources: 55 | requests: 56 | nvidia.com/gpu: 8 57 | limits: 58 | nvidia.com/gpu: 8 59 | volumeMounts: 60 | - name: huggingface-cache 61 | mountPath: /root/.cache/huggingface 62 | - name: shm 63 | mountPath: /dev/shm 64 | volumes: 65 | - name: huggingface-cache 66 | persistentVolumeClaim: 67 | claimName: huggingface-cache 68 | - name: shm 69 | emptyDir: 70 | medium: Memory 71 | tolerations: 72 | - key: nvidia.com/gpu 73 | operator: Exists 74 | effect: NoSchedule 75 | --- 76 | apiVersion: v1 77 | kind: Service 78 | metadata: 79 | name: gpt-oss-120b 80 | namespace: vllm 81 | spec: 82 | selector: 83 | app: gpt-oss-120b 84 | ports: 85 | - name: http 86 | port: 8000 87 | -------------------------------------------------------------------------------- /components/llm-model/vllm/model-qwen3-8b-neuron.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: qwen3-8b-neuron 5 | namespace: vllm 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: qwen3-8b-neuron 11 | template: 12 | metadata: 13 | labels: 14 | app: qwen3-8b-neuron 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | node.kubernetes.io/instance-type: inf2.xlarge 22 | containers: 23 | - name: vllm 24 | image: 
public.ecr.aws/agentic-ai-platforms-on-k8s/vllm-neuron:qwen3-8b-optimum-neuron 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | command: ["vllm", "serve"] 34 | args: 35 | - /root/.cache/neuron/Qwen/Qwen3-8B 36 | - --served-model-name=qwen3-8b-neuron 37 | - --trust-remote-code 38 | - --gpu-memory-utilization=0.90 39 | # - --max-model-len=32768 # 32K 40 | # Qwen3 specific 41 | - --enable-auto-tool-choice 42 | - --tool-call-parser=hermes 43 | - --reasoning-parser=qwen3 44 | # Neuron specific 45 | - --tensor-parallel-size=2 46 | # - --gpu-memory-utilization=0.95 47 | - --max-num-seqs=2 48 | - --max-model-len=8192 49 | env: 50 | - name: HF_HOME 51 | value: /root/.cache/huggingface 52 | - name: HF_HUB_CACHE 53 | value: /root/.cache/huggingface/hub 54 | - name: NEURON_RT_NUM_CORES 55 | value: "2" 56 | - name: NEURON_RT_VISIBLE_CORES 57 | value: "0-1" 58 | ports: 59 | - name: http 60 | containerPort: 8000 61 | resources: 62 | requests: 63 | cpu: 3 #75% 64 | memory: 12Gi #75% 65 | aws.amazon.com/neuroncore: 2 66 | limits: 67 | aws.amazon.com/neuroncore: 2 68 | tolerations: 69 | - key: aws.amazon.com/neuron 70 | operator: Exists 71 | effect: NoSchedule 72 | --- 73 | apiVersion: v1 74 | kind: Service 75 | metadata: 76 | name: qwen3-8b-neuron 77 | namespace: vllm 78 | spec: 79 | selector: 80 | app: qwen3-8b-neuron 81 | ports: 82 | - name: http 83 | port: 8000 84 | -------------------------------------------------------------------------------- /components/llm-model/sglang/model-qwen3-32b-fp8.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: qwen3-32b-fp8 5 | namespace: sglang 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: qwen3-32b-fp8 11 | template: 12 | metadata: 13 | labels: 14 | app: qwen3-32b-fp8 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | {{{KARPENTER_PREFIX}}}/instance-family: g6e 22 | containers: 23 | - name: sglang 24 | image: docker.io/lmsysorg/sglang:v0.4.10.post2-cu126 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | command: ["python3", "-m", "sglang.launch_server"] 34 | args: 35 | - --model-path=Qwen/Qwen3-32B-FP8 36 | - --host=0.0.0.0 37 | - --port=30000 38 | - --trust-remote-code 39 | - --mem-fraction-static=0.90 40 | - --context-length=32768 # 32K 41 | # Qwen3 specific 42 | - --tool-call-parser=qwen25 43 | - --reasoning-parser=qwen3 44 | env: 45 | - name: HF_TOKEN 46 | valueFrom: 47 | secretKeyRef: 48 | name: hf-token 49 | key: token 50 | ports: 51 | - name: http 52 | containerPort: 30000 53 | resources: 54 | requests: 55 | cpu: 3 #75% 56 | memory: 24Gi #75% 57 | nvidia.com/gpu: 1 58 | limits: 59 | nvidia.com/gpu: 1 60 | volumeMounts: 61 | - name: huggingface-cache 62 | mountPath: /root/.cache/huggingface 63 | - name: shm 64 | mountPath: /dev/shm 65 | volumes: 66 | - name: huggingface-cache 67 | persistentVolumeClaim: 68 | claimName: huggingface-cache 69 | - name: shm 70 | emptyDir: 71 | medium: Memory 72 | sizeLimit: 10Gi 73 | tolerations: 74 | - key: nvidia.com/gpu 75 | operator: Exists 76 | effect: NoSchedule 77 | --- 78 | apiVersion: v1 79 | kind: Service 80 | metadata: 
81 | name: qwen3-32b-fp8 82 | namespace: sglang 83 | spec: 84 | selector: 85 | app: qwen3-32b-fp8 86 | ports: 87 | - name: http 88 | port: 30000 89 | -------------------------------------------------------------------------------- /components/llm-model/vllm/model-magistral-24b-fp8.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: magistral-24b-fp8 5 | namespace: vllm 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: magistral-24b-fp8 11 | template: 12 | metadata: 13 | labels: 14 | app: magistral-24b-fp8 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | {{{KARPENTER_PREFIX}}}/instance-family: g6e 22 | containers: 23 | - name: vllm 24 | image: vllm/vllm-openai:v0.10.2 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | command: ["vllm", "serve"] 34 | args: 35 | - RedHatAI/Magistral-Small-2506-FP8 36 | - --served-model-name=magistral-24b-fp8 37 | - --trust-remote-code 38 | - --gpu-memory-utilization=0.90 39 | - --max-model-len=32768 # 32K 40 | # Mistral specific 41 | - --enable-auto-tool-choice 42 | - --tool-call-parser=mistral 43 | - --chat-template=examples/tool_chat_template_mistral.jinja 44 | - --tokenizer-mode=mistral 45 | - --load-format=mistral 46 | - --config-format=mistral 47 | env: 48 | - name: HUGGING_FACE_HUB_TOKEN 49 | valueFrom: 50 | secretKeyRef: 51 | name: hf-token 52 | key: token 53 | ports: 54 | - name: http 55 | containerPort: 8000 56 | resources: 57 | requests: 58 | cpu: 3 #75% 59 | memory: 24Gi #75% 60 | nvidia.com/gpu: 1 61 | limits: 62 | nvidia.com/gpu: 1 63 | volumeMounts: 64 | - name: huggingface-cache 65 | mountPath: /root/.cache/huggingface 66 | volumes: 67 | - name: huggingface-cache 68 | persistentVolumeClaim: 69 | claimName: huggingface-cache 70 | tolerations: 71 | - key: nvidia.com/gpu 72 | operator: Exists 73 | effect: NoSchedule 74 | --- 75 | apiVersion: v1 76 | kind: Service 77 | metadata: 78 | name: magistral-24b-fp8 79 | namespace: vllm 80 | spec: 81 | selector: 82 | app: magistral-24b-fp8 83 | ports: 84 | - name: http 85 | port: 8000 86 | -------------------------------------------------------------------------------- /components/llm-model/sglang/model-qwen3-30b-instruct-fp8.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: qwen3-30b-instruct-fp8 5 | namespace: sglang 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: qwen3-30b-instruct-fp8 11 | template: 12 | metadata: 13 | labels: 14 | app: qwen3-30b-instruct-fp8 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | {{{KARPENTER_PREFIX}}}/instance-family: g6e 22 | containers: 23 | - name: sglang 24 | image: docker.io/lmsysorg/sglang:v0.4.10.post2-cu126 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | command: ["python3", "-m", "sglang.launch_server"] 34 | args: 35 | - --model-path=Qwen/Qwen3-30B-A3B-Instruct-2507-FP8 36 | - --host=0.0.0.0 37 | - --port=30000 38 | - --trust-remote-code 39 
| - --mem-fraction-static=0.90 40 | - --context-length=32768 # 32K 41 | # Qwen3 specific 42 | - --tool-call-parser=qwen25 43 | env: 44 | - name: HF_TOKEN 45 | valueFrom: 46 | secretKeyRef: 47 | name: hf-token 48 | key: token 49 | ports: 50 | - name: http 51 | containerPort: 30000 52 | resources: 53 | requests: 54 | cpu: 3 #75% 55 | memory: 24Gi #75% 56 | nvidia.com/gpu: 1 57 | limits: 58 | nvidia.com/gpu: 1 59 | volumeMounts: 60 | - name: huggingface-cache 61 | mountPath: /root/.cache/huggingface 62 | - name: shm 63 | mountPath: /dev/shm 64 | volumes: 65 | - name: huggingface-cache 66 | persistentVolumeClaim: 67 | claimName: huggingface-cache 68 | - name: shm 69 | emptyDir: 70 | medium: Memory 71 | sizeLimit: 10Gi 72 | tolerations: 73 | - key: nvidia.com/gpu 74 | operator: Exists 75 | effect: NoSchedule 76 | --- 77 | apiVersion: v1 78 | kind: Service 79 | metadata: 80 | name: qwen3-30b-instruct-fp8 81 | namespace: sglang 82 | spec: 83 | selector: 84 | app: qwen3-30b-instruct-fp8 85 | ports: 86 | - name: http 87 | port: 30000 88 | -------------------------------------------------------------------------------- /components/llm-model/sglang/model-qwen3-30b-thinking-fp8.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: qwen3-30b-thinking-fp8 5 | namespace: sglang 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: qwen3-30b-thinking-fp8 11 | template: 12 | metadata: 13 | labels: 14 | app: qwen3-30b-thinking-fp8 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | {{{KARPENTER_PREFIX}}}/instance-family: g6e 22 | containers: 23 | - name: sglang 24 | image: docker.io/lmsysorg/sglang:v0.4.10.post2-cu126 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | command: ["python3", "-m", "sglang.launch_server"] 34 | args: 35 | - --model-path=Qwen/Qwen3-30B-A3B-Thinking-2507-FP8 36 | - --host=0.0.0.0 37 | - --port=30000 38 | - --trust-remote-code 39 | - --mem-fraction-static=0.90 40 | - --context-length=32768 # 32K 41 | # Qwen3 specific 42 | - --reasoning-parser=qwen3 43 | env: 44 | - name: HF_TOKEN 45 | valueFrom: 46 | secretKeyRef: 47 | name: hf-token 48 | key: token 49 | ports: 50 | - name: http 51 | containerPort: 30000 52 | resources: 53 | requests: 54 | cpu: 3 #75% 55 | memory: 24Gi #75% 56 | nvidia.com/gpu: 1 57 | limits: 58 | nvidia.com/gpu: 1 59 | volumeMounts: 60 | - name: huggingface-cache 61 | mountPath: /root/.cache/huggingface 62 | - name: shm 63 | mountPath: /dev/shm 64 | volumes: 65 | - name: huggingface-cache 66 | persistentVolumeClaim: 67 | claimName: huggingface-cache 68 | - name: shm 69 | emptyDir: 70 | medium: Memory 71 | sizeLimit: 10Gi 72 | tolerations: 73 | - key: nvidia.com/gpu 74 | operator: Exists 75 | effect: NoSchedule 76 | --- 77 | apiVersion: v1 78 | kind: Service 79 | metadata: 80 | name: qwen3-30b-thinking-fp8 81 | namespace: sglang 82 | spec: 83 | selector: 84 | app: qwen3-30b-thinking-fp8 85 | ports: 86 | - name: http 87 | port: 30000 88 | -------------------------------------------------------------------------------- /components/llm-model/vllm/model-deepseek-r1-qwen3-8b-neuron.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 
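# Serves the DeepSeek-R1 Qwen3-8B distill on AWS Inferentia2 (inf2.xlarge, 2 NeuronCores)
# from a pre-compiled optimum-neuron image, so no on-node compilation is required;
# the Neuron-specific args below (tensor-parallel-size=2, max-num-seqs=2, max-model-len=8192)
# are sized for that instance type.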
3 | metadata: 4 | name: deepseek-r1-qwen3-8b-neuron 5 | namespace: vllm 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: deepseek-r1-qwen3-8b-neuron 11 | template: 12 | metadata: 13 | labels: 14 | app: deepseek-r1-qwen3-8b-neuron 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | node.kubernetes.io/instance-type: inf2.xlarge 22 | containers: 23 | - name: vllm 24 | image: public.ecr.aws/agentic-ai-platforms-on-k8s/vllm-neuron:deepseek-r1-qwen3-8b-optimum-neuron 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | command: ["vllm", "serve"] 34 | args: 35 | - /root/.cache/neuron/deepseek-ai/DeepSeek-R1-0528-Qwen3-8B 36 | - --served-model-name=deepseek-r1-qwen3-8b-neuron 37 | - --trust-remote-code 38 | - --gpu-memory-utilization=0.90 39 | # - --max-model-len=32768 # 32K 40 | # DeepSeek-R1 specific 41 | - --reasoning-parser=deepseek_r1 42 | # Neuron specific 43 | - --tensor-parallel-size=2 44 | # - --gpu-memory-utilization=0.95 45 | - --max-num-seqs=2 46 | - --max-model-len=8192 47 | env: 48 | - name: HF_HOME 49 | value: /root/.cache/huggingface 50 | - name: HF_HUB_CACHE 51 | value: /root/.cache/huggingface/hub 52 | - name: NEURON_RT_NUM_CORES 53 | value: "2" 54 | - name: NEURON_RT_VISIBLE_CORES 55 | value: "0-1" 56 | ports: 57 | - name: http 58 | containerPort: 8000 59 | resources: 60 | requests: 61 | cpu: 3 #75% 62 | memory: 12Gi #75% 63 | aws.amazon.com/neuroncore: 2 64 | limits: 65 | aws.amazon.com/neuroncore: 2 66 | tolerations: 67 | - key: aws.amazon.com/neuron 68 | operator: Exists 69 | effect: NoSchedule 70 | --- 71 | apiVersion: v1 72 | kind: Service 73 | metadata: 74 | name: deepseek-r1-qwen3-8b-neuron 75 | namespace: vllm 76 | spec: 77 | selector: 78 | app: deepseek-r1-qwen3-8b-neuron 79 | ports: 80 | - name: http 81 | port: 8000 82 | -------------------------------------------------------------------------------- /components/llm-model/ollama/index.mjs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env zx 2 | 3 | import { fileURLToPath } from "url"; 4 | import path from "path"; 5 | import { $ } from "zx"; 6 | $.verbose = true; 7 | 8 | export const name = "Ollama"; 9 | const __filename = fileURLToPath(import.meta.url); 10 | const DIR = path.dirname(__filename); 11 | let BASE_DIR; 12 | let config; 13 | let utils; 14 | 15 | export async function init(_BASE_DIR, _config, _utils) { 16 | BASE_DIR = _BASE_DIR; 17 | config = _config; 18 | utils = _utils; 19 | } 20 | 21 | export async function install() { 22 | // const requiredEnvVars = []; 23 | // utils.checkRequiredEnvVars(requiredEnvVars); 24 | await $`kubectl apply -f ${path.join(DIR, "namespace.yaml")}`; 25 | await $`kubectl apply -f ${path.join(DIR, "pvc.yaml")}`; 26 | const configmapTemplatePath = path.join(DIR, "configmap.template.yaml"); 27 | const configmapRenderedPath = path.join(DIR, "configmap.rendered.yaml"); 28 | const { models } = config["llm-model"]["ollama"]; 29 | const configmapVars = { 30 | models: models.map((model) => `"${model}"`).join(" "), 31 | }; 32 | utils.renderTemplate(configmapTemplatePath, configmapRenderedPath, configmapVars); 33 | await $`kubectl apply -f ${configmapRenderedPath}`; 34 | const deploymentTemplatePath = path.join(DIR, "deployment.template.yaml"); 35 | const deploymentRenderedPath = path.join(DIR, 
"deployment.rendered.yaml"); 36 | const { EKS_MODE } = process.env; 37 | const deploymentVars = { 38 | KARPENTER_PREFIX: EKS_MODE === "auto" ? "eks.amazonaws.com" : "karpenter.k8s.aws", 39 | }; 40 | utils.renderTemplate(deploymentTemplatePath, deploymentRenderedPath, deploymentVars); 41 | await $`kubectl apply -f ${deploymentRenderedPath}`; 42 | await $`kubectl apply -f ${path.join(DIR, "service.yaml")}`; 43 | const ingressTemplatePath = path.join(DIR, "ingress.template.yaml"); 44 | const ingressRenderedPath = path.join(DIR, "ingress.rendered.yaml"); 45 | const ingressVars = { 46 | DOMAIN: process.env.DOMAIN, 47 | }; 48 | utils.renderTemplate(ingressTemplatePath, ingressRenderedPath, ingressVars); 49 | await $`kubectl apply -f ${ingressRenderedPath}`; 50 | } 51 | 52 | export async function uninstall() { 53 | await $`kubectl delete -f ${path.join(DIR, "ingress.rendered.yaml")} --ignore-not-found`; 54 | await $`kubectl delete -f ${path.join(DIR, "service.yaml")} --ignore-not-found`; 55 | await $`kubectl delete -f ${path.join(DIR, "deployment.rendered.yaml")} --ignore-not-found`; 56 | await $`kubectl delete -f ${path.join(DIR, "configmap.rendered.yaml")} --ignore-not-found`; 57 | await $`kubectl delete -f ${path.join(DIR, "pvc.yaml")} --ignore-not-found`; 58 | await $`kubectl delete -f ${path.join(DIR, "namespace.yaml")} --ignore-not-found`; 59 | } 60 | -------------------------------------------------------------------------------- /components/ai-gateway/litellm/main.tf: -------------------------------------------------------------------------------- 1 | variable "region" { 2 | type = string 3 | default = "us-west-2" 4 | } 5 | variable "bedrock_region" { 6 | type = string 7 | default = "us-west-2" 8 | } 9 | variable "name" { 10 | type = string 11 | default = "genai-on-eks" 12 | } 13 | variable "enable_bedrock_guardrail" { 14 | type = bool 15 | default = false 16 | } 17 | terraform { 18 | required_providers { 19 | aws = { 20 | source = "hashicorp/aws" 21 | version = "~> 5.96.0" 22 | } 23 | } 24 | } 25 | provider "aws" { 26 | region = var.region 27 | } 28 | provider "aws" { 29 | alias = "bedrock" 30 | region = var.bedrock_region 31 | } 32 | 33 | module "pod_identity" { 34 | source = "terraform-aws-modules/eks-pod-identity/aws" 35 | version = "1.12.0" 36 | 37 | name = "${var.name}-${var.region}-litellm" 38 | use_name_prefix = false 39 | attach_custom_policy = true 40 | policy_statements = [ 41 | { 42 | sid = "Bedrock" 43 | actions = [ 44 | "bedrock:InvokeModel", 45 | "bedrock:InvokeModelWithResponseStream", 46 | "aws-marketplace:Subscribe", 47 | "aws-marketplace:ViewSubscriptions", 48 | "bedrock:ApplyGuardrail" 49 | ] 50 | resources = ["*"] 51 | } 52 | ] 53 | associations = { 54 | litellm = { 55 | service_account = "litellm" 56 | namespace = "litellm" 57 | cluster_name = var.name 58 | } 59 | } 60 | } 61 | 62 | resource "aws_bedrock_guardrail" "this" { 63 | count = var.enable_bedrock_guardrail ? 1 : 0 64 | provider = aws.bedrock 65 | name = var.name 66 | blocked_input_messaging = "Sorry, the model cannot answer this question." 67 | blocked_outputs_messaging = "Sorry, the model cannot answer this question." 
68 | description = var.name 69 | contextual_grounding_policy_config { 70 | filters_config { 71 | threshold = 0.7 72 | type = "GROUNDING" 73 | } 74 | filters_config { 75 | threshold = 0.7 76 | type = "RELEVANCE" 77 | } 78 | } 79 | word_policy_config { 80 | managed_word_lists_config { 81 | type = "PROFANITY" 82 | } 83 | } 84 | } 85 | output "bedrock_guardrail_id" { 86 | value = var.enable_bedrock_guardrail ? aws_bedrock_guardrail.this[0].guardrail_id : "" 87 | } 88 | resource "aws_bedrock_guardrail_version" "this" { 89 | count = var.enable_bedrock_guardrail ? 1 : 0 90 | provider = aws.bedrock 91 | description = var.name 92 | guardrail_arn = aws_bedrock_guardrail.this[0].guardrail_arn 93 | } 94 | output "bedrock_guardrail_version" { 95 | value = var.enable_bedrock_guardrail ? aws_bedrock_guardrail_version.this[0].version : "" 96 | } -------------------------------------------------------------------------------- /components/embedding-model/tei/model-qwen3-embedding-06b-bf16-cpu.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: qwen3-embedding-06b-bf16-cpu 5 | namespace: tei 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: qwen3-embedding-06b-bf16-cpu 11 | template: 12 | metadata: 13 | labels: 14 | app: qwen3-embedding-06b-bf16-cpu 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | kubernetes.io/arch: amd64 22 | # karpenter.sh/capacity-type: on-demand 23 | # {{{KARPENTER_PREFIX}}}/instance-category: m 24 | # {{{KARPENTER_PREFIX}}}/instance-generation: "7" 25 | {{{KARPENTER_PREFIX}}}/instance-family: r7i 26 | # node.kubernetes.io/instance-type: r7i.xlarge 27 | containers: 28 | - name: tei 29 | image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.8.2 30 | imagePullPolicy: IfNotPresent 31 | securityContext: 32 | allowPrivilegeEscalation: false 33 | capabilities: 34 | drop: 35 | - NET_RAW 36 | seccompProfile: 37 | type: RuntimeDefault 38 | args: 39 | - --model-id=Qwen/Qwen3-Embedding-0.6B 40 | - --huggingface-hub-cache=/root/.cache/huggingface/hub 41 | - --max-batch-tokens=8192 42 | env: 43 | - name: HF_TOKEN 44 | valueFrom: 45 | secretKeyRef: 46 | name: hf-token 47 | key: token 48 | ports: 49 | - name: http 50 | containerPort: 80 51 | resources: 52 | requests: 53 | cpu: 3.6 #90% 54 | memory: 29Gi #90% 55 | limits: 56 | cpu: 3.6 #90% 57 | memory: 29Gi #90% 58 | volumeMounts: 59 | - name: huggingface-cache 60 | mountPath: /root/.cache/huggingface 61 | - name: shm 62 | mountPath: /dev/shm 63 | volumes: 64 | - name: huggingface-cache 65 | persistentVolumeClaim: 66 | claimName: huggingface-cache 67 | - name: shm 68 | emptyDir: 69 | medium: Memory 70 | sizeLimit: 10Gi 71 | tolerations: 72 | - key: nvidia.com/gpu 73 | operator: Exists 74 | effect: NoSchedule 75 | --- 76 | apiVersion: v1 77 | kind: Service 78 | metadata: 79 | name: qwen3-embedding-06b-bf16-cpu 80 | namespace: tei 81 | spec: 82 | selector: 83 | app: qwen3-embedding-06b-bf16-cpu 84 | ports: 85 | - name: http 86 | port: 80 87 | -------------------------------------------------------------------------------- /components/llm-model/sglang/model-qwen3-coder-30b-fp8.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: qwen3-coder-30b-fp8 5 | namespace: sglang 6 | spec: 7 | replicas: 1 8 | selector: 9 | 
matchLabels: 10 | app: qwen3-coder-30b-fp8 11 | template: 12 | metadata: 13 | labels: 14 | app: qwen3-coder-30b-fp8 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | {{{KARPENTER_PREFIX}}}/instance-family: g6e 22 | containers: 23 | - name: sglang 24 | image: docker.io/lmsysorg/sglang:v0.4.10.post2-cu126 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | command: ["python3", "-m", "sglang.launch_server"] 34 | args: 35 | - --model-path=Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8 36 | - --host=0.0.0.0 37 | - --port=30000 38 | - --trust-remote-code 39 | - --mem-fraction-static=0.90 40 | # - --context-length=32768 # 32K 41 | - --context-length=131072 # 128K 42 | # Qwen3 specific 43 | - --tool-call-parser=qwen25 44 | - --reasoning-parser=qwen3 45 | env: 46 | - name: HF_TOKEN 47 | valueFrom: 48 | secretKeyRef: 49 | name: hf-token 50 | key: token 51 | ports: 52 | - name: http 53 | containerPort: 30000 54 | resources: 55 | requests: 56 | cpu: 3 #75% 57 | memory: 24Gi #75% 58 | nvidia.com/gpu: 1 59 | limits: 60 | nvidia.com/gpu: 1 61 | volumeMounts: 62 | - name: huggingface-cache 63 | mountPath: /root/.cache/huggingface 64 | - name: shm 65 | mountPath: /dev/shm 66 | volumes: 67 | - name: huggingface-cache 68 | persistentVolumeClaim: 69 | claimName: huggingface-cache 70 | - name: shm 71 | emptyDir: 72 | medium: Memory 73 | sizeLimit: 10Gi 74 | tolerations: 75 | - key: nvidia.com/gpu 76 | operator: Exists 77 | effect: NoSchedule 78 | --- 79 | apiVersion: v1 80 | kind: Service 81 | metadata: 82 | name: qwen3-coder-30b-fp8 83 | namespace: sglang 84 | spec: 85 | selector: 86 | app: qwen3-coder-30b-fp8 87 | ports: 88 | - name: http 89 | port: 30000 90 | --------------------------------------------------------------------------------
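The SGLang deployments expose the same OpenAI-compatible surface via sglang.launch_server, so an equivalent smoke test works. A sketch for the qwen3-coder-30b-fp8 manifest above; since the args set no served model name, the model id should default to the --model-path value:

kubectl -n sglang port-forward svc/qwen3-coder-30b-fp8 30000:30000 &
curl -s http://localhost:30000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8", "messages": [{"role": "user", "content": "Write a binary search in Python"}]}'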