├── .prettierignore
├── examples
    ├── agno
    │   ├── calculator-agent
    │   │   ├── __init__.py
    │   │   ├── requirements.txt
    │   │   ├── Dockerfile
    │   │   ├── .terraform.lock.hcl
    │   │   ├── main.tf
    │   │   ├── openwebui_pipe_function.py
    │   │   ├── agent.template.yaml
    │   │   └── index.mjs
    │   └── namespace.yaml
    ├── mcp-server
    │   ├── calculator
    │   │   ├── __init__.py
    │   │   ├── requirements.txt
    │   │   ├── Dockerfile
    │   │   ├── main.tf
    │   │   ├── mcp-server.template.yaml
    │   │   ├── .terraform.lock.hcl
    │   │   ├── server.py
    │   │   └── index.mjs
    │   └── namespace.yaml
    └── strands-agents
        ├── calculator-agent
        │   ├── __init__.py
        │   ├── requirements.txt
        │   ├── Dockerfile
        │   ├── .terraform.lock.hcl
        │   ├── main.tf
        │   ├── openwebui_pipe_function.py
        │   ├── agent.template.yaml
        │   └── index.mjs
        └── namespace.yaml
├── workshops
    ├── eks-genai-workshop
    │   ├── .env.workshop
    │   ├── static
    │   │   ├── aws-logo.png
    │   │   ├── images
    │   │   │   ├── module-1
    │   │   │   │   ├── flies.png
    │   │   │   │   ├── logs.png
    │   │   │   │   ├── models.png
    │   │   │   │   ├── vllm.png
    │   │   │   │   ├── sign-up.png
    │   │   │   │   ├── get-started.png
    │   │   │   │   ├── bedrock-review-submit.png
    │   │   │   │   ├── claude-with-rag-context.png
    │   │   │   │   ├── bedrock-model-access-next.png
    │   │   │   │   ├── bedrock-model-access-page.png
    │   │   │   │   ├── claude-without-rag-context.png
    │   │   │   │   ├── bedrock-select-claude-sonnet.png
    │   │   │   │   ├── openwebui-create-knowledge-base.png
    │   │   │   │   ├── openwebui-knowledge-workspace.png
    │   │   │   │   ├── openwebui-select-knowledge-base.png
    │   │   │   │   ├── model-comparison-claude-vs-llama.png
    │   │   │   │   └── claude-kubernetes-operators-response.png
    │   │   │   ├── module-2
    │   │   │   │   ├── snake.png
    │   │   │   │   ├── trace.png
    │   │   │   │   ├── traces.png
    │   │   │   │   ├── copy-key.png
    │   │   │   │   ├── langfuse.png
    │   │   │   │   ├── test-key.png
    │   │   │   │   ├── create-key.png
    │   │   │   │   ├── dashboard.png
    │   │   │   │   ├── generation.png
    │   │   │   │   ├── new-model.png
    │   │   │   │   ├── virtual-key.png
    │   │   │   │   ├── final-widget.png
    │   │   │   │   ├── go-to-project.png
    │   │   │   │   ├── back-dashboard.png
    │   │   │   │   ├── cost-dashboard.png
    │   │   │   │   ├── create-dashboard.png
    │   │   │   │   ├── final-dashboard.png
    │   │   │   │   ├── langfuse-login.png
    │   │   │   │   ├── select-test-key.png
    │   │   │   │   ├── specific-trace.png
    │   │   │   │   ├── value-rendered.png
    │   │   │   │   ├── latency-dashboard.png
    │   │   │   │   ├── litellm-login-page.png
    │   │   │   │   ├── cost-dashboard-long.png
    │   │   │   │   ├── litellm-test-interface.png
    │   │   │   │   ├── litellm-usage-analytics.png
    │   │   │   │   ├── litellm-api-landing-page.png
    │   │   │   │   └── litellm-models-dashboard.png
    │   │   │   ├── introduction
    │   │   │   │   ├── url.png
    │   │   │   │   ├── archi.png
    │   │   │   │   ├── open-aws-console.png
    │   │   │   │   ├── workshopstudio-event1.jpg
    │   │   │   │   ├── workshopstudio-event2.jpg
    │   │   │   │   └── workshopstudio-event3.jpg
    │   │   │   └── module-3
    │   │   │       ├── example1.png
    │   │   │       ├── example2.png
    │   │   │       ├── gen-ai-on-eks.png
    │   │   │       └── LoanBuddy-Observability.png
    │   │   ├── configuration
    │   │   │   └── index.en.md
    │   │   ├── code
    │   │   │   └── module3
    │   │   │       └── credit-validation
    │   │   │           ├── example1.png
    │   │   │           ├── example2.png
    │   │   │           ├── .gitignore
    │   │   │           ├── Dockerfile
    │   │   │           ├── .example.env
    │   │   │           ├── pyvenv.cfg
    │   │   │           ├── build-push-docker-image.sh
    │   │   │           └── requirements.txt
    │   │   └── iam-policy.json
    │   ├── config.workshop.json
    │   └── content
    │       └── introduction
    │           └── getting-started
    │               └── index.en.md
    └── README.md
├── .prettierrc
├── components
    ├── llm-model
    │   ├── tgi
    │   │   ├── namespace.yaml
    │   │   ├── secret.template.yaml
    │   │   ├── archive
    │   │   │   ├── pvc-neuron-cache.yaml
    │   │   │   ├── job-tgi-neuron-build.template.yaml
    │   │   │   └── main.tf
    │   │   ├── pvc-huggingface-cache.yaml
    │   │   ├── index.mjs
    │   │   ├── model-qwen3-8b.template.yaml
    │   │   ├── model-qwen3-8b-fp8.template.yaml
    │   │   └── model-deepseek-r1-qwen3-8b.template.yaml
    │   ├── vllm
    │   │   ├── namespace.yaml
    │   │   ├── secret.template.yaml
    │   │   ├── pvc-neuron-cache.yaml
    │   │   ├── pvc-huggingface-cache.yaml
    │   │   ├── archive
    │   │   │   ├── job-vllm-neuron-build.template.yaml
    │   │   │   └── main.tf
    │   │   ├── index.mjs
    │   │   ├── model-gemma3-27b-gptq.template.yaml
    │   │   ├── model-deepseek-r1-qwen3-8b.template.yaml
    │   │   ├── model-qwen3-30b-thinking-fp8.template.yaml
    │   │   ├── model-gpt-oss-20b.template.yaml
    │   │   ├── model-qwen3-30b-instruct-fp8.template.yaml
    │   │   ├── model-qwen3-32b-fp8.template.yaml
    │   │   ├── model-qwen3-coder-30b-fp8.template.yaml
    │   │   ├── model-gpt-oss-120b.template.yaml
    │   │   ├── model-qwen3-8b-neuron.template.yaml
    │   │   ├── model-magistral-24b-fp8.template.yaml
    │   │   └── model-deepseek-r1-qwen3-8b-neuron.template.yaml
    │   ├── ollama
    │   │   ├── namespace.yaml
    │   │   ├── service.yaml
    │   │   ├── pvc.yaml
    │   │   ├── configmap.template.yaml
    │   │   ├── ingress.template.yaml
    │   │   ├── deployment.template.yaml
    │   │   └── index.mjs
    │   └── sglang
    │       ├── namespace.yaml
    │       ├── secret.template.yaml
    │       ├── pvc.yaml
    │       ├── index.mjs
    │       ├── model-gpt-oss-20b.template.yaml
    │       ├── model-qwen3-32b-fp8.template.yaml
    │       ├── model-qwen3-30b-instruct-fp8.template.yaml
    │       ├── model-qwen3-30b-thinking-fp8.template.yaml
    │       └── model-qwen3-coder-30b-fp8.template.yaml
    ├── embedding-model
    │   └── tei
    │       ├── namespace.yaml
    │       ├── secret.template.yaml
    │       ├── pvc.yaml
    │       ├── index.mjs
    │       ├── model-qwen3-embedding-4b-bf16.template.yaml
    │       ├── model-qwen3-embedding-8b-bf16.template.yaml
    │       ├── model-qwen3-embedding-06b-bf16.template.yaml
    │       └── model-qwen3-embedding-06b-bf16-cpu.template.yaml
    ├── guardrail
    │   └── guardrails-ai
    │       ├── docker
    │       │   ├── requirements.txt
    │       │   ├── config.py
    │       │   ├── docker-entrypoint.sh
    │       │   ├── Dockerfile
    │       │   └── build-image.sh
    │       ├── GUARDRAILS_AI.md
    │       ├── app.template.yaml
    │       ├── index.mjs
    │       └── .terraform.lock.hcl
    ├── vector-database
    │   ├── milvus
    │   │   ├── secret.template.yaml
    │   │   ├── ingress.template.yaml
    │   │   ├── values.template.yaml
    │   │   └── main.tf
    │   ├── qdrant
    │   │   ├── secret.template.yaml
    │   │   ├── values.template.yaml
    │   │   └── ingress.template.yaml
    │   └── chroma
    │       ├── values.template.yaml
    │       └── index.mjs
    ├── o11y
    │   ├── phoenix
    │   │   ├── values.template.yaml
    │   │   └── index.mjs
    │   ├── mlflow
    │   │   ├── values.template.yaml
    │   │   ├── .terraform.lock.hcl
    │   │   ├── index.mjs
    │   │   └── main.tf
    │   └── langfuse
    │       ├── .terraform.lock.hcl
    │       ├── index.mjs
    │       └── main.tf
    ├── workflow-automation
    │   └── n8n
    │       ├── values.template.yaml
    │       └── index.mjs
    ├── gui-app
    │   └── openwebui
    │       ├── values.template.yaml
    │       └── index.mjs
    └── ai-gateway
        ├── kong
        │   ├── kong.template.yaml
        │   ├── KONG.md
        │   ├── index.mjs
        │   ├── examples
        │   │   └── kong.yaml
        │   └── values.template.yaml
        └── litellm
            ├── .terraform.lock.hcl
            └── main.tf
├── .vscode
    └── settings.json
├── assets
    ├── openwebui_functions.png
    ├── openwebui_embedding_model.png
    └── openwebui_embedding_calculator_agent.png
├── .gitleaks.toml
├── CODE_OF_CONDUCT.md
├── package.json
├── .gitignore
├── .env
├── docs
    └── INFRA_SETUP.md
├── terraform
    ├── eks.tf.template
    ├── vpc.tf
    ├── modules
    │   ├── eks-standard-mode
    │   │   └── variables.tf
    │   └── eks-auto-mode
    │       └── variables.tf
    ├── efs.tf
    ├── alb-acm.tf
    └── variables.tf
├── LICENSE
├── cli-menu.json
└── ecr-image-sync.sh

/.prettierignore:
--------------------------------------------------------------------------------
1 | *.template.yaml
2 | 
--------------------------------------------------------------------------------
/examples/agno/calculator-agent/__init__.py:
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/mcp-server/calculator/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/.env.workshop: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "printWidth": 120 3 | } 4 | -------------------------------------------------------------------------------- /examples/strands-agents/calculator-agent/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/mcp-server/calculator/requirements.txt: -------------------------------------------------------------------------------- 1 | fastmcp==2.13.0 -------------------------------------------------------------------------------- /workshops/README.md: -------------------------------------------------------------------------------- 1 | Host code/settings and md files for the workshops here 2 | -------------------------------------------------------------------------------- /examples/agno/namespace.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: agno 5 | -------------------------------------------------------------------------------- /components/llm-model/tgi/namespace.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: tgi 5 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "cSpell.words": [ 3 | "agentic", 4 | "configmap" 5 | ] 6 | } -------------------------------------------------------------------------------- /components/llm-model/vllm/namespace.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: vllm 5 | -------------------------------------------------------------------------------- /examples/mcp-server/namespace.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: mcp-server 5 | -------------------------------------------------------------------------------- /components/embedding-model/tei/namespace.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: tei 5 | -------------------------------------------------------------------------------- /components/guardrail/guardrails-ai/docker/requirements.txt: -------------------------------------------------------------------------------- 1 | guardrails-ai[api]>=0.5.0 2 | gunicorn[gthread]>=22.0.0,<23 -------------------------------------------------------------------------------- /components/llm-model/ollama/namespace.yaml: -------------------------------------------------------------------------------- 1 | 
apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: ollama 5 | -------------------------------------------------------------------------------- /components/llm-model/sglang/namespace.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: sglang 5 | -------------------------------------------------------------------------------- /examples/strands-agents/namespace.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: strands-agents 5 | -------------------------------------------------------------------------------- /assets/openwebui_functions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/assets/openwebui_functions.png -------------------------------------------------------------------------------- /assets/openwebui_embedding_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/assets/openwebui_embedding_model.png -------------------------------------------------------------------------------- /assets/openwebui_embedding_calculator_agent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/assets/openwebui_embedding_calculator_agent.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/aws-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/aws-logo.png -------------------------------------------------------------------------------- /.gitleaks.toml: -------------------------------------------------------------------------------- 1 | [allowlist] 2 | description = "Ignore specific test/dummy keys" 3 | regexes = [ 4 | '''LITELLM_API_KEY''', 5 | '''LANGFUSE_PUBLIC_KEY''', 6 | '''LANGFUSE_SECRET_KEY''' 7 | ] -------------------------------------------------------------------------------- /components/llm-model/tgi/secret.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | name: hf-token 5 | namespace: tgi 6 | type: Opaque 7 | stringData: 8 | token: {{{HF_TOKEN}}} -------------------------------------------------------------------------------- /components/embedding-model/tei/secret.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | name: hf-token 5 | namespace: tei 6 | type: Opaque 7 | stringData: 8 | token: {{{HF_TOKEN}}} -------------------------------------------------------------------------------- /components/llm-model/sglang/secret.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | name: hf-token 5 | namespace: sglang 6 | type: Opaque 7 | stringData: 8 | token: {{{HF_TOKEN}}} -------------------------------------------------------------------------------- /components/llm-model/vllm/secret.template.yaml: 
-------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | name: hf-token 5 | namespace: vllm 6 | type: Opaque 7 | stringData: 8 | token: {{{HF_TOKEN}}} 9 | -------------------------------------------------------------------------------- /components/vector-database/milvus/secret.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | name: basic-auth 5 | namespace: milvus 6 | type: Opaque 7 | stringData: 8 | auth: {{{AUTH}}} -------------------------------------------------------------------------------- /components/vector-database/qdrant/secret.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | name: basic-auth 5 | namespace: qdrant 6 | type: Opaque 7 | stringData: 8 | auth: {{{AUTH}}} -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-1/flies.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-1/flies.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-1/logs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-1/logs.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-1/models.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-1/models.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-1/vllm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-1/vllm.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/snake.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/snake.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/trace.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/trace.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/traces.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/traces.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/introduction/url.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/introduction/url.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-1/sign-up.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-1/sign-up.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/copy-key.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/copy-key.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/langfuse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/langfuse.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/test-key.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/test-key.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-3/example1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-3/example1.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-3/example2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-3/example2.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/introduction/archi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/introduction/archi.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-1/get-started.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-1/get-started.png 
-------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/create-key.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/create-key.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/dashboard.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/generation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/generation.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/new-model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/new-model.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/virtual-key.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/virtual-key.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/final-widget.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/final-widget.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/go-to-project.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/go-to-project.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-3/gen-ai-on-eks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-3/gen-ai-on-eks.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/back-dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/back-dashboard.png -------------------------------------------------------------------------------- 
/workshops/eks-genai-workshop/static/images/module-2/cost-dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/cost-dashboard.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/create-dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/create-dashboard.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/final-dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/final-dashboard.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/langfuse-login.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/langfuse-login.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/select-test-key.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/select-test-key.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/specific-trace.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/specific-trace.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/value-rendered.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/value-rendered.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/configuration/index.en.md: -------------------------------------------------------------------------------- 1 | --- 2 | title : "Configuration" 3 | weight : 20 4 | --- 5 | 6 | # Configuration 7 | Find out how to create and organize your content quickly and intuitively. 
8 | -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/latency-dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/latency-dashboard.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/litellm-login-page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/litellm-login-page.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/introduction/open-aws-console.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/introduction/open-aws-console.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-1/bedrock-review-submit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-1/bedrock-review-submit.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/cost-dashboard-long.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/cost-dashboard-long.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/code/module3/credit-validation/example1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/code/module3/credit-validation/example1.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/code/module3/credit-validation/example2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/code/module3/credit-validation/example2.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-1/claude-with-rag-context.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-1/claude-with-rag-context.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/litellm-test-interface.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/litellm-test-interface.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/litellm-usage-analytics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/litellm-usage-analytics.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-3/LoanBuddy-Observability.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-3/LoanBuddy-Observability.png -------------------------------------------------------------------------------- /examples/agno/calculator-agent/requirements.txt: -------------------------------------------------------------------------------- 1 | agno==1.7.0 2 | boto3==1.39.1 3 | openai==1.93.0 4 | mcp==1.10.0 5 | fastapi[standard]==0.115.13 6 | pydantic==2.11.7 7 | langfuse==3.0.5 8 | openlit==1.34.23 9 | SQLAlchemy==2.0.41 10 | -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/introduction/workshopstudio-event1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/introduction/workshopstudio-event1.jpg -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/introduction/workshopstudio-event2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/introduction/workshopstudio-event2.jpg -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/introduction/workshopstudio-event3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/introduction/workshopstudio-event3.jpg -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-1/bedrock-model-access-next.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-1/bedrock-model-access-next.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-1/bedrock-model-access-page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-1/bedrock-model-access-page.png -------------------------------------------------------------------------------- 
/workshops/eks-genai-workshop/static/images/module-1/claude-without-rag-context.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-1/claude-without-rag-context.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/litellm-api-landing-page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/litellm-api-landing-page.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-2/litellm-models-dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-2/litellm-models-dashboard.png -------------------------------------------------------------------------------- /components/llm-model/ollama/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: ollama 5 | namespace: ollama 6 | spec: 7 | selector: 8 | app: ollama 9 | ports: 10 | - name: http 11 | port: 11434 12 | -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-1/bedrock-select-claude-sonnet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-1/bedrock-select-claude-sonnet.png -------------------------------------------------------------------------------- /examples/strands-agents/calculator-agent/requirements.txt: -------------------------------------------------------------------------------- 1 | strands-agents==0.1.8 2 | strands-agents[otel]==0.1.8 3 | strands-agents-tools==0.1.6 4 | mcp==1.10.0 5 | fastapi[standard]==0.115.13 6 | pydantic==2.11.7 7 | litellm==1.73.0 8 | langfuse==3.0.5 -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-1/openwebui-create-knowledge-base.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-1/openwebui-create-knowledge-base.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-1/openwebui-knowledge-workspace.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-1/openwebui-knowledge-workspace.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-1/openwebui-select-knowledge-base.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-1/openwebui-select-knowledge-base.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-1/model-comparison-claude-vs-llama.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-1/model-comparison-claude-vs-llama.png -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/images/module-1/claude-kubernetes-operators-response.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/sample-genai-on-eks-starter-kit/HEAD/workshops/eks-genai-workshop/static/images/module-1/claude-kubernetes-operators-response.png -------------------------------------------------------------------------------- /components/guardrail/guardrails-ai/docker/config.py: -------------------------------------------------------------------------------- 1 | from guardrails.hub import DetectPII 2 | from guardrails import Guard 3 | 4 | guard = Guard() 5 | guard.name = "detect-pii" 6 | guard.use(DetectPII(pii_entities=["EMAIL_ADDRESS", "IP_ADDRESS"], on_fail="fix")) 7 | -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/code/module3/credit-validation/.gitignore: -------------------------------------------------------------------------------- 1 | # Python libraries and virtual environments 2 | venv/ 3 | env/ 4 | lib/ 5 | bin/ 6 | include/ 7 | __pycache__/ 8 | *.pyc 9 | *.pyo 10 | *.egg-info/ 11 | dist/ 12 | build/ 13 | .env 14 | -------------------------------------------------------------------------------- /components/vector-database/chroma/values.template.yaml: -------------------------------------------------------------------------------- 1 | chromadb: 2 | auth: 3 | enabled: false 4 | data: 5 | storageClass: ebs 6 | volumeSize: 10Gi 7 | 8 | resources: 9 | requests: 10 | cpu: 1 11 | memory: 2Gi 12 | limits: 13 | memory: 2Gi 14 | -------------------------------------------------------------------------------- /components/llm-model/ollama/pvc.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: PersistentVolumeClaim 3 | metadata: 4 | name: ollama-cache 5 | namespace: ollama 6 | spec: 7 | storageClassName: efs 8 | accessModes: 9 | - ReadWriteMany 10 | resources: 11 | requests: 12 | storage: 100Gi # Dummy 13 | -------------------------------------------------------------------------------- /examples/agno/calculator-agent/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.12-slim 2 | 3 | WORKDIR /app 4 | COPY requirements.txt . 5 | RUN pip install --no-cache-dir -r requirements.txt 6 | COPY __init__.py . 7 | COPY agent.py . 
8 | EXPOSE 80 9 | CMD ["fastapi", "run", "agent.py", "--proxy-headers", "--port", "80"] 10 | -------------------------------------------------------------------------------- /components/embedding-model/tei/pvc.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: PersistentVolumeClaim 3 | metadata: 4 | name: huggingface-cache 5 | namespace: tei 6 | spec: 7 | storageClassName: efs 8 | accessModes: 9 | - ReadWriteMany 10 | resources: 11 | requests: 12 | storage: 100Gi # Dummy 13 | -------------------------------------------------------------------------------- /components/llm-model/sglang/pvc.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: PersistentVolumeClaim 3 | metadata: 4 | name: huggingface-cache 5 | namespace: sglang 6 | spec: 7 | storageClassName: efs 8 | accessModes: 9 | - ReadWriteMany 10 | resources: 11 | requests: 12 | storage: 100Gi # Dummy 13 | -------------------------------------------------------------------------------- /components/llm-model/vllm/pvc-neuron-cache.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: PersistentVolumeClaim 3 | metadata: 4 | name: neuron-cache 5 | namespace: vllm 6 | spec: 7 | storageClassName: efs 8 | accessModes: 9 | - ReadWriteMany 10 | resources: 11 | requests: 12 | storage: 100Gi # Dummy 13 | -------------------------------------------------------------------------------- /examples/strands-agents/calculator-agent/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.12-slim 2 | 3 | WORKDIR /app 4 | COPY requirements.txt . 5 | RUN pip install --no-cache-dir -r requirements.txt 6 | COPY __init__.py . 7 | COPY agent.py . 
8 | EXPOSE 80 9 | CMD ["fastapi", "run", "agent.py", "--proxy-headers", "--port", "80"] 10 | -------------------------------------------------------------------------------- /components/llm-model/tgi/archive/pvc-neuron-cache.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: PersistentVolumeClaim 3 | metadata: 4 | name: neuron-cache 5 | namespace: tgi 6 | spec: 7 | storageClassName: efs 8 | accessModes: 9 | - ReadWriteMany 10 | resources: 11 | requests: 12 | storage: 100Gi # Dummy 13 | -------------------------------------------------------------------------------- /components/llm-model/tgi/pvc-huggingface-cache.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: PersistentVolumeClaim 3 | metadata: 4 | name: huggingface-cache 5 | namespace: tgi 6 | spec: 7 | storageClassName: efs 8 | accessModes: 9 | - ReadWriteMany 10 | resources: 11 | requests: 12 | storage: 100Gi # Dummy 13 | -------------------------------------------------------------------------------- /components/llm-model/vllm/pvc-huggingface-cache.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: PersistentVolumeClaim 3 | metadata: 4 | name: huggingface-cache 5 | namespace: vllm 6 | spec: 7 | storageClassName: efs 8 | accessModes: 9 | - ReadWriteMany 10 | resources: 11 | requests: 12 | storage: 100Gi # Dummy 13 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /examples/mcp-server/calculator/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.12-slim 2 | 3 | WORKDIR /app 4 | COPY requirements.txt . 5 | RUN pip install --no-cache-dir -r requirements.txt 6 | COPY __init__.py . 7 | COPY server.py . 
8 | EXPOSE 8000 9 | CMD ["fastmcp", "run", "server.py", "--transport", "http", "--host", "0.0.0.0", "--port", "8000"] 10 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "genai-on-eks-starter-kit", 3 | "version": "1.0.0", 4 | "type": "module", 5 | "dependencies": { 6 | "commander": "11.1.0", 7 | "dotenv": "16.5.0", 8 | "handlebars": "4.7.8", 9 | "inquirer": "12.6.3", 10 | "lodash": "4.17.21", 11 | "prettier": "3.5.3", 12 | "zx": "8.5.4" 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Terraform 2 | .terraform/ 3 | *.tfstate 4 | *.tfstate.* 5 | *.tfvars 6 | *.tfvars.json 7 | 8 | # Node.js 9 | node_modules/ 10 | 11 | # Python 12 | __pycache__/ 13 | 14 | # Project 15 | .DS_Store 16 | .kiro/ 17 | .backup/ 18 | .temp/ 19 | .kubeconfig 20 | .env.local 21 | config.local.json 22 | *.rendered.yaml 23 | test*.py 24 | test*.mjs 25 | terraform/eks.tf 26 | -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/iam-policy.json: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Effect": "Allow", 6 | "Action": [ 7 | "cloudformation:ListStacks", 8 | "cloudformation:DescribeStacks", 9 | "cloudformation:DescribeStackEvents", 10 | "cloudformation:DescribeStackResources", 11 | "cloudformation:GetTemplate" 12 | ], 13 | "Resource": "*" 14 | } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /components/llm-model/ollama/configmap.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: ollama-init-script 5 | namespace: ollama 6 | data: 7 | init-ollama.sh: | 8 | #!/bin/bash 9 | 10 | while ! /bin/ollama ps > /dev/null 2>&1; do 11 | sleep 5 12 | done 13 | 14 | models=({{{models}}}) 15 | 16 | for model in "${models[@]}"; do 17 | echo "Pulling model: $model" 18 | /bin/ollama pull "$model" 19 | done 20 | -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/code/module3/credit-validation/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.12-slim 2 | 3 | WORKDIR /app 4 | 5 | COPY requirements.txt . 6 | RUN pip install --no-cache-dir -r requirements.txt 7 | 8 | COPY credit-underwriting-agent.py . 9 | COPY mcp-address-validator.py . 10 | COPY mcp-image-processor.py . 11 | COPY mcp-income-employment-validator.py . 12 | 13 | COPY utils.py . 14 | COPY *.png . 
15 | 16 | EXPOSE 8080 17 | 18 | ENV PYTHONUNBUFFERED=1 19 | 20 | 21 | -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/code/module3/credit-validation/.example.env: -------------------------------------------------------------------------------- 1 | # Gateway Configuration 2 | GATEWAY_MODEL_ACCESS_KEY=your_model_access_key_here 3 | GATEWAY_URL=https://your-api-gateway-url.amazonaws.com 4 | 5 | # Langfuse Configuration (Optional - for tracing) 6 | LANGFUSE_URL=https://your-langfuse-instance.com 7 | LANGFUSE_PUBLIC_KEY=pk-lf-your-public-key 8 | LANGFUSE_SECRET_KEY=sk-lf-your-secret-key 9 | 10 | # S3 Configuration 11 | S3_BUCKET_NAME=your-s3-bucket-name 12 | -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/code/module3/credit-validation/pyvenv.cfg: -------------------------------------------------------------------------------- 1 | home = /opt/homebrew/opt/python@3.13/bin 2 | include-system-site-packages = false 3 | version = 3.13.3 4 | executable = /opt/homebrew/Cellar/python@3.13/3.13.3_1/Frameworks/Python.framework/Versions/3.13/bin/python3.13 5 | command = /opt/homebrew/opt/python@3.13/bin/python3.13 -m venv /Users/wangaws/Documents/Workshop/sample-genai-on-eks-starter-kit/workshops/module-3-building-genai-applications-DEPRECATED/code/credit-validation 6 | -------------------------------------------------------------------------------- /components/o11y/phoenix/values.template.yaml: -------------------------------------------------------------------------------- 1 | ingress: 2 | annotations: 3 | alb.ingress.kubernetes.io/target-type: ip 4 | {{#if DOMAIN}} 5 | alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}]' 6 | host: phoenix.{{{DOMAIN}}} 7 | {{/if}} 8 | 9 | resources: 10 | requests: 11 | cpu: 500m 12 | memory: 2Gi 13 | limits: 14 | memory: 2Gi 15 | 16 | postgresql: 17 | primary: 18 | resources: 19 | requests: 20 | cpu: 125m 21 | memory: 256Mi 22 | limits: 23 | memory: 256Mi 24 | -------------------------------------------------------------------------------- /components/workflow-automation/n8n/values.template.yaml: -------------------------------------------------------------------------------- 1 | ingress: 2 | enabled: true 3 | annotations: 4 | alb.ingress.kubernetes.io/target-type: ip 5 | {{#if DOMAIN}} 6 | alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}]' 7 | hosts: 8 | - host: n8n.{{{DOMAIN}}} 9 | {{else}} 10 | hosts: 11 | - host: 12 | {{/if}} 13 | paths: ["/"] 14 | tls: [] 15 | 16 | main: 17 | {{#unless DOMAIN}} 18 | extraEnv: 19 | N8N_SECURE_COOKIE: 20 | value: "false" 21 | {{/unless}} 22 | resources: 23 | requests: 24 | cpu: 1 25 | memory: 2Gi 26 | limits: 27 | memory: 2Gi 28 | -------------------------------------------------------------------------------- /components/llm-model/ollama/ingress.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: networking.k8s.io/v1 2 | kind: Ingress 3 | metadata: 4 | annotations: 5 | alb.ingress.kubernetes.io/target-type: ip 6 | {{#if DOMAIN}} 7 | alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}]' 8 | {{/if}} 9 | name: ollama 10 | namespace: ollama 11 | spec: 12 | rules: 13 | - http: 14 | paths: 15 | - backend: 16 | service: 17 | name: ollama 18 | port: 19 | name: http 20 | path: / 21 | pathType: Prefix 22 | {{#if DOMAIN}} 23 | host: ollama.{{{DOMAIN}}} 24 | {{/if}} 
-------------------------------------------------------------------------------- /components/vector-database/qdrant/values.template.yaml: -------------------------------------------------------------------------------- 1 | ingress: 2 | enabled: true 3 | annotations: 4 | external-dns.alpha.kubernetes.io/ingress-hostname-source: annotation-only 5 | nginx.ingress.kubernetes.io/auth-type: basic 6 | nginx.ingress.kubernetes.io/auth-secret: basic-auth 7 | nginx.ingress.kubernetes.io/auth-realm: "Authentication Required" 8 | ingressClassName: nginx 9 | hosts: 10 | - paths: 11 | - path: / 12 | pathType: Prefix 13 | servicePort: 6333 14 | {{#if DOMAIN}} 15 | host: qdrant.{{{DOMAIN}}} 16 | {{/if}} 17 | 18 | resources: 19 | requests: 20 | cpu: 1 21 | memory: 2Gi 22 | limits: 23 | memory: 2Gi -------------------------------------------------------------------------------- /.env: -------------------------------------------------------------------------------- 1 | REGION=us-west-2 2 | EKS_CLUSTER_NAME=genai-on-eks 3 | EKS_MODE=auto 4 | DOMAIN= 5 | HF_TOKEN= 6 | # LiteLLM 7 | LITELLM_UI_USERNAME=admin 8 | LITELLM_UI_PASSWORD=Pass@123 9 | LITELLM_API_KEY=sk-1234 10 | # Kong 11 | KONG_API_KEY=sk-1234 12 | KONG_API_KEY_HEADER=apikey 13 | # Langfuse 14 | LANGFUSE_USERNAME=admin@example.com 15 | LANGFUSE_PASSWORD=Pass@123 16 | LANGFUSE_PUBLIC_KEY=lf_pk_1234567890 17 | LANGFUSE_SECRET_KEY=lf_sk_1234567890 18 | # MLflow 19 | MLFLOW_USERNAME=admin 20 | MLFLOW_PASSWORD=Password@123 # Min 12 characters 21 | # Phoenix 22 | PHOENIX_API_KEY= 23 | # Qdrant 24 | QDRANT_USERNAME=admin 25 | QDRANT_PASSWORD=Pass@123 26 | # Milvus 27 | MILVUS_USERNAME=admin 28 | MILVUS_PASSWORD=Pass@123 29 | # Guardrails AI 30 | GUARDRAILS_AI_API_KEY= -------------------------------------------------------------------------------- /docs/INFRA_SETUP.md: -------------------------------------------------------------------------------- 1 | ## Infrastructure Setup 2 | 3 | Several AWS services and Kubernetes components are provisioned by the main Terraform code under the `terraform` folder.
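4 | 
5 | As a minimal sketch, assuming `eks.tf` has already been rendered from `eks.tf.template` (for example via the starter kit's CLI) with `EKS_MODE` set to `auto` or `standard`, the stack can be provisioned with the standard Terraform workflow:
6 | 
7 | ```bash
8 | # Hypothetical manual run; the kit's CLI may wrap these steps for you.
9 | cd terraform
10 | terraform init
11 | terraform plan   # review the planned resources first
12 | terraform apply
13 | ```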
14 | 15 | ### AWS Services 16 | 17 | - One VPC with private/public subnets and a single NAT gateway 18 | - One EKS Auto Mode cluster 19 | - One EFS file system for caching Hugging Face models and other artifacts 20 | - One ACM wildcard certificate for the provided domain 21 | 22 | ### Kubernetes Components 23 | 24 | - Set up Ingress to provision the shared ALB 25 | - Set up ExternalDNS to manage the DNS records for the public-facing services 26 | - Set up the Ingress NGINX Controller to use HTTP Basic authentication for some public-facing services 27 | - Set up the EFS CSI driver 28 | - Set up StorageClasses for EBS and EFS -------------------------------------------------------------------------------- /components/vector-database/qdrant/ingress.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: networking.k8s.io/v1 2 | kind: Ingress 3 | metadata: 4 | annotations: 5 | alb.ingress.kubernetes.io/target-type: ip 6 | {{#if DOMAIN}} 7 | alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}]' 8 | {{/if}} 9 | name: qdrant-alb 10 | namespace: ingress-nginx 11 | spec: 12 | ingressClassName: {{#if DOMAIN}}shared-{{/if}}internet-facing-alb 13 | rules: 14 | - http: 15 | paths: 16 | - path: / 17 | pathType: Prefix 18 | backend: 19 | service: 20 | name: ingress-nginx-controller 21 | port: 22 | number: 80 23 | {{#if DOMAIN}} 24 | host: qdrant.{{{DOMAIN}}} 25 | {{/if}} -------------------------------------------------------------------------------- /components/gui-app/openwebui/values.template.yaml: -------------------------------------------------------------------------------- 1 | nameOverride: openwebui 2 | ingress: 3 | enabled: true 4 | annotations: 5 | alb.ingress.kubernetes.io/target-type: ip 6 | {{#if DOMAIN}} 7 | alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}]' 8 | host: openwebui.{{{DOMAIN}}} 9 | {{else}} 10 | host: 11 | {{/if}} 12 | 13 | # openaiBaseApiUrl: https://litellm.{{{DOMAIN}}}/v1 14 | openaiBaseApiUrl: http://litellm.litellm:4000/v1 15 | extraEnvVars: 16 | - name: OPENAI_API_KEY 17 | value: {{{LITELLM_API_KEY}}} 18 | 19 | ollama: 20 | enabled: false 21 | pipelines: 22 | enabled: false 23 | 24 | persistence: 25 | enabled: true 26 | size: 100Gi 27 | 28 | resources: 29 | requests: 30 | cpu: 1 31 | memory: 2Gi 32 | limits: 33 | memory: 2Gi 34 | -------------------------------------------------------------------------------- /terraform/eks.tf.template: -------------------------------------------------------------------------------- 1 | module "eks_{{{EKS_MODE}}}_mode" { 2 | source = "./modules/eks-{{{EKS_MODE}}}-mode" 3 | 4 | name = var.name 5 | region = var.region 6 | eks_cluster_version = var.eks_cluster_version 7 | vpc_id = module.vpc.vpc_id 8 | vpc_cidr = var.vpc_cidr 9 | subnet_ids = module.vpc.private_subnets 10 | domain = var.domain 11 | efs_file_system_id = aws_efs_file_system.this.id 12 | gpu_nodepool_capacity_type = var.gpu_nodepool_capacity_type 13 | gpu_nodepool_instance_family = var.gpu_nodepool_instance_family 14 | enable_nginx = var.enable_nginx 15 | enable_lws = var.enable_lws 16 | } 17 | -------------------------------------------------------------------------------- /components/guardrail/guardrails-ai/docker/docker-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | echo "Configuring Guardrails AI..."
5 | 6 | # Check if GUARDRAILS_TOKEN is set 7 | if [ -z "$GUARDRAILS_TOKEN" ]; then 8 | echo "Error: GUARDRAILS_TOKEN environment variable is not set" 9 | exit 1 10 | fi 11 | 12 | # Configure guardrails with token from environment 13 | guardrails configure --disable-metrics --enable-remote-inferencing --token "$GUARDRAILS_TOKEN" 14 | 15 | # Install validators from hub 16 | echo "Installing validators from Guardrails Hub..." 17 | guardrails hub install hub://guardrails/detect_pii 18 | 19 | echo "Starting Guardrails API server..." 20 | # Start the application 21 | exec gunicorn --bind 0.0.0.0:8000 --timeout=90 --workers=4 'guardrails_api.app:create_app(None, "config.py")' 22 | -------------------------------------------------------------------------------- /components/guardrail/guardrails-ai/GUARDRAILS_AI.md: -------------------------------------------------------------------------------- 1 | # Guardrails AI 2 | 3 | References: 4 | 5 | - https://www.guardrailsai.com/docs/how_to_guides/hosting_with_docker 6 | - https://github.com/guardrails-ai/guardrails-lite-server 7 | - https://github.com/guardrails-ai/detect_pii 8 | 9 | ## Test 10 | 11 | - Via Open WebUI: 12 | 13 | ``` 14 | Validate this email address - genai-on-eks@example.com 15 | 16 | Validate this IP address - 50.0.10.1 17 | 18 | Validate this phone number - 829-456-7890 19 | ``` 20 | 21 | - Via curl to the Guardrails Server pod: 22 | 23 | ``` 24 | curl -X 'POST' \ 25 | 'http://localhost:8000/guards/detect-pii/validate' \ 26 | -H 'accept: application/json' \ 27 | -H 'Content-Type: application/json' \ 28 | -d '{ 29 | "llmOutput": "My email address is john.doe@example.com, my IP address is 192.168.1.1, and my phone number is 123-456-7890" 30 | }' 31 | ``` 32 | -------------------------------------------------------------------------------- /terraform/vpc.tf: -------------------------------------------------------------------------------- 1 | locals { 2 | azs = slice(data.aws_availability_zones.available.names, 0, min(length(data.aws_availability_zones.available.names), 4)) 3 | } 4 | 5 | # AWS VPC 6 | module "vpc" { 7 | source = "terraform-aws-modules/vpc/aws" 8 | version = "5.21.0" 9 | 10 | name = var.name 11 | cidr = var.vpc_cidr 12 | 13 | azs = local.azs 14 | private_subnets = [for k, v in local.azs : cidrsubnet(var.vpc_cidr, 4, k)] 15 | public_subnets = [for k, v in local.azs : cidrsubnet(var.vpc_cidr, 8, k + 64)] 16 | # intra_subnets = [for k, v in local.azs : cidrsubnet(var.vpc_cidr, 8, k + 80)] 17 | 18 | enable_nat_gateway = true 19 | single_nat_gateway = true 20 | 21 | public_subnet_tags = { 22 | "kubernetes.io/role/elb" = 1 23 | } 24 | 25 | private_subnet_tags = { 26 | "kubernetes.io/role/internal-elb" = 1 27 | "karpenter.sh/discovery" = var.name 28 | } 29 | } 30 | 31 | -------------------------------------------------------------------------------- /components/ai-gateway/kong/kong.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: configuration.konghq.com/v1 2 | kind: KongClusterPlugin 3 | metadata: 4 | name: key-auth 5 | namespace: kong 6 | annotations: 7 | kubernetes.io/ingress.class: kong 8 | labels: 9 | global: "true" 10 | plugin: key-auth 11 | config: 12 | key_in_query: false 13 | key_names: ["{{{KONG_API_KEY_HEADER}}}"] 14 | --- 15 | apiVersion: v1 16 | kind: Secret 17 | metadata: 18 | name: default-credential 19 | namespace: kong 20 | annotations: 21 | kubernetes.io/ingress.class: kong 22 | labels: 23 | konghq.com/credential: key-auth 24 | type: Opaque 25 |
stringData: 26 | key: {{{KONG_API_KEY}}} 27 | --- 28 | apiVersion: configuration.konghq.com/v1 29 | kind: KongConsumer 30 | metadata: 31 | name: default 32 | namespace: kong 33 | annotations: 34 | kubernetes.io/ingress.class: kong 35 | username: default 36 | credentials: 37 | - default-credential 38 | -------------------------------------------------------------------------------- /examples/mcp-server/calculator/main.tf: -------------------------------------------------------------------------------- 1 | variable "region" { 2 | type = string 3 | default = "us-west-2" 4 | } 5 | variable "name" { 6 | type = string 7 | default = "genai-on-eks" 8 | } 9 | terraform { 10 | required_providers { 11 | aws = { 12 | source = "hashicorp/aws" 13 | version = "~> 5.96.0" 14 | } 15 | } 16 | } 17 | provider "aws" { 18 | region = var.region 19 | } 20 | locals { 21 | app = "calculator" 22 | namespace = "mcp-server" 23 | full_name = "${var.name}-${local.namespace}-${local.app}" 24 | } 25 | resource "aws_ecr_repository" "this" { 26 | name = local.full_name 27 | image_tag_mutability = "MUTABLE" 28 | force_delete = true 29 | 30 | image_scanning_configuration { 31 | scan_on_push = true 32 | } 33 | 34 | encryption_configuration { 35 | encryption_type = "KMS" 36 | } 37 | } 38 | output "ecr_repository_url" { 39 | value = aws_ecr_repository.this.repository_url 40 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT No Attribution 2 | 3 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so. 10 | 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 13 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 14 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 15 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 16 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
17 | 18 | -------------------------------------------------------------------------------- /examples/mcp-server/calculator/mcp-server.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: calculator 5 | namespace: mcp-server 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: calculator 11 | template: 12 | metadata: 13 | labels: 14 | app: calculator 15 | spec: 16 | {{#unless useBuildx}} 17 | nodeSelector: 18 | kubernetes.io/arch: {{{arch}}} 19 | {{/unless}} 20 | containers: 21 | - name: server 22 | image: {{{IMAGE}}} 23 | ports: 24 | - name: http 25 | containerPort: 8000 26 | resources: 27 | requests: 28 | cpu: 250m 29 | memory: 512Mi 30 | limits: 31 | memory: 512Mi 32 | --- 33 | apiVersion: v1 34 | kind: Service 35 | metadata: 36 | name: calculator 37 | namespace: mcp-server 38 | spec: 39 | selector: 40 | app: calculator 41 | ports: 42 | - name: http 43 | port: 8000 44 | 45 | -------------------------------------------------------------------------------- /terraform/modules/eks-standard-mode/variables.tf: -------------------------------------------------------------------------------- 1 | variable "name" { type = string } 2 | variable "region" { type = string } 3 | variable "eks_cluster_version" { type = string } 4 | variable "vpc_id" { type = string } 5 | variable "vpc_cidr" { type = string } 6 | variable "subnet_ids" { type = list(string) } 7 | variable "domain" { type = string } 8 | variable "efs_file_system_id" { type = string } 9 | variable "gpu_nodepool_capacity_type" { type = list(string) } 10 | variable "gpu_nodepool_instance_family" { type = list(string) } 11 | 12 | terraform { 13 | required_version = ">= 1.5" 14 | 15 | required_providers { 16 | aws = { 17 | source = "hashicorp/aws" 18 | version = "~> 6.15.0" 19 | } 20 | kubernetes = { 21 | source = "hashicorp/kubernetes" 22 | version = "~> 2.38.0" 23 | } 24 | helm = { 25 | source = "hashicorp/helm" 26 | version = "~> 2.17.0" 27 | } 28 | kubectl = { 29 | source = "alekc/kubectl" 30 | version = "~> 2.1.3" 31 | } 32 | local = { 33 | source = "hashicorp/local" 34 | version = "~> 2.5.3" 35 | } 36 | } 37 | } -------------------------------------------------------------------------------- /terraform/efs.tf: -------------------------------------------------------------------------------- 1 | # EFS Security Group 2 | resource "aws_security_group" "efs" { 3 | name = "${var.name}-efs-sg" 4 | description = "Security group for EFS" 5 | vpc_id = module.vpc.vpc_id 6 | 7 | ingress { 8 | description = "Allow NFS traffic from private subnets" 9 | from_port = 2049 10 | to_port = 2049 11 | protocol = "tcp" 12 | cidr_blocks = module.vpc.private_subnets_cidr_blocks 13 | } 14 | 15 | tags = { 16 | Name = "${var.name}-efs-sg" 17 | "karpenter.sh/discovery" = var.name 18 | } 19 | } 20 | 21 | resource "aws_efs_file_system" "this" { 22 | creation_token = "${var.name}-efs" 23 | encrypted = true 24 | throughput_mode = var.efs_throughput_mode 25 | 26 | lifecycle_policy { 27 | transition_to_ia = "AFTER_7_DAYS" 28 | } 29 | lifecycle_policy { 30 | transition_to_primary_storage_class = "AFTER_1_ACCESS" 31 | } 32 | tags = { 33 | Name = "${var.name}-efs" 34 | } 35 | } 36 | 37 | resource "aws_efs_mount_target" "this" { 38 | count = length(module.vpc.private_subnets) 39 | 40 | file_system_id = aws_efs_file_system.this.id 41 | subnet_id = module.vpc.private_subnets[count.index] 42 | security_groups = [aws_security_group.efs.id] 43 | } 
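# Note: a mount target is created in each private subnet so nodes in any AZ can
# mount the shared file system over NFS (TCP 2049, permitted by the EFS security
# group above).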
-------------------------------------------------------------------------------- /terraform/modules/eks-auto-mode/variables.tf: -------------------------------------------------------------------------------- 1 | variable "name" { type = string } 2 | variable "region" { type = string } 3 | variable "eks_cluster_version" { type = string } 4 | variable "vpc_id" { type = string } 5 | variable "vpc_cidr" { type = string } 6 | variable "subnet_ids" { type = list(string) } 7 | variable "domain" { type = string } 8 | variable "efs_file_system_id" { type = string } 9 | variable "gpu_nodepool_capacity_type" { type = list(string) } 10 | variable "gpu_nodepool_instance_family" { type = list(string) } 11 | variable "enable_nginx" { 12 | type = bool 13 | default = true 14 | } 15 | variable "enable_lws" { 16 | type = bool 17 | default = true 18 | } 19 | 20 | terraform { 21 | required_version = ">= 1.5" 22 | 23 | required_providers { 24 | aws = { 25 | source = "hashicorp/aws" 26 | version = "~> 6.15.0" 27 | } 28 | kubernetes = { 29 | source = "hashicorp/kubernetes" 30 | version = "~> 2.38.0" 31 | } 32 | helm = { 33 | source = "hashicorp/helm" 34 | version = "~> 2.17.0" 35 | } 36 | kubectl = { 37 | source = "alekc/kubectl" 38 | version = "~> 2.1.3" 39 | } 40 | local = { 41 | source = "hashicorp/local" 42 | version = "~> 2.5.3" 43 | } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /components/guardrail/guardrails-ai/docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM public.ecr.aws/docker/library/python:3.12-slim 2 | 3 | # Create app directory 4 | WORKDIR /app 5 | 6 | # print the version just to verify 7 | RUN python3 --version 8 | # start the virtual environment 9 | RUN python3 -m venv /opt/venv 10 | 11 | # Enable venv 12 | ENV PATH="/opt/venv/bin:$PATH" 13 | 14 | # Install some utilities; you may not need all of these 15 | RUN apt-get update 16 | RUN apt-get install -y git 17 | 18 | # Copy the requirements file 19 | COPY requirements*.txt . 20 | 21 | # Install app dependencies 22 | # If you use Poetry this step might be different 23 | RUN pip install -r requirements-lock.txt 24 | 25 | # Set the directory for nltk data 26 | ENV NLTK_DATA=/opt/nltk_data 27 | 28 | # Download punkt data 29 | RUN python -m nltk.downloader -d /opt/nltk_data punkt 30 | 31 | # Copy the rest over (including entrypoint script) 32 | # We use a .dockerignore to keep unwanted files excluded 33 | COPY . . 34 | 35 | # Make entrypoint script executable 36 | RUN chmod +x /app/docker-entrypoint.sh 37 | 38 | EXPOSE 8000 39 | 40 | # Use entrypoint script that will configure guardrails and install hub validators at runtime 41 | # The GUARDRAILS_TOKEN will be provided as an environment variable from Kubernetes 42 | ENTRYPOINT ["/app/docker-entrypoint.sh"] 43 | -------------------------------------------------------------------------------- /components/ai-gateway/kong/KONG.md: -------------------------------------------------------------------------------- 1 | # Kong AI Gateway OSS 2 | 3 | Currently, we only deploy Kong and set up the API key; we do not set up the routes or the integration with Open WebUI. 4 | 5 | Notes: 6 | 7 | - [AI Proxy plugin](https://developer.konghq.com/plugins/ai-proxy) currently does not provide a simple way to set up multiple models on the same URL path. See `components/ai-gateway/kong/examples/kong.yaml` for an example that sets a route for each model.
8 | 9 | - Alternatively, check [this example](https://developer.konghq.com/plugins/ai-proxy/examples/sdk-two-routes/) for setting up AI Proxy plugin routing based on matching different URL paths. 10 | 11 | - To integrate with Open WebUI, since the `/v1/models` path is not available, you will need to manually specify the models in the `Model IDs` field when creating the connection. 12 | 13 | - [Kong Manager OSS](https://github.com/Kong/kong-manager) currently does not provide an easy way to implement authentication without the licensed enterprise RBAC feature. The current workaround is to port-forward: 14 | 15 | ``` 16 | # Kong Manager 17 | kubectl -n kong port-forward svc/kong-kong-manager 8002:8002 18 | 19 | # Kong Admin API (also required since Kong Manager UI will connect directly to it from the browser) 20 | kubectl -n kong port-forward svc/kong-kong-admin 8001:8001 21 | ``` 22 | -------------------------------------------------------------------------------- /components/o11y/mlflow/values.template.yaml: -------------------------------------------------------------------------------- 1 | global: 2 | security: 3 | # -- Allow insecure images to use our ECR images. 4 | allowInsecureImages: true 5 | 6 | ingress: 7 | enabled: true 8 | className: {{#if DOMAIN}}shared-{{/if}}internet-facing-alb 9 | annotations: 10 | alb.ingress.kubernetes.io/target-type: ip 11 | alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}]' 12 | hosts: 13 | - paths: 14 | - path: / 15 | pathType: Prefix 16 | host: mlflow.{{{DOMAIN}}} 17 | 18 | extraEnvVars: 19 | MLFLOW_SERVER_ALLOWED_HOSTS: "localhost:*,10.*,mlflow.mlflow,mlflow.{{{DOMAIN}}}" 20 | # MLFLOW_LOGGING_LEVEL: "DEBUG" 21 | 22 | auth: 23 | enabled: true 24 | adminUsername: {{{MLFLOW_USERNAME}}} 25 | adminPassword: {{{MLFLOW_PASSWORD}}} 26 | 27 | postgresql: 28 | enabled: true 29 | image: 30 | registry: public.ecr.aws 31 | repository: agentic-ai-platforms-on-k8s/postgresql 32 | tag: 17.5.0-debian-12-r8 33 | pullPolicy: IfNotPresent 34 | auth: 35 | username: admin 36 | password: password123 37 | primary: 38 | resources: 39 | requests: 40 | cpu: 125m 41 | memory: 256Mi 42 | limits: 43 | memory: 256Mi 44 | 45 | artifactRoot: 46 | s3: 47 | enabled: true 48 | bucket: {{{MLFLOW_BUCKET_NAME}}} 49 | -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/config.workshop.json: -------------------------------------------------------------------------------- 1 | { 2 | "demo": { 3 | "components": [ 4 | { "category": "llm-model", "component": "vllm" }, 5 | { "category": "o11y", "component": "langfuse" }, 6 | { "category": "gui-app", "component": "openwebui" }, 7 | { "category": "ai-gateway", "component": "litellm" } 8 | ], 9 | "examples": [] 10 | }, 11 | "llm-model": { 12 | "vllm": { 13 | "models": [ 14 | { "name": "qwen3-8b-neuron", "deploy": true, "neuron": true, "compile": false }, 15 | { "name": "deepseek-r1-qwen3-8b-neuron", "deploy": true, "neuron": true, "compile": false } 16 | ] 17 | }, 18 | "sglang": { "models": [] }, 19 | "tgi": { "models": [] }, 20 | "ollama": { "models": [] } 21 | }, 22 | "embedding-model": { 23 | "tei": { "models": [] } 24 | }, 25 | "bedrock": { 26 | "llm": { "models": [{ "name": "claude-3.7-sonnet", "model": "us.anthropic.claude-3-7-sonnet-20250219-v1:0" }] }, 27 | "embedding": { "models": [] } 28 | }, 29 | "docker": { 30 | "useBuildx": false, 31 | "arch": "arm64" 32 | }, 33 | "terraform": { 34 | "vars": { 35 | "efs_throughput_mode": "elastic", 36 | "neuron_nodepool_capacity_type":
["on-demand"], 37 | "enable_nginx": false, 38 | "enable_lws": false 39 | } 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /terraform/alb-acm.tf: -------------------------------------------------------------------------------- 1 | resource "aws_acm_certificate" "wildcard" { 2 | count = var.domain != "" ? 1 : 0 3 | domain_name = "*.${var.domain}" 4 | validation_method = "DNS" 5 | subject_alternative_names = ["${var.domain}"] 6 | lifecycle { 7 | create_before_destroy = true 8 | } 9 | } 10 | 11 | data "aws_route53_zone" "selected" { 12 | count = var.domain != "" ? 1 : 0 13 | name = var.domain 14 | private_zone = false 15 | } 16 | 17 | resource "aws_route53_record" "validation" { 18 | for_each = var.domain != "" ? { 19 | for dvo in aws_acm_certificate.wildcard[0].domain_validation_options : dvo.domain_name => { 20 | name = dvo.resource_record_name 21 | record = dvo.resource_record_value 22 | type = dvo.resource_record_type 23 | } 24 | } : {} 25 | allow_overwrite = true 26 | name = each.value.name 27 | records = [each.value.record] 28 | ttl = 60 29 | type = each.value.type 30 | zone_id = data.aws_route53_zone.selected[0].zone_id 31 | } 32 | 33 | resource "aws_acm_certificate_validation" "wildcard" { 34 | count = var.domain != "" ? 1 : 0 35 | certificate_arn = aws_acm_certificate.wildcard[0].arn 36 | validation_record_fqdns = [for record in aws_route53_record.validation : record.fqdn] 37 | } 38 | -------------------------------------------------------------------------------- /components/o11y/phoenix/index.mjs: -------------------------------------------------------------------------------- 1 | import { fileURLToPath } from "url"; 2 | import path from "path"; 3 | import fs from "fs"; 4 | import handlebars from "handlebars"; 5 | import { $ } from "zx"; 6 | $.verbose = true; 7 | 8 | export const name = "Phoenix"; 9 | const __filename = fileURLToPath(import.meta.url); 10 | const DIR = path.dirname(__filename); 11 | let BASE_DIR; 12 | let config; 13 | let utils; 14 | 15 | export async function init(_BASE_DIR, _config, _utils) { 16 | BASE_DIR = _BASE_DIR; 17 | config = _config; 18 | utils = _utils; 19 | } 20 | 21 | export async function install() { 22 | // const requiredEnvVars = []; 23 | // utils.checkRequiredEnvVars(requiredEnvVars); 24 | 25 | const valuesTemplatePath = path.join(DIR, "values.template.yaml"); 26 | const valuesRenderedPath = path.join(DIR, "values.rendered.yaml"); 27 | const valuesTemplateString = fs.readFileSync(valuesTemplatePath, "utf8"); 28 | const valuesTemplate = handlebars.compile(valuesTemplateString); 29 | const valuesVars = { 30 | DOMAIN: process.env.DOMAIN, 31 | }; 32 | fs.writeFileSync(valuesRenderedPath, valuesTemplate(valuesVars)); 33 | await $`helm upgrade --install phoenix oci://registry-1.docker.io/arizephoenix/phoenix-helm --namespace phoenix --create-namespace -f ${valuesRenderedPath}`; 34 | } 35 | 36 | export async function uninstall() { 37 | await $`helm uninstall phoenix --namespace phoenix`; 38 | } 39 | -------------------------------------------------------------------------------- /components/workflow-automation/n8n/index.mjs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env zx 2 | 3 | import { fileURLToPath } from "url"; 4 | import path from "path"; 5 | import fs from "fs"; 6 | import handlebars from "handlebars"; 7 | import { $ } from "zx"; 8 | $.verbose = true; 9 | 10 | export const name = "n8n"; 11 | const __filename = fileURLToPath(import.meta.url); 12 
| const DIR = path.dirname(__filename); 13 | let BASE_DIR; 14 | let config; 15 | let utils; 16 | 17 | export async function init(_BASE_DIR, _config, _utils) { 18 | BASE_DIR = _BASE_DIR; 19 | config = _config; 20 | utils = _utils; 21 | } 22 | 23 | export async function install() { 24 | // const requiredEnvVars = []; 25 | // utils.checkRequiredEnvVars(requiredEnvVars); 26 | 27 | const valuesTemplatePath = path.join(DIR, "values.template.yaml"); 28 | const valuesRenderedPath = path.join(DIR, "values.rendered.yaml"); 29 | const valuesTemplateString = fs.readFileSync(valuesTemplatePath, "utf8"); 30 | const valuesTemplate = handlebars.compile(valuesTemplateString); 31 | const valuesVars = { 32 | DOMAIN: process.env.DOMAIN, 33 | }; 34 | fs.writeFileSync(valuesRenderedPath, valuesTemplate(valuesVars)); 35 | await $`helm upgrade --install n8n oci://8gears.container-registry.com/library/n8n --namespace n8n --create-namespace -f ${valuesRenderedPath}`; 36 | } 37 | 38 | export async function uninstall() { 39 | await $`helm uninstall n8n --namespace n8n`; 40 | } 41 | -------------------------------------------------------------------------------- /components/vector-database/chroma/index.mjs: -------------------------------------------------------------------------------- 1 | import { fileURLToPath } from "url"; 2 | import path from "path"; 3 | import fs from "fs"; 4 | import handlebars from "handlebars"; 5 | import { $ } from "zx"; 6 | $.verbose = true; 7 | 8 | export const name = "Chroma"; 9 | const __filename = fileURLToPath(import.meta.url); 10 | const DIR = path.dirname(__filename); 11 | let BASE_DIR; 12 | let config; 13 | let utils; 14 | 15 | export async function init(_BASE_DIR, _config, _utils) { 16 | BASE_DIR = _BASE_DIR; 17 | config = _config; 18 | utils = _utils; 19 | } 20 | 21 | export async function install() { 22 | // const requiredEnvVars = []; 23 | // utils.checkRequiredEnvVars(requiredEnvVars); 24 | 25 | await $`helm repo add chroma https://amikos-tech.github.io/chromadb-chart`; 26 | await $`helm repo update`; 27 | 28 | const valuesTemplatePath = path.join(DIR, "values.template.yaml"); 29 | const valuesRenderedPath = path.join(DIR, "values.rendered.yaml"); 30 | const valuesTemplateString = fs.readFileSync(valuesTemplatePath, "utf8"); 31 | const valuesTemplate = handlebars.compile(valuesTemplateString); 32 | const valuesVars = {}; 33 | fs.writeFileSync(valuesRenderedPath, valuesTemplate(valuesVars)); 34 | await $`helm upgrade --install chroma chroma/chromadb --namespace chroma --create-namespace -f ${valuesRenderedPath}`; 35 | } 36 | 37 | export async function uninstall() { 38 | await $`helm uninstall chroma --namespace chroma`; 39 | } 40 | -------------------------------------------------------------------------------- /components/guardrail/guardrails-ai/docker/build-image.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # Build and push multi-arch image to public ECR 5 | # Usage: ./build-image.sh [version] 6 | 7 | PUBLIC_ECR_REGISTRY="public.ecr.aws/agentic-ai-platforms-on-k8s" 8 | COMPONENT="guardrails-ai" 9 | VERSION=${1:-"latest"} 10 | 11 | echo "Building multi-arch image for component: ${COMPONENT}" 12 | echo "Version: ${VERSION}" 13 | echo "Registry: ${PUBLIC_ECR_REGISTRY}" 14 | 15 | # Get the directory where this script is located 16 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 17 | 18 | # Authenticate to public ECR (public ECR requires us-east-1 region) 19 | echo "Authenticating to public 
ECR..." 20 | aws ecr-public get-login-password --region us-east-1 | \ 21 | docker login --username AWS --password-stdin public.ecr.aws 22 | 23 | # Check if Dockerfile exists 24 | if [ ! -f "${SCRIPT_DIR}/Dockerfile" ]; then 25 | echo "Error: Dockerfile not found in ${SCRIPT_DIR}" 26 | exit 1 27 | fi 28 | 29 | # Build and push multi-arch image 30 | echo "Building and pushing multi-arch image..." 31 | docker buildx build \ 32 | --platform linux/amd64,linux/arm64 \ 33 | -t ${PUBLIC_ECR_REGISTRY}/${COMPONENT}:${VERSION} \ 34 | -t ${PUBLIC_ECR_REGISTRY}/${COMPONENT}:latest \ 35 | --push \ 36 | ${SCRIPT_DIR}/ 37 | 38 | echo "Successfully built and pushed ${PUBLIC_ECR_REGISTRY}/${COMPONENT}:${VERSION}" 39 | echo "Also tagged as: ${PUBLIC_ECR_REGISTRY}/${COMPONENT}:latest" 40 | -------------------------------------------------------------------------------- /components/llm-model/tgi/archive/job-tgi-neuron-build.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: Job 3 | metadata: 4 | name: tgi-neuron-build 5 | namespace: tgi 6 | spec: 7 | template: 8 | spec: 9 | restartPolicy: Never 10 | securityContext: 11 | seccompProfile: 12 | type: RuntimeDefault 13 | automountServiceAccountToken: false 14 | # nodeSelector: 15 | # {{{KARPENTER_PREFIX}}}/instance-family: inf2 16 | serviceAccountName: tgi-neuron-build 17 | containers: 18 | - name: kaniko 19 | image: gcr.io/kaniko-project/executor:latest 20 | args: 21 | - "--context=git://github.com/aonz/text-generation-inference.git" 22 | - "--dockerfile=Dockerfile.neuron" 23 | - "--destination={{{IMAGE}}}" 24 | # - "--verbosity=debug" 25 | env: 26 | - name: AWS_REGION 27 | value: us-west-2 28 | resources: 29 | requests: 30 | cpu: 24 #75% 31 | memory: 96Gi #75% 32 | aws.amazon.com/neuroncore: 2 33 | limits: 34 | aws.amazon.com/neuroncore: 2 35 | tolerations: 36 | - key: aws.amazon.com/neuron 37 | operator: Exists 38 | effect: NoSchedule 39 | --- 40 | apiVersion: v1 41 | kind: ServiceAccount 42 | metadata: 43 | name: tgi-neuron-build 44 | namespace: tgi 45 | automountServiceAccountToken: false 46 | -------------------------------------------------------------------------------- /components/guardrail/guardrails-ai/app.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | name: guardrails-ai 5 | --- 6 | apiVersion: v1 7 | kind: Secret 8 | metadata: 9 | name: guardrails-ai-secret 10 | namespace: guardrails-ai 11 | type: Opaque 12 | stringData: 13 | token: "{{{GUARDRAILS_TOKEN}}}" 14 | --- 15 | apiVersion: apps/v1 16 | kind: Deployment 17 | metadata: 18 | name: guard 19 | namespace: guardrails-ai 20 | spec: 21 | replicas: 1 22 | selector: 23 | matchLabels: 24 | app: guard 25 | template: 26 | metadata: 27 | labels: 28 | app: guard 29 | spec: 30 | {{#unless useBuildx}} 31 | nodeSelector: 32 | kubernetes.io/arch: {{{arch}}} 33 | {{/unless}} 34 | containers: 35 | - name: guard 36 | image: {{{IMAGE}}} 37 | env: 38 | - name: GUARDRAILS_TOKEN 39 | valueFrom: 40 | secretKeyRef: 41 | name: guardrails-ai-secret 42 | key: token 43 | ports: 44 | - name: http 45 | containerPort: 8000 46 | resources: 47 | requests: 48 | cpu: 1.5 49 | memory: 12Gi 50 | limits: 51 | memory: 12Gi 52 | --- 53 | apiVersion: v1 54 | kind: Service 55 | metadata: 56 | name: guard 57 | namespace: guardrails-ai 58 | spec: 59 | selector: 60 | app: guard 61 | ports: 62 | - name: http 63 | port: 8000 64 | 
-------------------------------------------------------------------------------- /components/o11y/mlflow/.terraform.lock.hcl: -------------------------------------------------------------------------------- 1 | # This file is maintained automatically by "terraform init". 2 | # Manual edits may be lost in future updates. 3 | 4 | provider "registry.terraform.io/hashicorp/aws" { 5 | version = "5.96.0" 6 | constraints = "~> 5.96.0" 7 | hashes = [ 8 | "h1:a/VEUu6BGQSPlUAzbN+zqaDCdi0QGh/VzBgo2gCran0=", 9 | "zh:3f7e734abb9d647c851f5cb987837d7c073c9cbf1f520a031027d827f93d3b68", 10 | "zh:5ca9400360a803a11cf432ca203be9f09da8fff9c96110a83c9029102b18c9d5", 11 | "zh:5d421f475d467af182a527b7a61d50105dc63394316edf1c775ef736f84b941c", 12 | "zh:68f2328e7f3e7666835d6815b39b46b08954a91204f82a6f648c928a0b09a744", 13 | "zh:6a4170e7e2764df2968d1df65efebda55273dfc36dc6741207afb5e4b7e85448", 14 | "zh:73f2a15bee21f7c92a071e2520216d0a40041aca52c0f6682e540da8ffcfada4", 15 | "zh:9843d6973aedfd4cbaafd7110420d0c4c1d7ef4a2eeff508294c3adcc3613145", 16 | "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425", 17 | "zh:9d1abd6be717c42f2a6257ee227d3e9548c31f01c976ed7b32b2745a63659a67", 18 | "zh:a70d642e323021d54a92f0daa81d096cb5067cb99ce116047a42eb1cb1d579a0", 19 | "zh:b9a2b293208d5a0449275fae463319e0998c841e0bcd4014594a49ba54bb70d6", 20 | "zh:ce0b0eb7ac24ff58c20efcb526c3f792a95be3617c795b45bbeea9f302903ae7", 21 | "zh:dbbf98b3cd8003833c472bdb89321c17a9bbdc1b785e7e3d75f8af924ee5a0e4", 22 | "zh:df86cf9311a4be8bb4a251196650653f97e01fbf5fe72deecc8f28a35a5352ae", 23 | "zh:f92992881afd9339f3e539fcd90cfc1e9ed1356b5e760bbcc804314c3cd6837f", 24 | ] 25 | } 26 | -------------------------------------------------------------------------------- /examples/agno/calculator-agent/.terraform.lock.hcl: -------------------------------------------------------------------------------- 1 | # This file is maintained automatically by "terraform init". 2 | # Manual edits may be lost in future updates. 
3 | 4 | provider "registry.terraform.io/hashicorp/aws" { 5 | version = "5.96.0" 6 | constraints = "~> 5.96.0" 7 | hashes = [ 8 | "h1:a/VEUu6BGQSPlUAzbN+zqaDCdi0QGh/VzBgo2gCran0=", 9 | "zh:3f7e734abb9d647c851f5cb987837d7c073c9cbf1f520a031027d827f93d3b68", 10 | "zh:5ca9400360a803a11cf432ca203be9f09da8fff9c96110a83c9029102b18c9d5", 11 | "zh:5d421f475d467af182a527b7a61d50105dc63394316edf1c775ef736f84b941c", 12 | "zh:68f2328e7f3e7666835d6815b39b46b08954a91204f82a6f648c928a0b09a744", 13 | "zh:6a4170e7e2764df2968d1df65efebda55273dfc36dc6741207afb5e4b7e85448", 14 | "zh:73f2a15bee21f7c92a071e2520216d0a40041aca52c0f6682e540da8ffcfada4", 15 | "zh:9843d6973aedfd4cbaafd7110420d0c4c1d7ef4a2eeff508294c3adcc3613145", 16 | "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425", 17 | "zh:9d1abd6be717c42f2a6257ee227d3e9548c31f01c976ed7b32b2745a63659a67", 18 | "zh:a70d642e323021d54a92f0daa81d096cb5067cb99ce116047a42eb1cb1d579a0", 19 | "zh:b9a2b293208d5a0449275fae463319e0998c841e0bcd4014594a49ba54bb70d6", 20 | "zh:ce0b0eb7ac24ff58c20efcb526c3f792a95be3617c795b45bbeea9f302903ae7", 21 | "zh:dbbf98b3cd8003833c472bdb89321c17a9bbdc1b785e7e3d75f8af924ee5a0e4", 22 | "zh:df86cf9311a4be8bb4a251196650653f97e01fbf5fe72deecc8f28a35a5352ae", 23 | "zh:f92992881afd9339f3e539fcd90cfc1e9ed1356b5e760bbcc804314c3cd6837f", 24 | ] 25 | } 26 | -------------------------------------------------------------------------------- /components/guardrail/guardrails-ai/index.mjs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env zx 2 | 3 | import { fileURLToPath } from "url"; 4 | import path from "path"; 5 | import fs from "fs"; 6 | import handlebars from "handlebars"; 7 | import { $, cd } from "zx"; 8 | $.verbose = true; 9 | 10 | export const name = "Guardrails AI"; 11 | const __filename = fileURLToPath(import.meta.url); 12 | const DIR = path.dirname(__filename); 13 | let BASE_DIR; 14 | let config; 15 | let utils; 16 | 17 | export async function init(_BASE_DIR, _config, _utils) { 18 | BASE_DIR = _BASE_DIR; 19 | config = _config; 20 | utils = _utils; 21 | } 22 | 23 | export async function install() { 24 | const requiredEnvVars = ["GUARDRAILS_AI_API_KEY"]; 25 | utils.checkRequiredEnvVars(requiredEnvVars); 26 | 27 | const { GUARDRAILS_AI_API_KEY } = process.env; 28 | 29 | // Use pre-built public ECR image instead of building locally 30 | const publicEcrImage = "public.ecr.aws/agentic-ai-platforms-on-k8s/guardrails-ai:latest"; 31 | 32 | const appTemplatePath = path.join(DIR, "app.template.yaml"); 33 | const appRenderedPath = path.join(DIR, "app.rendered.yaml"); 34 | const { arch } = config.docker; 35 | const appVars = { 36 | arch, 37 | IMAGE: publicEcrImage, 38 | GUARDRAILS_TOKEN: GUARDRAILS_AI_API_KEY, 39 | }; 40 | utils.renderTemplate(appTemplatePath, appRenderedPath, appVars); 41 | await $`kubectl apply -f ${DIR}/app.rendered.yaml`; 42 | } 43 | 44 | export async function uninstall() { 45 | await $`kubectl delete -f ${DIR}/app.rendered.yaml --ignore-not-found`; 46 | } 47 | -------------------------------------------------------------------------------- /components/vector-database/milvus/ingress.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: networking.k8s.io/v1 2 | kind: Ingress 3 | metadata: 4 | annotations: 5 | external-dns.alpha.kubernetes.io/ingress-hostname-source: annotation-only 6 | nginx.ingress.kubernetes.io/auth-type: basic 7 | nginx.ingress.kubernetes.io/auth-secret: basic-auth 8 | 
nginx.ingress.kubernetes.io/auth-realm: "Authentication Required" 9 | name: milvus 10 | namespace: milvus 11 | spec: 12 | ingressClassName: nginx 13 | rules: 14 | - http: 15 | paths: 16 | - path: / 17 | pathType: Prefix 18 | backend: 19 | service: 20 | name: milvus 21 | port: 22 | number: 9091 23 | {{#if DOMAIN}} 24 | host: milvus.{{{DOMAIN}}} 25 | {{/if}} 26 | --- 27 | apiVersion: networking.k8s.io/v1 28 | kind: Ingress 29 | metadata: 30 | annotations: 31 | alb.ingress.kubernetes.io/target-type: ip 32 | {{#if DOMAIN}} 33 | alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}]' 34 | {{/if}} 35 | name: milvus-alb 36 | namespace: ingress-nginx 37 | spec: 38 | ingressClassName: {{#if DOMAIN}}shared-{{/if}}internet-facing-alb 39 | rules: 40 | - http: 41 | paths: 42 | - path: / 43 | pathType: Prefix 44 | backend: 45 | service: 46 | name: ingress-nginx-controller 47 | port: 48 | number: 80 49 | {{#if DOMAIN}} 50 | host: milvus.{{{DOMAIN}}} 51 | {{/if}} -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/code/module3/credit-validation/build-push-docker-image.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # Remove any leftover builder instance 5 | docker buildx rm multiarch-builder || true 6 | 7 | # Create a new builder instance 8 | docker buildx create --name multiarch-builder --use 9 | 10 | # Build for both x86_64 and ARM64 11 | docker buildx build --platform linux/amd64,linux/arm64 -t your-registry/loan-buddy:latest --push . 12 | 13 | # Or build locally for testing 14 | # docker buildx build --platform linux/amd64,linux/arm64 -t loan-buddy:latest --load . 15 | 16 | # Reference only (not executed): environment variables for the loan-buddy 17 | # Deployment manifest, kept here for copy/paste into the pod spec. 18 | # - name: LANGFUSE_URL 19 | #   value: "http://langfuse-web.langfuse.svc.cluster.local:3000" 20 | # - name: LANGFUSE_PUBLIC_KEY 21 | #   value: "lf_pk_1234567890" 22 | # - name: LANGFUSE_SECRET_KEY 23 | #   value: "lf_sk_1234567890" 24 | # - name: GATEWAY_URL 25 | #   value: "http://litellm.litellm.svc.cluster.local:4000" 26 | # - name: GATEWAY_MODEL_ACCESS_KEY 27 | #   value: "sk-4qgicypE01dIhc5mPsBWDQ" 28 | # - name: S3_BUCKET_NAME 29 | #   value: "langfuse" 30 | # - name: S3_ENDPOINT_URL 31 | #   value: "http://langfuse-s3.langfuse.svc.cluster.local:9000" 32 | # - name: S3_ACCESS_KEY 33 | #   value: "minio" 34 | # - name: S3_SECRET_KEY 35 | #   value: "password123" 36 | # - name: MCP_ADDRESS_VALIDATOR 37 | #   value: "http://mcp-address-validator:8000" 38 | # - name: MCP_EMPLOYMENT_VALIDATOR 39 | #   value: "http://mcp-employment-validator:8000" 40 | # - name: MCP_IMAGE_PROCESSOR 41 | #   value: "http://mcp-image-processor:8000" 42 | -------------------------------------------------------------------------------- /components/o11y/langfuse/.terraform.lock.hcl: -------------------------------------------------------------------------------- 1 | # This file is maintained automatically by "terraform init". 2 | # Manual edits may be lost in future updates.
3 | 4 | provider "registry.terraform.io/hashicorp/aws" { 5 | version = "5.96.0" 6 | constraints = "~> 5.96.0" 7 | hashes = [ 8 | "h1:a/VEUu6BGQSPlUAzbN+zqaDCdi0QGh/VzBgo2gCran0=", 9 | "h1:hqoQJnKaTfzNge5oCELAs+jqiT0R0oygDYlG4pmy3yk=", 10 | "zh:3f7e734abb9d647c851f5cb987837d7c073c9cbf1f520a031027d827f93d3b68", 11 | "zh:5ca9400360a803a11cf432ca203be9f09da8fff9c96110a83c9029102b18c9d5", 12 | "zh:5d421f475d467af182a527b7a61d50105dc63394316edf1c775ef736f84b941c", 13 | "zh:68f2328e7f3e7666835d6815b39b46b08954a91204f82a6f648c928a0b09a744", 14 | "zh:6a4170e7e2764df2968d1df65efebda55273dfc36dc6741207afb5e4b7e85448", 15 | "zh:73f2a15bee21f7c92a071e2520216d0a40041aca52c0f6682e540da8ffcfada4", 16 | "zh:9843d6973aedfd4cbaafd7110420d0c4c1d7ef4a2eeff508294c3adcc3613145", 17 | "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425", 18 | "zh:9d1abd6be717c42f2a6257ee227d3e9548c31f01c976ed7b32b2745a63659a67", 19 | "zh:a70d642e323021d54a92f0daa81d096cb5067cb99ce116047a42eb1cb1d579a0", 20 | "zh:b9a2b293208d5a0449275fae463319e0998c841e0bcd4014594a49ba54bb70d6", 21 | "zh:ce0b0eb7ac24ff58c20efcb526c3f792a95be3617c795b45bbeea9f302903ae7", 22 | "zh:dbbf98b3cd8003833c472bdb89321c17a9bbdc1b785e7e3d75f8af924ee5a0e4", 23 | "zh:df86cf9311a4be8bb4a251196650653f97e01fbf5fe72deecc8f28a35a5352ae", 24 | "zh:f92992881afd9339f3e539fcd90cfc1e9ed1356b5e760bbcc804314c3cd6837f", 25 | ] 26 | } 27 | -------------------------------------------------------------------------------- /examples/mcp-server/calculator/.terraform.lock.hcl: -------------------------------------------------------------------------------- 1 | # This file is maintained automatically by "terraform init". 2 | # Manual edits may be lost in future updates. 3 | 4 | provider "registry.terraform.io/hashicorp/aws" { 5 | version = "5.96.0" 6 | constraints = "~> 5.96.0" 7 | hashes = [ 8 | "h1:a/VEUu6BGQSPlUAzbN+zqaDCdi0QGh/VzBgo2gCran0=", 9 | "h1:hqoQJnKaTfzNge5oCELAs+jqiT0R0oygDYlG4pmy3yk=", 10 | "zh:3f7e734abb9d647c851f5cb987837d7c073c9cbf1f520a031027d827f93d3b68", 11 | "zh:5ca9400360a803a11cf432ca203be9f09da8fff9c96110a83c9029102b18c9d5", 12 | "zh:5d421f475d467af182a527b7a61d50105dc63394316edf1c775ef736f84b941c", 13 | "zh:68f2328e7f3e7666835d6815b39b46b08954a91204f82a6f648c928a0b09a744", 14 | "zh:6a4170e7e2764df2968d1df65efebda55273dfc36dc6741207afb5e4b7e85448", 15 | "zh:73f2a15bee21f7c92a071e2520216d0a40041aca52c0f6682e540da8ffcfada4", 16 | "zh:9843d6973aedfd4cbaafd7110420d0c4c1d7ef4a2eeff508294c3adcc3613145", 17 | "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425", 18 | "zh:9d1abd6be717c42f2a6257ee227d3e9548c31f01c976ed7b32b2745a63659a67", 19 | "zh:a70d642e323021d54a92f0daa81d096cb5067cb99ce116047a42eb1cb1d579a0", 20 | "zh:b9a2b293208d5a0449275fae463319e0998c841e0bcd4014594a49ba54bb70d6", 21 | "zh:ce0b0eb7ac24ff58c20efcb526c3f792a95be3617c795b45bbeea9f302903ae7", 22 | "zh:dbbf98b3cd8003833c472bdb89321c17a9bbdc1b785e7e3d75f8af924ee5a0e4", 23 | "zh:df86cf9311a4be8bb4a251196650653f97e01fbf5fe72deecc8f28a35a5352ae", 24 | "zh:f92992881afd9339f3e539fcd90cfc1e9ed1356b5e760bbcc804314c3cd6837f", 25 | ] 26 | } 27 | -------------------------------------------------------------------------------- /components/guardrail/guardrails-ai/.terraform.lock.hcl: -------------------------------------------------------------------------------- 1 | # This file is maintained automatically by "terraform init". 2 | # Manual edits may be lost in future updates. 
3 | 4 | provider "registry.terraform.io/hashicorp/aws" { 5 | version = "5.96.0" 6 | constraints = "~> 5.96.0" 7 | hashes = [ 8 | "h1:a/VEUu6BGQSPlUAzbN+zqaDCdi0QGh/VzBgo2gCran0=", 9 | "h1:hqoQJnKaTfzNge5oCELAs+jqiT0R0oygDYlG4pmy3yk=", 10 | "zh:3f7e734abb9d647c851f5cb987837d7c073c9cbf1f520a031027d827f93d3b68", 11 | "zh:5ca9400360a803a11cf432ca203be9f09da8fff9c96110a83c9029102b18c9d5", 12 | "zh:5d421f475d467af182a527b7a61d50105dc63394316edf1c775ef736f84b941c", 13 | "zh:68f2328e7f3e7666835d6815b39b46b08954a91204f82a6f648c928a0b09a744", 14 | "zh:6a4170e7e2764df2968d1df65efebda55273dfc36dc6741207afb5e4b7e85448", 15 | "zh:73f2a15bee21f7c92a071e2520216d0a40041aca52c0f6682e540da8ffcfada4", 16 | "zh:9843d6973aedfd4cbaafd7110420d0c4c1d7ef4a2eeff508294c3adcc3613145", 17 | "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425", 18 | "zh:9d1abd6be717c42f2a6257ee227d3e9548c31f01c976ed7b32b2745a63659a67", 19 | "zh:a70d642e323021d54a92f0daa81d096cb5067cb99ce116047a42eb1cb1d579a0", 20 | "zh:b9a2b293208d5a0449275fae463319e0998c841e0bcd4014594a49ba54bb70d6", 21 | "zh:ce0b0eb7ac24ff58c20efcb526c3f792a95be3617c795b45bbeea9f302903ae7", 22 | "zh:dbbf98b3cd8003833c472bdb89321c17a9bbdc1b785e7e3d75f8af924ee5a0e4", 23 | "zh:df86cf9311a4be8bb4a251196650653f97e01fbf5fe72deecc8f28a35a5352ae", 24 | "zh:f92992881afd9339f3e539fcd90cfc1e9ed1356b5e760bbcc804314c3cd6837f", 25 | ] 26 | } 27 | -------------------------------------------------------------------------------- /components/gui-app/openwebui/index.mjs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env zx 2 | 3 | import { fileURLToPath } from "url"; 4 | import path from "path"; 5 | import fs from "fs"; 6 | import handlebars from "handlebars"; 7 | import { $ } from "zx"; 8 | $.verbose = true; 9 | 10 | export const name = "Open WebUI"; 11 | const __filename = fileURLToPath(import.meta.url); 12 | const DIR = path.dirname(__filename); 13 | let BASE_DIR; 14 | let config; 15 | let utils; 16 | 17 | export async function init(_BASE_DIR, _config, _utils) { 18 | BASE_DIR = _BASE_DIR; 19 | config = _config; 20 | utils = _utils; 21 | } 22 | 23 | export async function install() { 24 | const requiredEnvVars = ["LITELLM_API_KEY"]; 25 | utils.checkRequiredEnvVars(requiredEnvVars); 26 | 27 | await $`helm repo add open-webui https://open-webui.github.io/helm-charts`; 28 | await $`helm repo update`; 29 | 30 | const valuesTemplatePath = path.join(DIR, "values.template.yaml"); 31 | const valuesRenderedPath = path.join(DIR, "values.rendered.yaml"); 32 | const valuesTemplateString = fs.readFileSync(valuesTemplatePath, "utf8"); 33 | const valuesTemplate = handlebars.compile(valuesTemplateString); 34 | const valuesVars = { 35 | DOMAIN: process.env.DOMAIN, 36 | LITELLM_API_KEY: process.env.LITELLM_API_KEY, 37 | }; 38 | fs.writeFileSync(valuesRenderedPath, valuesTemplate(valuesVars)); 39 | await $`helm upgrade --install openwebui open-webui/open-webui --namespace openwebui --create-namespace -f ${valuesRenderedPath}`; 40 | } 41 | 42 | export async function uninstall() { 43 | await $`helm uninstall openwebui --namespace openwebui`; 44 | } 45 | -------------------------------------------------------------------------------- /components/ai-gateway/litellm/.terraform.lock.hcl: -------------------------------------------------------------------------------- 1 | # This file is maintained automatically by "terraform init". 2 | # Manual edits may be lost in future updates. 
3 | 4 | provider "registry.terraform.io/hashicorp/aws" { 5 | version = "5.96.0" 6 | constraints = ">= 5.30.0, ~> 5.96.0" 7 | hashes = [ 8 | "h1:a/VEUu6BGQSPlUAzbN+zqaDCdi0QGh/VzBgo2gCran0=", 9 | "h1:hqoQJnKaTfzNge5oCELAs+jqiT0R0oygDYlG4pmy3yk=", 10 | "zh:3f7e734abb9d647c851f5cb987837d7c073c9cbf1f520a031027d827f93d3b68", 11 | "zh:5ca9400360a803a11cf432ca203be9f09da8fff9c96110a83c9029102b18c9d5", 12 | "zh:5d421f475d467af182a527b7a61d50105dc63394316edf1c775ef736f84b941c", 13 | "zh:68f2328e7f3e7666835d6815b39b46b08954a91204f82a6f648c928a0b09a744", 14 | "zh:6a4170e7e2764df2968d1df65efebda55273dfc36dc6741207afb5e4b7e85448", 15 | "zh:73f2a15bee21f7c92a071e2520216d0a40041aca52c0f6682e540da8ffcfada4", 16 | "zh:9843d6973aedfd4cbaafd7110420d0c4c1d7ef4a2eeff508294c3adcc3613145", 17 | "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425", 18 | "zh:9d1abd6be717c42f2a6257ee227d3e9548c31f01c976ed7b32b2745a63659a67", 19 | "zh:a70d642e323021d54a92f0daa81d096cb5067cb99ce116047a42eb1cb1d579a0", 20 | "zh:b9a2b293208d5a0449275fae463319e0998c841e0bcd4014594a49ba54bb70d6", 21 | "zh:ce0b0eb7ac24ff58c20efcb526c3f792a95be3617c795b45bbeea9f302903ae7", 22 | "zh:dbbf98b3cd8003833c472bdb89321c17a9bbdc1b785e7e3d75f8af924ee5a0e4", 23 | "zh:df86cf9311a4be8bb4a251196650653f97e01fbf5fe72deecc8f28a35a5352ae", 24 | "zh:f92992881afd9339f3e539fcd90cfc1e9ed1356b5e760bbcc804314c3cd6837f", 25 | ] 26 | } 27 | -------------------------------------------------------------------------------- /components/llm-model/vllm/archive/job-vllm-neuron-build.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: Job 3 | metadata: 4 | name: vllm-neuron-build 5 | namespace: vllm 6 | spec: 7 | template: 8 | spec: 9 | restartPolicy: Never 10 | securityContext: 11 | seccompProfile: 12 | type: RuntimeDefault 13 | automountServiceAccountToken: false 14 | # nodeSelector: 15 | # {{{KARPENTER_PREFIX}}}/instance-family: inf2 16 | serviceAccountName: vllm-neuron-build 17 | containers: 18 | - name: kaniko 19 | image: gcr.io/kaniko-project/executor:latest 20 | args: 21 | # TODO: Switch back to AWS Neuron repo when the PR is merged. 22 | # https://github.com/aws-neuron/upstreaming-to-vllm/pull/25 23 | - "--context=git://github.com/aonz/upstreaming-to-vllm.git" 24 | - "--dockerfile=docker/Dockerfile.neuron" 25 | - "--destination={{{IMAGE}}}" 26 | # - "--verbosity=debug" 27 | env: 28 | - name: AWS_REGION 29 | value: us-west-2 30 | resources: 31 | requests: 32 | cpu: 24 #75% 33 | memory: 96Gi #90% 34 | aws.amazon.com/neuron: 1 35 | limits: 36 | aws.amazon.com/neuron: 1 37 | tolerations: 38 | - key: aws.amazon.com/neuron 39 | operator: Exists 40 | effect: NoSchedule 41 | --- 42 | apiVersion: v1 43 | kind: ServiceAccount 44 | metadata: 45 | name: vllm-neuron-build 46 | namespace: vllm 47 | automountServiceAccountToken: false 48 | -------------------------------------------------------------------------------- /examples/strands-agents/calculator-agent/.terraform.lock.hcl: -------------------------------------------------------------------------------- 1 | # This file is maintained automatically by "terraform init". 2 | # Manual edits may be lost in future updates. 
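# Archived approach: this Job builds the vLLM Neuron image in-cluster with kaniko
# and pushes it to the ECR repository defined in archive/main.tf; the
# vllm-neuron-build service account receives ECR push permissions via EKS Pod Identity.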
3 | 4 | provider "registry.terraform.io/hashicorp/aws" { 5 | version = "5.96.0" 6 | constraints = ">= 5.30.0, ~> 5.96.0" 7 | hashes = [ 8 | "h1:a/VEUu6BGQSPlUAzbN+zqaDCdi0QGh/VzBgo2gCran0=", 9 | "h1:hqoQJnKaTfzNge5oCELAs+jqiT0R0oygDYlG4pmy3yk=", 10 | "zh:3f7e734abb9d647c851f5cb987837d7c073c9cbf1f520a031027d827f93d3b68", 11 | "zh:5ca9400360a803a11cf432ca203be9f09da8fff9c96110a83c9029102b18c9d5", 12 | "zh:5d421f475d467af182a527b7a61d50105dc63394316edf1c775ef736f84b941c", 13 | "zh:68f2328e7f3e7666835d6815b39b46b08954a91204f82a6f648c928a0b09a744", 14 | "zh:6a4170e7e2764df2968d1df65efebda55273dfc36dc6741207afb5e4b7e85448", 15 | "zh:73f2a15bee21f7c92a071e2520216d0a40041aca52c0f6682e540da8ffcfada4", 16 | "zh:9843d6973aedfd4cbaafd7110420d0c4c1d7ef4a2eeff508294c3adcc3613145", 17 | "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425", 18 | "zh:9d1abd6be717c42f2a6257ee227d3e9548c31f01c976ed7b32b2745a63659a67", 19 | "zh:a70d642e323021d54a92f0daa81d096cb5067cb99ce116047a42eb1cb1d579a0", 20 | "zh:b9a2b293208d5a0449275fae463319e0998c841e0bcd4014594a49ba54bb70d6", 21 | "zh:ce0b0eb7ac24ff58c20efcb526c3f792a95be3617c795b45bbeea9f302903ae7", 22 | "zh:dbbf98b3cd8003833c472bdb89321c17a9bbdc1b785e7e3d75f8af924ee5a0e4", 23 | "zh:df86cf9311a4be8bb4a251196650653f97e01fbf5fe72deecc8f28a35a5352ae", 24 | "zh:f92992881afd9339f3e539fcd90cfc1e9ed1356b5e760bbcc804314c3cd6837f", 25 | ] 26 | } 27 | -------------------------------------------------------------------------------- /examples/agno/calculator-agent/main.tf: -------------------------------------------------------------------------------- 1 | variable "region" { 2 | type = string 3 | default = "us-west-2" 4 | } 5 | variable "name" { 6 | type = string 7 | default = "genai-on-eks" 8 | } 9 | terraform { 10 | required_providers { 11 | aws = { 12 | source = "hashicorp/aws" 13 | version = "~> 5.96.0" 14 | } 15 | } 16 | } 17 | provider "aws" { 18 | region = var.region 19 | } 20 | locals { 21 | app = "calculator-agent" 22 | namespace = "agno" 23 | full_name = "${var.name}-${local.namespace}-${local.app}" 24 | } 25 | resource "aws_ecr_repository" "this" { 26 | name = local.full_name 27 | image_tag_mutability = "MUTABLE" 28 | force_delete = true 29 | 30 | image_scanning_configuration { 31 | scan_on_push = true 32 | } 33 | 34 | encryption_configuration { 35 | encryption_type = "KMS" 36 | } 37 | } 38 | output "ecr_repository_url" { 39 | value = aws_ecr_repository.this.repository_url 40 | } 41 | 42 | module "pod_identity" { 43 | source = "terraform-aws-modules/eks-pod-identity/aws" 44 | version = "1.12.0" 45 | 46 | name = local.full_name 47 | use_name_prefix = false 48 | attach_custom_policy = true 49 | policy_statements = [ 50 | { 51 | sid = "Bedrock" 52 | actions = [ 53 | "bedrock:InvokeModel", 54 | "bedrock:InvokeModelWithResponseStream", 55 | ] 56 | resources = ["*"] 57 | } 58 | ] 59 | associations = { 60 | app = { 61 | service_account = local.app 62 | namespace = local.namespace 63 | cluster_name = var.name 64 | } 65 | } 66 | } -------------------------------------------------------------------------------- /examples/strands-agents/calculator-agent/main.tf: -------------------------------------------------------------------------------- 1 | variable "region" { 2 | type = string 3 | default = "us-west-2" 4 | } 5 | variable "name" { 6 | type = string 7 | default = "genai-on-eks" 8 | } 9 | terraform { 10 | required_providers { 11 | aws = { 12 | source = "hashicorp/aws" 13 | version = "~> 5.96.0" 14 | } 15 | } 16 | } 17 | provider "aws" { 18 | region = 
var.region 19 | } 20 | locals { 21 | app = "calculator-agent" 22 | namespace = "strands-agents" 23 | full_name = "${var.name}-${local.namespace}-${local.app}" 24 | } 25 | resource "aws_ecr_repository" "this" { 26 | name = local.full_name 27 | image_tag_mutability = "MUTABLE" 28 | force_delete = true 29 | 30 | image_scanning_configuration { 31 | scan_on_push = true 32 | } 33 | 34 | encryption_configuration { 35 | encryption_type = "KMS" 36 | } 37 | } 38 | output "ecr_repository_url" { 39 | value = aws_ecr_repository.this.repository_url 40 | } 41 | 42 | module "pod_identity" { 43 | source = "terraform-aws-modules/eks-pod-identity/aws" 44 | version = "1.12.0" 45 | 46 | name = local.full_name 47 | use_name_prefix = false 48 | attach_custom_policy = true 49 | policy_statements = [ 50 | { 51 | sid = "Bedrock" 52 | actions = [ 53 | "bedrock:InvokeModel", 54 | "bedrock:InvokeModelWithResponseStream", 55 | ] 56 | resources = ["*"] 57 | } 58 | ] 59 | associations = { 60 | app = { 61 | service_account = local.app 62 | namespace = local.namespace 63 | cluster_name = var.name 64 | } 65 | } 66 | } -------------------------------------------------------------------------------- /examples/agno/calculator-agent/openwebui_pipe_function.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from pydantic import BaseModel 3 | 4 | 5 | class Pipe: 6 | class Valves(BaseModel): 7 | AGENT_ENDPOINT: str = "http://calculator-agent.agno" 8 | 9 | def __init__(self): 10 | self.valves = self.Valves() 11 | 12 | def pipes(self): 13 | return [{"id": "agno-calculator-agent", "name": "Agno - Calculator Agent"}] 14 | 15 | def pipe(self, body: dict, __user__: dict): 16 | messages = body.get("messages", []) 17 | last_user_message = next( 18 | (m for m in reversed(messages) if m.get("role") == "user"), None 19 | ) 20 | 21 | if not last_user_message: 22 | return 23 | 24 | message = last_user_message["content"] 25 | if message.startswith("### Task"): 26 | print("Skip: ### Task") 27 | return 28 | 29 | print("Latest user message:", message) 30 | 31 | try: 32 | response = requests.post( 33 | url=self.valves.AGENT_ENDPOINT, 34 | json={"prompt": message}, 35 | headers={"Content-Type": "application/json"}, 36 | stream=True, 37 | timeout=60, 38 | ) 39 | response.raise_for_status() 40 | 41 | if body.get("stream", False): 42 | return self.stream_response(response) 43 | else: 44 | return response.text 45 | except Exception as e: 46 | return f"Error: {e}" 47 | 48 | def stream_response(self, response): 49 | for line in response.iter_lines(decode_unicode=True): 50 | if line: 51 | yield line + "\n" 52 | -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/static/code/module3/credit-validation/requirements.txt: -------------------------------------------------------------------------------- 1 | # # Credit Underwriting System Dependencies 2 | 3 | # # Environment variables 4 | # python-dotenv>=1.0.0 5 | 6 | # # Core MCP and LangChain dependencies 7 | # langchain-mcp-adapters>=0.1.0 8 | # langgraph>=0.2.0 9 | # langchain 10 | # langchain-openai>=0.2.0 11 | 12 | # # MCP Server framework 13 | # mcp>=1.0.0 14 | 15 | # # Web framework and server 16 | # fastapi>=0.104.0 17 | # uvicorn[standard]>=0.24.0 18 | 19 | # # HTTP client for testing 20 | # aiohttp>=3.9.0 21 | 22 | # # Monitoring and tracing 23 | # langfuse>=2.60.4 24 | 25 | # # Image processing and encoding 26 | # Pillow>=10.0.0 27 | 28 | # # AWS SDK for S3 operations 29 | # 
boto3>=1.34.0 30 | # botocore>=1.34.0 31 | 32 | # # Async context management 33 | # contextlib2>=21.6.0 34 | 35 | # # Additional utilities 36 | # python-multipart>=0.0.6 37 | # pydantic>=2.0.0 38 | 39 | # # Development and testing 40 | # pytest>=7.4.0 41 | # pytest-asyncio>=0.21.0 42 | 43 | 44 | # Credit Underwriting System Dependencies 45 | 46 | # Environment variables 47 | python-dotenv 48 | 49 | # Core MCP and LangChain dependencies 50 | langchain-mcp-adapters 51 | langgraph 52 | langchain 53 | langchain-openai 54 | 55 | # MCP Server framework 56 | mcp 57 | 58 | # fast-mcp>=2.8.1 59 | 60 | # Web framework and server 61 | fastapi 62 | uvicorn[standard] 63 | 64 | # HTTP client for testing 65 | aiohttp 66 | 67 | # Monitoring and tracing 68 | langfuse 69 | 70 | # Image processing and encoding 71 | Pillow 72 | 73 | # AWS SDK for S3 operations 74 | boto3 75 | botocore 76 | 77 | # Async context management 78 | contextlib2 79 | 80 | # Additional utilities 81 | python-multipart 82 | pydantic 83 | 84 | # Development and testing 85 | # pytest>=7.4.0 86 | # pytest-asyncio>=0.21.0 87 | -------------------------------------------------------------------------------- /workshops/eks-genai-workshop/content/introduction/getting-started/index.en.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Getting Started" 3 | chapter: true 4 | weight: 12 5 | --- 6 | 7 | Before we dive into deploying Large Language Models and building intelligent agents, let's ensure you have access to the workshop environment. This workshop can be completed in two ways, depending on how you're participating. 8 | 9 | ## 🎯 Choose Your Path 10 | 11 | ::::tabs 12 | 13 | :::tab{label="AWS Event"} 14 | ### 🎪 Participating in an AWS Event 15 | 16 | **Your infrastructure is pre-deployed and ready!** 17 | 18 | The event organizers have already set up: 19 | - ✅ EKS cluster with all required components 20 | - ✅ GenAI platform stack (vLLM, LiteLLM, Langfuse) 21 | - ✅ Model storage and caching 22 | - ✅ Networking and security configurations 23 | 24 | [**Continue with AWS Event Setup →**](/introduction/getting-started/at-aws-event/) 25 | ::: 26 | 27 | :::tab{label="Own Account"} 28 | ### 💻 Using Your Own AWS Account 29 | 30 | Running this workshop independently in your personal or company AWS account gives you: 31 | - Full control over the environment 32 | - Ability to keep resources after the workshop 33 | - Opportunity to customize configurations 34 | - Deeper understanding of the infrastructure 35 | 36 | **You'll need to**: 37 | - ✅ Have an AWS account with appropriate permissions 38 | - ✅ Deploy the workshop infrastructure 39 | - ✅ Configure access to the EKS cluster 40 | - ✅ Verify all components are running 41 | 42 | ::alert[**Cost Warning**: This workshop uses GPU and Neuron instances which incur charges. Remember to clean up resources after completion!]{type="warning"} 43 | 44 | [**Continue with Own Account Setup →**](/introduction/getting-started/self-account/) 45 | ::: 46 | 47 | --- 48 | 49 | **Ready? 
Select your path above to continue!** 50 | -------------------------------------------------------------------------------- /terraform/variables.tf: -------------------------------------------------------------------------------- 1 | variable "name" { 2 | type = string 3 | default = "genai-on-eks" 4 | } 5 | variable "region" { 6 | type = string 7 | default = "us-west-2" 8 | } 9 | variable "vpc_cidr" { 10 | type = string 11 | default = "10.0.0.0/16" 12 | } 13 | variable "eks_cluster_version" { 14 | type = string 15 | default = "1.34" 16 | } 17 | variable "domain" { 18 | type = string 19 | default = "" 20 | } 21 | variable "efs_throughput_mode" { 22 | type = string 23 | default = "bursting" 24 | } 25 | variable "gpu_nodepool_capacity_type" { 26 | type = list(string) 27 | default = ["spot", "on-demand"] 28 | } 29 | 30 | variable "gpu_nodepool_instance_family" { 31 | type = list(string) 32 | default = ["g6e", "g6", "g5g", "p5en", "p5e", "p5", "p4de", "p4d"] 33 | } 34 | 35 | variable "enable_nginx" { 36 | type = bool 37 | default = true 38 | } 39 | 40 | variable "enable_lws" { 41 | type = bool 42 | default = true 43 | } 44 | 45 | locals { 46 | account_id = data.aws_caller_identity.current.account_id 47 | } 48 | 49 | data "aws_caller_identity" "current" {} 50 | 51 | data "aws_availability_zones" "available" {} 52 | 53 | terraform { 54 | required_version = ">= 1.5" 55 | 56 | required_providers { 57 | aws = { 58 | source = "hashicorp/aws" 59 | version = "~> 6.15.0" 60 | } 61 | kubernetes = { 62 | source = "hashicorp/kubernetes" 63 | version = "~> 2.38.0" 64 | } 65 | helm = { 66 | source = "hashicorp/helm" 67 | version = "~> 2.17.0" 68 | } 69 | kubectl = { 70 | source = "alekc/kubectl" 71 | version = "~> 2.1.3" 72 | } 73 | local = { 74 | source = "hashicorp/local" 75 | version = "~> 2.5.3" 76 | } 77 | } 78 | } 79 | 80 | provider "aws" { region = var.region } 81 | -------------------------------------------------------------------------------- /components/llm-model/tgi/archive/main.tf: -------------------------------------------------------------------------------- 1 | variable "region" { 2 | type = string 3 | default = "us-west-2" 4 | } 5 | variable "name" { 6 | type = string 7 | default = "genai-on-eks" 8 | } 9 | terraform { 10 | required_providers { 11 | aws = { 12 | source = "hashicorp/aws" 13 | version = "~> 5.96.0" 14 | } 15 | } 16 | } 17 | provider "aws" { 18 | region = var.region 19 | } 20 | locals { 21 | name = "tgi-neuron" 22 | } 23 | resource "aws_ecr_repository" "this" { 24 | name = "${var.name}-${local.name}" 25 | image_tag_mutability = "MUTABLE" 26 | force_delete = true 27 | 28 | image_scanning_configuration { 29 | scan_on_push = true 30 | } 31 | 32 | encryption_configuration { 33 | encryption_type = "KMS" 34 | } 35 | } 36 | output "ecr_repository_url" { 37 | value = aws_ecr_repository.this.repository_url 38 | } 39 | 40 | module "pod_identity" { 41 | source = "terraform-aws-modules/eks-pod-identity/aws" 42 | version = "1.12.0" 43 | 44 | name = "${var.name}-${var.region}-tgi-neuron-build" 45 | use_name_prefix = false 46 | attach_custom_policy = true 47 | policy_statements = [ 48 | { 49 | actions = [ 50 | "ecr:GetAuthorizationToken", 51 | ] 52 | resources = ["*"] 53 | }, 54 | { 55 | actions = [ 56 | "ecr:CompleteLayerUpload", 57 | "ecr:UploadLayerPart", 58 | "ecr:InitiateLayerUpload", 59 | "ecr:BatchCheckLayerAvailability", 60 | "ecr:PutImage", 61 | "ecr:BatchGetImage" 62 | ] 63 | resources = [aws_ecr_repository.this.arn] 64 | } 65 | ] 66 | associations = { 67 | litellm = { 68 |
service_account = "tgi-neuron-build" 69 | namespace = "tgi" 70 | cluster_name = var.name 71 | } 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /components/o11y/mlflow/index.mjs: -------------------------------------------------------------------------------- 1 | import { fileURLToPath } from "url"; 2 | import path from "path"; 3 | import fs from "fs"; 4 | import handlebars from "handlebars"; 5 | import { $ } from "zx"; 6 | $.verbose = true; 7 | 8 | export const name = "MLflow"; 9 | const __filename = fileURLToPath(import.meta.url); 10 | const DIR = path.dirname(__filename); 11 | let BASE_DIR; 12 | let config; 13 | let utils; 14 | 15 | export async function init(_BASE_DIR, _config, _utils) { 16 | BASE_DIR = _BASE_DIR; 17 | config = _config; 18 | utils = _utils; 19 | } 20 | 21 | export async function install() { 22 | const requiredEnvVars = ["MLFLOW_USERNAME", "MLFLOW_PASSWORD"]; 23 | utils.checkRequiredEnvVars(requiredEnvVars); 24 | 25 | await utils.terraform.apply(DIR); 26 | const mlflowBucketName = await utils.terraform.output(DIR, { outputName: "mlflow_bucket_name" }); 27 | 28 | const valuesTemplatePath = path.join(DIR, "values.template.yaml"); 29 | const valuesRenderedPath = path.join(DIR, "values.rendered.yaml"); 30 | const valuesTemplateString = fs.readFileSync(valuesTemplatePath, "utf8"); 31 | const valuesTemplate = handlebars.compile(valuesTemplateString); 32 | const valuesVars = { 33 | DOMAIN: process.env.DOMAIN, 34 | MLFLOW_USERNAME: process.env.MLFLOW_USERNAME, 35 | MLFLOW_PASSWORD: process.env.MLFLOW_PASSWORD, 36 | MLFLOW_BUCKET_NAME: mlflowBucketName, 37 | }; 38 | fs.writeFileSync(valuesRenderedPath, valuesTemplate(valuesVars)); 39 | await $`helm repo add community-charts https://community-charts.github.io/helm-charts`; 40 | await $`helm upgrade --install mlflow community-charts/mlflow --namespace mlflow --create-namespace -f ${valuesRenderedPath}`; 41 | } 42 | 43 | export async function uninstall() { 44 | await $`helm uninstall mlflow --namespace mlflow`; 45 | await utils.terraform.destroy(DIR); 46 | } 47 | -------------------------------------------------------------------------------- /components/llm-model/vllm/archive/main.tf: -------------------------------------------------------------------------------- 1 | variable "region" { 2 | type = string 3 | default = "us-west-2" 4 | } 5 | variable "name" { 6 | type = string 7 | default = "genai-on-eks" 8 | } 9 | terraform { 10 | required_providers { 11 | aws = { 12 | source = "hashicorp/aws" 13 | version = "~> 5.96.0" 14 | } 15 | } 16 | } 17 | provider "aws" { 18 | region = var.region 19 | } 20 | locals { 21 | name = "vllm-neuron" 22 | } 23 | resource "aws_ecr_repository" "this" { 24 | name = "${var.name}-${local.name}" 25 | image_tag_mutability = "MUTABLE" 26 | force_delete = true 27 | 28 | image_scanning_configuration { 29 | scan_on_push = true 30 | } 31 | 32 | encryption_configuration { 33 | encryption_type = "KMS" 34 | } 35 | } 36 | output "ecr_repository_url" { 37 | value = aws_ecr_repository.this.repository_url 38 | } 39 | 40 | module "pod_identity" { 41 | source = "terraform-aws-modules/eks-pod-identity/aws" 42 | version = "1.12.0" 43 | 44 | name = "${var.name}-${var.region}-vllm-neuron-build" 45 | use_name_prefix = false 46 | attach_custom_policy = true 47 | policy_statements = [ 48 | { 49 | actions = [ 50 | "ecr:GetAuthorizationToken", 51 | ] 52 | resources = ["*"] 53 | }, 54 | { 55 | actions = [ 56 | "ecr:CompleteLayerUpload", 57 | "ecr:UploadLayerPart", 58 | 
"ecr:InitiateLayerUpload", 59 | "ecr:BatchCheckLayerAvailability", 60 | "ecr:PutImage", 61 | "ecr:BatchGetImage" 62 | ] 63 | resources = [aws_ecr_repository.this.arn] 64 | } 65 | ] 66 | associations = { 67 | litellm = { 68 | service_account = "vllm-neuron-build" 69 | namespace = "vllm" 70 | cluster_name = var.name 71 | } 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /examples/strands-agents/calculator-agent/openwebui_pipe_function.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from pydantic import BaseModel 3 | 4 | 5 | class Pipe: 6 | class Valves(BaseModel): 7 | AGENT_ENDPOINT: str = "http://calculator-agent.strands-agents" 8 | 9 | def __init__(self): 10 | self.valves = self.Valves() 11 | 12 | def pipes(self): 13 | return [ 14 | { 15 | "id": "strands-agents-calculator-agent", 16 | "name": "Strands Agents - Calculator Agent", 17 | } 18 | ] 19 | 20 | def pipe(self, body: dict, __user__: dict): 21 | messages = body.get("messages", []) 22 | last_user_message = next( 23 | (m for m in reversed(messages) if m.get("role") == "user"), None 24 | ) 25 | 26 | if not last_user_message: 27 | return 28 | 29 | message = last_user_message["content"] 30 | if message.startswith("### Task"): 31 | print("Skip: ### Task") 32 | return 33 | 34 | print("Latest user message:", message) 35 | 36 | try: 37 | response = requests.post( 38 | url=self.valves.AGENT_ENDPOINT, 39 | json={"prompt": message}, 40 | headers={"Content-Type": "application/json"}, 41 | stream=True, 42 | timeout=60, 43 | ) 44 | response.raise_for_status() 45 | 46 | if body.get("stream", False): 47 | return self.stream_response(response) 48 | else: 49 | return response.text 50 | except Exception as e: 51 | return f"Error: {e}" 52 | 53 | def stream_response(self, response): 54 | for line in response.iter_lines(decode_unicode=True): 55 | if line: 56 | yield line + "\n" 57 | -------------------------------------------------------------------------------- /components/llm-model/sglang/index.mjs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env zx 2 | 3 | import { fileURLToPath } from "url"; 4 | import path from "path"; 5 | import fs from "fs"; 6 | import handlebars from "handlebars"; 7 | import { $ } from "zx"; 8 | $.verbose = true; 9 | 10 | export const name = "SGLang"; 11 | const __filename = fileURLToPath(import.meta.url); 12 | const DIR = path.dirname(__filename); 13 | let BASE_DIR; 14 | let config; 15 | let utils; 16 | 17 | export async function init(_BASE_DIR, _config, _utils) { 18 | BASE_DIR = _BASE_DIR; 19 | config = _config; 20 | utils = _utils; 21 | } 22 | 23 | export async function install() { 24 | const requiredEnvVars = ["HF_TOKEN"]; 25 | utils.checkRequiredEnvVars(requiredEnvVars); 26 | 27 | await $`kubectl apply -f ${path.join(DIR, "namespace.yaml")}`; 28 | await $`kubectl apply -f ${path.join(DIR, "pvc.yaml")}`; 29 | const secretTemplatePath = path.join(DIR, "secret.template.yaml"); 30 | const secretRenderedPath = path.join(DIR, "secret.rendered.yaml"); 31 | const secretTemplateString = fs.readFileSync(secretTemplatePath, "utf8"); 32 | const secretTemplate = handlebars.compile(secretTemplateString); 33 | const secretVars = { 34 | HF_TOKEN: process.env.HF_TOKEN, 35 | }; 36 | fs.writeFileSync(secretRenderedPath, secretTemplate(secretVars)); 37 | await $`kubectl apply -f ${secretRenderedPath}`; 38 | const { models } = config["llm-model"]["sglang"]; 39 | await 
utils.model.addModels(models, "llm-model", "sglang"); 40 | } 41 | 42 | export async function uninstall() { 43 | const { models } = config["llm-model"]["sglang"]; 44 | await utils.model.removeAllModels(models, "llm-model", "sglang"); 45 | await $`kubectl delete -f ${path.join(DIR, "secret.rendered.yaml")} --ignore-not-found`; 46 | await $`kubectl delete -f ${path.join(DIR, "pvc.yaml")} --ignore-not-found`; 47 | await $`kubectl delete -f ${path.join(DIR, "namespace.yaml")} --ignore-not-found`; 48 | } 49 | -------------------------------------------------------------------------------- /components/llm-model/ollama/deployment.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: ollama 5 | namespace: ollama 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: ollama 11 | template: 12 | metadata: 13 | labels: 14 | app: ollama 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | {{{KARPENTER_PREFIX}}}/instance-family: g6e 22 | containers: 23 | - name: ollama 24 | image: ollama/ollama:0.10.1 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | lifecycle: 34 | postStart: 35 | exec: 36 | command: ["/scripts/init-ollama.sh"] 37 | ports: 38 | - name: http 39 | containerPort: 11434 40 | resources: 41 | requests: 42 | cpu: 3 #75% 43 | memory: 24Gi #75% 44 | nvidia.com/gpu: 1 45 | limits: 46 | nvidia.com/gpu: 1 47 | volumeMounts: 48 | - name: ollama-init-script 49 | mountPath: /scripts 50 | - name: ollama-cache 51 | mountPath: /root/.ollama 52 | volumes: 53 | - name: ollama-init-script 54 | configMap: 55 | name: ollama-init-script 56 | defaultMode: 0755 57 | - name: ollama-cache 58 | persistentVolumeClaim: 59 | claimName: ollama-cache 60 | tolerations: 61 | - key: nvidia.com/gpu 62 | operator: Exists 63 | effect: NoSchedule 64 | -------------------------------------------------------------------------------- /examples/mcp-server/calculator/server.py: -------------------------------------------------------------------------------- 1 | from fastmcp import FastMCP 2 | 3 | mcp = FastMCP("Calculator") 4 | 5 | 6 | # Define a simple addition tool 7 | @mcp.tool(description="Add two numbers together") 8 | def add(x: int, y: int) -> int: 9 | """Add two numbers and return the result. 10 | 11 | Args: 12 | x: First number 13 | y: Second number 14 | 15 | Returns: 16 | The sum of x and y 17 | """ 18 | print("Calling add tool.\n") 19 | return x + y 20 | 21 | 22 | # Define a subtraction tool 23 | @mcp.tool(description="Subtract one number from another") 24 | def subtract(x: int, y: int) -> int: 25 | """Subtract y from x and return the result. 26 | 27 | Args: 28 | x: Number to subtract from 29 | y: Number to subtract 30 | 31 | Returns: 32 | The difference (x - y) 33 | """ 34 | print("Calling subtract tool.\n") 35 | return x - y 36 | 37 | 38 | # Define a multiplication tool 39 | @mcp.tool(description="Multiply two numbers together") 40 | def multiply(x: int, y: int) -> int: 41 | """Multiply two numbers and return the result. 
42 | 43 | Args: 44 | x: First number 45 | y: Second number 46 | 47 | Returns: 48 | The product of x and y 49 | """ 50 | print("Calling multiply tool.\n") 51 | return x * y 52 | 53 | 54 | # Define a division tool 55 | @mcp.tool(description="Divide one number by another") 56 | def divide(x: float, y: float) -> float: 57 | """Divide x by y and return the result. 58 | 59 | Args: 60 | x: Numerator 61 | y: Denominator (must not be zero) 62 | 63 | Returns: 64 | The quotient (x / y) 65 | 66 | Raises: 67 | ValueError: If y is zero 68 | """ 69 | print("Calling divide tool.\n") 70 | if y == 0: 71 | raise ValueError("Cannot divide by zero") 72 | return x / y 73 | 74 | 75 | if __name__ == "__main__": 76 | mcp.run() 77 | -------------------------------------------------------------------------------- /components/embedding-model/tei/index.mjs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env zx 2 | 3 | import { fileURLToPath } from "url"; 4 | import path from "path"; 5 | import fs from "fs"; 6 | import handlebars from "handlebars"; 7 | import { $ } from "zx"; 8 | $.verbose = true; 9 | 10 | export const name = "Text Embedding Inference (TEI)"; 11 | const __filename = fileURLToPath(import.meta.url); 12 | const DIR = path.dirname(__filename); 13 | let BASE_DIR; 14 | let config; 15 | let utils; 16 | 17 | export async function init(_BASE_DIR, _config, _utils) { 18 | BASE_DIR = _BASE_DIR; 19 | config = _config; 20 | utils = _utils; 21 | } 22 | 23 | export async function install() { 24 | const requiredEnvVars = ["HF_TOKEN"]; 25 | utils.checkRequiredEnvVars(requiredEnvVars); 26 | 27 | await $`kubectl apply -f ${path.join(DIR, "namespace.yaml")}`; 28 | await $`kubectl apply -f ${path.join(DIR, "pvc.yaml")}`; 29 | const secretTemplatePath = path.join(DIR, "secret.template.yaml"); 30 | const secretRenderedPath = path.join(DIR, "secret.rendered.yaml"); 31 | const secretTemplateString = fs.readFileSync(secretTemplatePath, "utf8"); 32 | const secretTemplate = handlebars.compile(secretTemplateString); 33 | const secretVars = { 34 | HF_TOKEN: process.env.HF_TOKEN, 35 | }; 36 | fs.writeFileSync(secretRenderedPath, secretTemplate(secretVars)); 37 | await $`kubectl apply -f ${secretRenderedPath}`; 38 | const { models } = config["embedding-model"]["tei"]; 39 | await utils.model.addModels(models, "embedding-model", "tei"); 40 | } 41 | 42 | export async function uninstall() { 43 | const { models } = config["embedding-model"]["tei"]; 44 | await utils.model.removeAllModels(models, "embedding-model", "tei"); 45 | await $`kubectl delete -f ${path.join(DIR, "secret.rendered.yaml")} --ignore-not-found`; 46 | await $`kubectl delete -f ${path.join(DIR, "pvc.yaml")} --ignore-not-found`; 47 | await $`kubectl delete -f ${path.join(DIR, "namespace.yaml")} --ignore-not-found`; 48 | } 49 | -------------------------------------------------------------------------------- /components/llm-model/tgi/index.mjs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env zx 2 | 3 | import { fileURLToPath } from "url"; 4 | import path from "path"; 5 | import fs from "fs"; 6 | import handlebars from "handlebars"; 7 | import { $, cd } from "zx"; 8 | $.verbose = true; 9 | 10 | export const name = "TGI"; 11 | const __filename = fileURLToPath(import.meta.url); 12 | const DIR = path.dirname(__filename); 13 | let BASE_DIR; 14 | let config; 15 | let utils; 16 | 17 | export async function init(_BASE_DIR, _config, _utils) { 18 | BASE_DIR = _BASE_DIR; 19 | config = 
_config; 20 | utils = _utils; 21 | } 22 | 23 | export async function install() { 24 | const requiredEnvVars = ["HF_TOKEN"]; 25 | utils.checkRequiredEnvVars(requiredEnvVars); 26 | 27 | await $`kubectl apply -f ${path.join(DIR, "namespace.yaml")}`; 28 | await $`kubectl apply -f ${path.join(DIR, "pvc-huggingface-cache.yaml")}`; 29 | await $`kubectl apply -f ${path.join(DIR, "pvc-neuron-cache.yaml")}`; 30 | const secretTemplatePath = path.join(DIR, "secret.template.yaml"); 31 | const secretRenderedPath = path.join(DIR, "secret.rendered.yaml"); 32 | const secretTemplateString = fs.readFileSync(secretTemplatePath, "utf8"); 33 | const secretTemplate = handlebars.compile(secretTemplateString); 34 | const secretVars = { 35 | HF_TOKEN: process.env.HF_TOKEN, 36 | }; 37 | fs.writeFileSync(secretRenderedPath, secretTemplate(secretVars)); 38 | await $`kubectl apply -f ${secretRenderedPath}`; 39 | const { models } = config["llm-model"]["tgi"]; await utils.model.addModels(models, "llm-model", "tgi"); 40 | } 41 | 42 | export async function uninstall() { 43 | const { models } = config["llm-model"]["tgi"]; 44 | await utils.model.removeAllModels(models, "llm-model", "tgi"); 45 | await $`kubectl delete -f ${path.join(DIR, "secret.rendered.yaml")} --ignore-not-found`; 46 | await $`kubectl delete -f ${path.join(DIR, "pvc-huggingface-cache.yaml")} --ignore-not-found`; 47 | await $`kubectl delete -f ${path.join(DIR, "pvc-neuron-cache.yaml")} --ignore-not-found`; 48 | await $`kubectl delete -f ${path.join(DIR, "namespace.yaml")} --ignore-not-found`; 49 | } 50 | -------------------------------------------------------------------------------- /examples/mcp-server/calculator/index.mjs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env zx 2 | 3 | import { fileURLToPath } from "url"; 4 | import path from "path"; 5 | import fs from "fs"; 6 | import handlebars from "handlebars"; 7 | import { $, cd } from "zx"; 8 | $.verbose = true; 9 | 10 | export const name = "Calculator MCP Server"; 11 | const __filename = fileURLToPath(import.meta.url); 12 | const DIR = path.dirname(__filename); 13 | let BASE_DIR; 14 | let config; 15 | let utils; 16 | 17 | export async function init(_BASE_DIR, _config, _utils) { 18 | BASE_DIR = _BASE_DIR; 19 | config = _config; 20 | utils = _utils; 21 | } 22 | 23 | export async function install() { 24 | const { REGION } = process.env; 25 | await utils.terraform.apply(DIR); 26 | const ecrRepoUrl = await utils.terraform.output(DIR, { outputName: "ecr_repository_url" }); 27 | cd(DIR); 28 | await $`aws ecr get-login-password --region ${REGION} | docker login --username AWS --password-stdin ${ecrRepoUrl.split("/")[0]}`; 29 | const { useBuildx, arch } = config.docker; 30 | if (useBuildx) { 31 | await $`docker buildx build --platform linux/amd64,linux/arm64 -t ${ecrRepoUrl}:latest --push .`; 32 | } else { 33 | await $`docker build -t ${ecrRepoUrl}:latest .`; 34 | await $`docker push ${ecrRepoUrl}:latest`; 35 | } 36 | await $`kubectl apply -f ${path.join(DIR, "..", "namespace.yaml")}`; 37 | const mcpServerTemplatePath = path.join(DIR, "mcp-server.template.yaml"); 38 | const mcpServerRenderedPath = path.join(DIR, "mcp-server.rendered.yaml"); 39 | const mcpServerTemplateString = fs.readFileSync(mcpServerTemplatePath, "utf8"); 40 | const mcpServerTemplate = handlebars.compile(mcpServerTemplateString); 41 | const mcpServerVars = { 42 | useBuildx, 43 | arch, 44 | IMAGE: `${ecrRepoUrl}:latest`, 45 | }; 46 | fs.writeFileSync(mcpServerRenderedPath, mcpServerTemplate(mcpServerVars)); 47 | await $`kubectl apply -f
${DIR}/mcp-server.rendered.yaml`; 48 | } 49 | 50 | export async function uninstall() { 51 | await $`kubectl delete -f ${DIR}/mcp-server.rendered.yaml --ignore-not-found`; 52 | await utils.terraform.destroy(DIR); 53 | } 54 | -------------------------------------------------------------------------------- /components/llm-model/vllm/index.mjs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env zx 2 | 3 | import { fileURLToPath } from "url"; 4 | import path from "path"; 5 | import fs from "fs"; 6 | import handlebars from "handlebars"; 7 | import { $, cd } from "zx"; 8 | $.verbose = true; 9 | 10 | export const name = "vLLM"; 11 | const __filename = fileURLToPath(import.meta.url); 12 | const DIR = path.dirname(__filename); 13 | let BASE_DIR; 14 | let config; 15 | let utils; 16 | 17 | export async function init(_BASE_DIR, _config, _utils) { 18 | BASE_DIR = _BASE_DIR; 19 | config = _config; 20 | utils = _utils; 21 | } 22 | 23 | export async function install() { 24 | const requiredEnvVars = ["HF_TOKEN"]; 25 | utils.checkRequiredEnvVars(requiredEnvVars); 26 | 27 | await $`kubectl apply -f ${path.join(DIR, "namespace.yaml")}`; 28 | await $`kubectl apply -f ${path.join(DIR, "pvc-huggingface-cache.yaml")}`; 29 | await $`kubectl apply -f ${path.join(DIR, "pvc-neuron-cache.yaml")}`; 30 | const secretTemplatePath = path.join(DIR, "secret.template.yaml"); 31 | const secretRenderedPath = path.join(DIR, "secret.rendered.yaml"); 32 | const secretTemplateString = fs.readFileSync(secretTemplatePath, "utf8"); 33 | const secretTemplate = handlebars.compile(secretTemplateString); 34 | const secretVars = { 35 | HF_TOKEN: process.env.HF_TOKEN, 36 | }; 37 | fs.writeFileSync(secretRenderedPath, secretTemplate(secretVars)); 38 | await $`kubectl apply -f ${secretRenderedPath}`; 39 | const { models } = config["llm-model"]["vllm"]; 40 | await utils.model.addModels(models, "llm-model", "vllm"); 41 | } 42 | 43 | export async function uninstall() { 44 | const { models } = config["llm-model"]["vllm"]; 45 | await utils.model.removeAllModels(models, "llm-model", "vllm"); 46 | await $`kubectl delete -f ${path.join(DIR, "secret.rendered.yaml")} --ignore-not-found`; 47 | await $`kubectl delete -f ${path.join(DIR, "pvc-huggingface-cache.yaml")} --ignore-not-found`; 48 | await $`kubectl delete -f ${path.join(DIR, "pvc-neuron-cache.yaml")} --ignore-not-found`; 49 | await $`kubectl delete -f ${path.join(DIR, "namespace.yaml")} --ignore-not-found`; 50 | } 51 | -------------------------------------------------------------------------------- /components/o11y/langfuse/index.mjs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env zx 2 | 3 | import { fileURLToPath } from "url"; 4 | import path from "path"; 5 | import fs from "fs"; 6 | import handlebars from "handlebars"; 7 | import { $ } from "zx"; 8 | $.verbose = true; 9 | 10 | export const name = "Langfuse"; 11 | const __filename = fileURLToPath(import.meta.url); 12 | const DIR = path.dirname(__filename); 13 | let BASE_DIR; 14 | let config; 15 | let utils; 16 | 17 | export async function init(_BASE_DIR, _config, _utils) { 18 | BASE_DIR = _BASE_DIR; 19 | config = _config; 20 | utils = _utils; 21 | } 22 | 23 | export async function install() { 24 | const requiredEnvVars = ["LANGFUSE_USERNAME", "LANGFUSE_PASSWORD", "LANGFUSE_PUBLIC_KEY", "LANGFUSE_SECRET_KEY"]; 25 | utils.checkRequiredEnvVars(requiredEnvVars); 26 | 27 | await utils.terraform.apply(DIR); 28 | const tfOutput = await 
utils.terraform.output(DIR, {}); 29 | const langfuseBucketName = tfOutput.langfuse_bucket_name.value; 30 | 31 | await $`helm repo add langfuse https://langfuse.github.io/langfuse-k8s`; 32 | await $`helm repo update`; 33 | 34 | const valuesTemplatePath = path.join(DIR, "values.template.yaml"); 35 | const valuesRenderedPath = path.join(DIR, "values.rendered.yaml"); 36 | const valuesTemplateString = fs.readFileSync(valuesTemplatePath, "utf8"); 37 | const valuesTemplate = handlebars.compile(valuesTemplateString); 38 | const valuesVars = { 39 | DOMAIN: process.env.DOMAIN, 40 | LANGFUSE_USERNAME: process.env.LANGFUSE_USERNAME, 41 | LANGFUSE_PASSWORD: process.env.LANGFUSE_PASSWORD, 42 | LANGFUSE_PUBLIC_KEY: process.env.LANGFUSE_PUBLIC_KEY, 43 | LANGFUSE_SECRET_KEY: process.env.LANGFUSE_SECRET_KEY, 44 | LANGFUSE_BUCKET_NAME: langfuseBucketName, 45 | AWS_REGION: process.env.AWS_REGION, 46 | }; 47 | fs.writeFileSync(valuesRenderedPath, valuesTemplate(valuesVars)); 48 | await $`helm upgrade --install langfuse langfuse/langfuse --namespace langfuse --create-namespace -f ${valuesRenderedPath}`; 49 | } 50 | 51 | export async function uninstall() { 52 | await $`helm uninstall langfuse --namespace langfuse`; 53 | await utils.terraform.destroy(DIR); 54 | } 55 | -------------------------------------------------------------------------------- /cli-menu.json: -------------------------------------------------------------------------------- 1 | { 2 | "componentCategories": [ 3 | { 4 | "dir": "ai-gateway", 5 | "name": "AI Gateway", 6 | "components": [ 7 | { "dir": "litellm", "name": "LiteLLM" }, 8 | { "dir": "kong", "name": "Kong" } 9 | ] 10 | }, 11 | { 12 | "dir": "llm-model", 13 | "name": "LLM Model", 14 | "components": [ 15 | { "dir": "vllm", "name": "vLLM" }, 16 | { "dir": "sglang", "name": "SGLang" }, 17 | { "dir": "tgi", "name": "TGI" }, 18 | { "dir": "ollama", "name": "Ollama" } 19 | ] 20 | }, 21 | { 22 | "dir": "embedding-model", 23 | "name": "Embedding Model", 24 | "components": [{ "dir": "tei", "name": "Text Embedding Inference (TEI)" }] 25 | }, 26 | { "dir": "guardrail", "name": "Guardrail", "components": [{ "dir": "guardrails-ai", "name": "Guardrails AI" }] }, 27 | { 28 | "dir": "o11y", 29 | "name": "Observability", 30 | "components": [ 31 | { "dir": "langfuse", "name": "Langfuse" }, 32 | { "dir": "mlflow", "name": "MLflow" }, 33 | { "dir": "phoenix", "name": "Phoenix" } 34 | ] 35 | }, 36 | { "dir": "gui-app", "name": "GUI App", "components": [{ "dir": "openwebui", "name": "Open WebUI" }] }, 37 | { 38 | "dir": "vector-database", 39 | "name": "Vector Database", 40 | "components": [ 41 | { "dir": "qdrant", "name": "Qdrant" }, 42 | { "dir": "chroma", "name": "Chroma" }, 43 | { "dir": "milvus", "name": "Milvus" } 44 | ] 45 | }, 46 | { "dir": "workflow-automation", "name": "Workflow Automation", "components": [{ "dir": "n8n", "name": "n8n" }] } 47 | ], 48 | "exampleCategories": [ 49 | { "dir": "mcp-server", "name": "MCP Server", "examples": [{ "dir": "calculator", "name": "Calculator" }] }, 50 | { 51 | "dir": "strands-agents", 52 | "name": "Strands Agents", 53 | "examples": [{ "dir": "calculator-agent", "name": "Calculator Agent" }] 54 | }, 55 | { 56 | "dir": "agno", 57 | "name": "Agno", 58 | "examples": [{ "dir": "calculator-agent", "name": "Calculator Agent" }] 59 | } 60 | ] 61 | } 62 | -------------------------------------------------------------------------------- /components/vector-database/milvus/values.template.yaml: -------------------------------------------------------------------------------- 1 
| # https://github.com/Milvus-io/Milvus/issues/40267 2 | # Ingress exposes the service port, not the 9091 web UI port 3 | # ingress: 4 | # enabled: true 5 | # annotations: 6 | # external-dns.alpha.kubernetes.io/ingress-hostname-source: annotation-only 7 | # nginx.ingress.kubernetes.io/auth-type: basic 8 | # nginx.ingress.kubernetes.io/auth-secret: basic-auth 9 | # nginx.ingress.kubernetes.io/auth-realm: "Authentication Required" 10 | # ingressClassName: nginx 11 | # rules: 12 | # - host: milvus.{{{DOMAIN}}} 13 | # path: / 14 | # pathType: Prefix 15 | 16 | cluster: 17 | enabled: false 18 | 19 | serviceAccount: 20 | create: true 21 | 22 | etcd: 23 | replicaCount: 1 24 | resources: 25 | requests: 26 | cpu: 200m 27 | memory: 256Mi 28 | limits: 29 | memory: 256Mi 30 | 31 | minio: 32 | enabled: false 33 | externalS3: 34 | enabled: true 35 | host: "s3.{{AWS_REGION}}.amazonaws.com" 36 | port: "443" 37 | useSSL: true 38 | bucketName: "{{MILVUS_BUCKET_NAME}}" 39 | rootPath: "milvus" 40 | useIAM: true 41 | cloudProvider: "aws" 42 | region: "{{AWS_REGION}}" 43 | 44 | pulsarv3: 45 | components: 46 | autorecovery: false 47 | zookeeper: 48 | replicaCount: 1 49 | resources: 50 | requests: 51 | cpu: 200m 52 | memory: 256Mi 53 | limits: 54 | memory: 256Mi 55 | broker: 56 | replicaCount: 1 57 | resources: 58 | requests: 59 | cpu: 500m 60 | memory: 2Gi 61 | limits: 62 | memory: 2Gi 63 | configData: 64 | autoSkipNonRecoverableData: "true" 65 | managedLedgerDefaultEnsembleSize: "1" 66 | managedLedgerDefaultWriteQuorum: "1" 67 | managedLedgerDefaultAckQuorum: "1" 68 | proxy: 69 | replicaCount: 1 70 | resources: 71 | requests: 72 | cpu: 500m 73 | memory: 1Gi 74 | limits: 75 | memory: 1Gi 76 | bookkeeper: 77 | replicaCount: 1 78 | resources: 79 | requests: 80 | cpu: 500m 81 | memory: 2Gi 82 | limits: 83 | memory: 2Gi 84 | -------------------------------------------------------------------------------- /components/llm-model/tgi/model-qwen3-8b.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: qwen3-8b 5 | namespace: tgi 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: qwen3-8b 11 | template: 12 | metadata: 13 | labels: 14 | app: qwen3-8b 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | {{{KARPENTER_PREFIX}}}/instance-family: g6e 22 | containers: 23 | - name: tgi 24 | image: ghcr.io/huggingface/text-generation-inference:3.3.4 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | args: 34 | - --model-id=Qwen/Qwen3-8B 35 | - --trust-remote-code 36 | - --huggingface-hub-cache=/root/.cache/huggingface/hub 37 | env: 38 | - name: HF_TOKEN 39 | valueFrom: 40 | secretKeyRef: 41 | name: hf-token 42 | key: token 43 | ports: 44 | - name: http 45 | containerPort: 80 46 | resources: 47 | requests: 48 | cpu: 3 #75% 49 | memory: 24Gi #75% 50 | nvidia.com/gpu: 1 51 | limits: 52 | nvidia.com/gpu: 1 53 | volumeMounts: 54 | - name: huggingface-cache 55 | mountPath: /root/.cache/huggingface 56 | volumes: 57 | - name: huggingface-cache 58 | persistentVolumeClaim: 59 | claimName: huggingface-cache 60 | tolerations: 61 | - key: nvidia.com/gpu 62 | operator: Exists 63 | effect: NoSchedule 64 | --- 65 | apiVersion: v1 66 | kind: Service 67 | metadata: 68 | name: qwen3-8b 69 | namespace: tgi
70 | spec: 71 | selector: 72 | app: qwen3-8b 73 | ports: 74 | - name: http 75 | port: 80 76 | -------------------------------------------------------------------------------- /components/ai-gateway/kong/index.mjs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env zx 2 | 3 | import { fileURLToPath } from "url"; 4 | import path from "path"; 5 | import fs from "fs"; 6 | import handlebars from "handlebars"; 7 | import { $ } from "zx"; 8 | $.verbose = true; 9 | 10 | export const name = "Kong"; 11 | const __filename = fileURLToPath(import.meta.url); 12 | const DIR = path.dirname(__filename); 13 | let BASE_DIR; 14 | let config; 15 | let utils; 16 | 17 | export async function init(_BASE_DIR, _config, _utils) { 18 | BASE_DIR = _BASE_DIR; 19 | config = _config; 20 | utils = _utils; 21 | } 22 | 23 | export async function install() { 24 | const requiredEnvVars = ["KONG_API_KEY", "KONG_API_KEY_HEADER"]; 25 | utils.checkRequiredEnvVars(requiredEnvVars); 26 | const { DOMAIN, KONG_API_KEY, KONG_API_KEY_HEADER } = process.env; 27 | 28 | const valuesTemplatePath = path.join(DIR, "values.template.yaml"); 29 | const valuesRenderedPath = path.join(DIR, "values.rendered.yaml"); 30 | const valuesVars = { 31 | DOMAIN, 32 | }; 33 | utils.renderTemplate(valuesTemplatePath, valuesRenderedPath, valuesVars); 34 | await $`helm repo add kong https://charts.konghq.com`; 35 | await $`helm upgrade --install kong kong/kong --namespace kong --create-namespace -f ${valuesRenderedPath}`; 36 | 37 | const kongTemplatePath = path.join(DIR, "kong.template.yaml"); 38 | const kongRenderedPath = path.join(DIR, "kong.rendered.yaml"); 39 | const kongVars = { 40 | DOMAIN, 41 | KONG_API_KEY, 42 | KONG_API_KEY_HEADER, 43 | }; 44 | utils.renderTemplate(kongTemplatePath, kongRenderedPath, kongVars); 45 | await $`kubectl apply -f ${kongRenderedPath}`; 46 | } 47 | 48 | export async function uninstall() { 49 | const { DOMAIN, KONG_API_KEY, KONG_API_KEY_HEADER } = process.env; 50 | const kongTemplatePath = path.join(DIR, "kong.template.yaml"); 51 | const kongRenderedPath = path.join(DIR, "kong.rendered.yaml"); 52 | const kongVars = { 53 | DOMAIN, 54 | KONG_API_KEY, 55 | KONG_API_KEY_HEADER, 56 | }; 57 | utils.renderTemplate(kongTemplatePath, kongRenderedPath, kongVars); 58 | await $`kubectl delete -f ${kongRenderedPath} --ignore-not-found`; 59 | await $`helm uninstall kong --namespace kong`; 60 | } 61 | -------------------------------------------------------------------------------- /components/ai-gateway/kong/examples/kong.yaml: -------------------------------------------------------------------------------- 1 | # Chat Completions 2 | apiVersion: configuration.konghq.com/v1 3 | kind: KongPlugin 4 | metadata: 5 | name: ai-proxy-qwen3-30b-instruct-fp8-chat 6 | namespace: vllm 7 | annotations: 8 | kubernetes.io/ingress.class: kong 9 | plugin: ai-proxy 10 | config: 11 | logging: 12 | log_statistics: true 13 | model: 14 | name: qwen3-30b-instruct-fp8 15 | options: 16 | upstream_url: http://qwen3-30b-instruct-fp8.vllm:8000/v1/chat/completions 17 | provider: openai 18 | route_type: llm/v1/chat 19 | --- 20 | apiVersion: networking.k8s.io/v1 21 | kind: Ingress 22 | metadata: 23 | name: kong-qwen3-30b-instruct-fp8-chat 24 | namespace: vllm 25 | annotations: 26 | konghq.com/plugins: ai-proxy-qwen3-30b-instruct-fp8-chat 27 | spec: 28 | ingressClassName: kong 29 | rules: 30 | - http: 31 | paths: 32 | - path: /v1/chat/completions 33 | pathType: Prefix 34 | backend: 35 | service: 36 | name: 
qwen3-30b-instruct-fp8 37 | port: 38 | number: 8000 39 | --- 40 | # Completions 41 | apiVersion: configuration.konghq.com/v1 42 | kind: KongPlugin 43 | metadata: 44 | name: ai-proxy-qwen3-30b-instruct-fp8-completions 45 | namespace: vllm 46 | annotations: 47 | kubernetes.io/ingress.class: kong 48 | plugin: ai-proxy 49 | config: 50 | logging: 51 | log_statistics: true 52 | model: 53 | name: qwen3-30b-instruct-fp8 54 | options: 55 | upstream_url: http://qwen3-30b-instruct-fp8.vllm:8000/v1/completions 56 | provider: openai 57 | route_type: llm/v1/completions 58 | --- 59 | apiVersion: networking.k8s.io/v1 60 | kind: Ingress 61 | metadata: 62 | name: kong-qwen3-30b-instruct-fp8-completions 63 | namespace: vllm 64 | annotations: 65 | konghq.com/plugins: ai-proxy-qwen3-30b-instruct-fp8-completions 66 | spec: 67 | ingressClassName: kong 68 | rules: 69 | - http: 70 | paths: 71 | - path: /v1/completions 72 | pathType: Prefix 73 | backend: 74 | service: 75 | name: qwen3-30b-instruct-fp8 76 | port: 77 | number: 8000 78 | -------------------------------------------------------------------------------- /examples/agno/calculator-agent/agent.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: calculator-agent 5 | namespace: agno 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: calculator-agent 11 | template: 12 | metadata: 13 | labels: 14 | app: calculator-agent 15 | spec: 16 | serviceAccountName: calculator-agent 17 | automountServiceAccountToken: false 18 | {{#unless useBuildx}} 19 | nodeSelector: 20 | kubernetes.io/arch: {{{arch}}} 21 | {{/unless}} 22 | containers: 23 | - name: agent 24 | image: {{{IMAGE}}} 25 | env: 26 | - name: USE_BEDROCK 27 | value: "{{{USE_BEDROCK}}}" 28 | {{#if USE_BEDROCK}} 29 | - name: BEDROCK_MODEL 30 | value: {{{BEDROCK_MODEL}}} 31 | {{else}} 32 | - name: LITELLM_BASE_URL 33 | value: {{{LITELLM_BASE_URL}}} 34 | - name: LITELLM_API_KEY 35 | value: {{{LITELLM_API_KEY}}} 36 | - name: LITELLM_MODEL_NAME 37 | value: {{{LITELLM_MODEL_NAME}}} 38 | {{/if}} 39 | - name: USE_MCP_TOOLS 40 | value: "{{{USE_MCP_TOOLS}}}" 41 | {{#if LANGFUSE_HOST}} 42 | - name: LANGFUSE_HOST 43 | value: {{{LANGFUSE_HOST}}} 44 | - name: LANGFUSE_PUBLIC_KEY 45 | value: {{{LANGFUSE_PUBLIC_KEY}}} 46 | - name: LANGFUSE_SECRET_KEY 47 | value: {{{LANGFUSE_SECRET_KEY}}} 48 | {{/if}} 49 | ports: 50 | - name: http 51 | containerPort: 80 52 | resources: 53 | requests: 54 | cpu: 250m 55 | memory: 512Mi 56 | limits: 57 | memory: 512Mi 58 | --- 59 | apiVersion: v1 60 | kind: Service 61 | metadata: 62 | name: calculator-agent 63 | namespace: agno 64 | spec: 65 | selector: 66 | app: calculator-agent 67 | ports: 68 | - name: http 69 | port: 80 70 | --- 71 | apiVersion: v1 72 | kind: ServiceAccount 73 | metadata: 74 | name: calculator-agent 75 | namespace: agno 76 | automountServiceAccountToken: false 77 | -------------------------------------------------------------------------------- /components/llm-model/tgi/model-qwen3-8b-fp8.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: qwen3-8b-fp8 5 | namespace: tgi 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: qwen3-8b-fp8 11 | template: 12 | metadata: 13 | labels: 14 | app: qwen3-8b-fp8 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | 
nodeSelector: 21 | {{{KARPENTER_PREFIX}}}/instance-family: g6e 22 | containers: 23 | - name: tgi 24 | image: ghcr.io/huggingface/text-generation-inference:3.3.4 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | args: 34 | - --model-id=Qwen/Qwen3-8B-FP8 35 | - --trust-remote-code 36 | - --huggingface-hub-cache=/root/.cache/huggingface/hub 37 | env: 38 | - name: HF_TOKEN 39 | valueFrom: 40 | secretKeyRef: 41 | name: hf-token 42 | key: token 43 | - name: CUDA_GRAPHS 44 | value: "0" 45 | ports: 46 | - name: http 47 | containerPort: 80 48 | resources: 49 | requests: 50 | cpu: 3 #75% 51 | memory: 24Gi #75% 52 | nvidia.com/gpu: 1 53 | limits: 54 | nvidia.com/gpu: 1 55 | volumeMounts: 56 | - name: huggingface-cache 57 | mountPath: /root/.cache/huggingface 58 | volumes: 59 | - name: huggingface-cache 60 | persistentVolumeClaim: 61 | claimName: huggingface-cache 62 | tolerations: 63 | - key: nvidia.com/gpu 64 | operator: Exists 65 | effect: NoSchedule 66 | --- 67 | apiVersion: v1 68 | kind: Service 69 | metadata: 70 | name: qwen3-8b-fp8 71 | namespace: tgi 72 | spec: 73 | selector: 74 | app: qwen3-8b-fp8 75 | ports: 76 | - name: http 77 | port: 80 78 | -------------------------------------------------------------------------------- /components/llm-model/tgi/model-deepseek-r1-qwen3-8b.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: deepseek-r1-qwen3-8b 5 | namespace: tgi 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: deepseek-r1-qwen3-8b 11 | template: 12 | metadata: 13 | labels: 14 | app: deepseek-r1-qwen3-8b 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | {{{KARPENTER_PREFIX}}}/instance-family: g6e 22 | containers: 23 | - name: tgi 24 | image: ghcr.io/huggingface/text-generation-inference:3.3.4 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | args: 34 | - --model-id=deepseek-ai/DeepSeek-R1-0528-Qwen3-8B 35 | - --trust-remote-code 36 | - --huggingface-hub-cache=/root/.cache/huggingface/hub 37 | env: 38 | - name: HF_TOKEN 39 | valueFrom: 40 | secretKeyRef: 41 | name: hf-token 42 | key: token 43 | # - name: CUDA_GRAPHS 44 | # value: "0" 45 | ports: 46 | - name: http 47 | containerPort: 80 48 | resources: 49 | requests: 50 | cpu: 3 #75% 51 | memory: 24Gi #75% 52 | nvidia.com/gpu: 1 53 | limits: 54 | nvidia.com/gpu: 1 55 | volumeMounts: 56 | - name: huggingface-cache 57 | mountPath: /root/.cache/huggingface 58 | volumes: 59 | - name: huggingface-cache 60 | persistentVolumeClaim: 61 | claimName: huggingface-cache 62 | tolerations: 63 | - key: nvidia.com/gpu 64 | operator: Exists 65 | effect: NoSchedule 66 | --- 67 | apiVersion: v1 68 | kind: Service 69 | metadata: 70 | name: deepseek-r1-qwen3-8b 71 | namespace: tgi 72 | spec: 73 | selector: 74 | app: deepseek-r1-qwen3-8b 75 | ports: 76 | - name: http 77 | port: 80 78 | -------------------------------------------------------------------------------- /components/llm-model/vllm/model-gemma3-27b-gptq.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | 
metadata: 4 | name: gemma3-27b-gptq 5 | namespace: vllm 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: gemma3-27b-gptq 11 | template: 12 | metadata: 13 | labels: 14 | app: gemma3-27b-gptq 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | {{{KARPENTER_PREFIX}}}/instance-family: g6e 22 | containers: 23 | - name: vllm 24 | image: vllm/vllm-openai:v0.10.2 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | command: ["vllm", "serve"] 34 | args: 35 | - ISTA-DASLab/gemma-3-27b-it-GPTQ-4b-128g 36 | - --served-model-name=gemma3-27b-gptq 37 | - --trust-remote-code 38 | - --gpu-memory-utilization=0.90 39 | - --max-model-len=32768 # 32K 40 | env: 41 | - name: HUGGING_FACE_HUB_TOKEN 42 | valueFrom: 43 | secretKeyRef: 44 | name: hf-token 45 | key: token 46 | ports: 47 | - name: http 48 | containerPort: 8000 49 | resources: 50 | requests: 51 | cpu: 3 #75% 52 | memory: 24Gi #75% 53 | nvidia.com/gpu: 1 54 | limits: 55 | nvidia.com/gpu: 1 56 | volumeMounts: 57 | - name: huggingface-cache 58 | mountPath: /root/.cache/huggingface 59 | volumes: 60 | - name: huggingface-cache 61 | persistentVolumeClaim: 62 | claimName: huggingface-cache 63 | tolerations: 64 | - key: nvidia.com/gpu 65 | operator: Exists 66 | effect: NoSchedule 67 | --- 68 | apiVersion: v1 69 | kind: Service 70 | metadata: 71 | name: gemma3-27b-gptq 72 | namespace: vllm 73 | spec: 74 | selector: 75 | app: gemma3-27b-gptq 76 | ports: 77 | - name: http 78 | port: 8000 79 | -------------------------------------------------------------------------------- /examples/strands-agents/calculator-agent/agent.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: calculator-agent 5 | namespace: strands-agents 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: calculator-agent 11 | template: 12 | metadata: 13 | labels: 14 | app: calculator-agent 15 | spec: 16 | serviceAccountName: calculator-agent 17 | automountServiceAccountToken: false 18 | {{#unless useBuildx}} 19 | nodeSelector: 20 | kubernetes.io/arch: {{{arch}}} 21 | {{/unless}} 22 | containers: 23 | - name: agent 24 | image: {{{IMAGE}}} 25 | env: 26 | - name: USE_BEDROCK 27 | value: "{{{USE_BEDROCK}}}" 28 | {{#if USE_BEDROCK}} 29 | - name: BEDROCK_MODEL 30 | value: {{{BEDROCK_MODEL}}} 31 | {{else}} 32 | - name: LITELLM_BASE_URL 33 | value: {{{LITELLM_BASE_URL}}} 34 | - name: LITELLM_API_KEY 35 | value: {{{LITELLM_API_KEY}}} 36 | - name: LITELLM_MODEL_NAME 37 | value: {{{LITELLM_MODEL_NAME}}} 38 | {{/if}} 39 | - name: USE_MCP_TOOLS 40 | value: "{{{USE_MCP_TOOLS}}}" 41 | - name: USE_MCP_GATEWAY 42 | value: "{{{USE_MCP_GATEWAY}}}" 43 | {{#if LANGFUSE_HOST}} 44 | - name: LANGFUSE_HOST 45 | value: {{{LANGFUSE_HOST}}} 46 | - name: LANGFUSE_PUBLIC_KEY 47 | value: {{{LANGFUSE_PUBLIC_KEY}}} 48 | - name: LANGFUSE_SECRET_KEY 49 | value: {{{LANGFUSE_SECRET_KEY}}} 50 | {{/if}} 51 | ports: 52 | - name: http 53 | containerPort: 80 54 | resources: 55 | requests: 56 | cpu: 250m 57 | memory: 512Mi 58 | limits: 59 | memory: 512Mi 60 | --- 61 | apiVersion: v1 62 | kind: Service 63 | metadata: 64 | name: calculator-agent 65 | namespace: strands-agents 66 | spec: 67 | selector: 68 | app: calculator-agent 69 | ports: 70 | - name: http 71 | port: 80 72 
| --- 73 | apiVersion: v1 74 | kind: ServiceAccount 75 | metadata: 76 | name: calculator-agent 77 | namespace: strands-agents 78 | automountServiceAccountToken: false 79 | -------------------------------------------------------------------------------- /ecr-image-sync.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # ECR Image Sync Script 4 | # Usage: ./ecr-image-sync.sh 5 | 6 | set -e 7 | 8 | # List of images to sync (add your images here) 9 | IMAGES=( 10 | "bitnamilegacy/redis:8.2.1-debian-12-r0" 11 | "bitnamilegacy/postgresql:17.5.0-debian-12-r8" 12 | "bitnamilegacy/clickhouse:25.2.1-debian-12-r0" 13 | "bitnamilegacy/valkey:8.0.2-debian-12-r2" 14 | "bitnamilegacy/zookeeper:3.9.3-debian-12-r8" 15 | "bitnamilegacy/minio:2024.12.18-debian-12-r1" 16 | ) 17 | 18 | # Prompt for AWS configuration 19 | read -p "Enter AWS Region: " AWS_REGION 20 | read -p "Enter AWS Account ID: " AWS_ACCOUNT_ID 21 | read -p "Enter Public ECR Registry Alias: " ECR_REGISTRY_ALIAS 22 | 23 | echo "Configuration:" 24 | echo " AWS Region: $AWS_REGION" 25 | echo " AWS Account ID: $AWS_ACCOUNT_ID" 26 | echo " ECR Registry Alias: $ECR_REGISTRY_ALIAS" 27 | echo "" 28 | 29 | # Login to public ECR (always uses us-east-1 for public ECR) 30 | echo "Logging into public ECR..." 31 | aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws 32 | 33 | # Function to process each image 34 | process_image() { 35 | local image="$1" 36 | local repo_name=$(echo "$image" | cut -d':' -f1 | sed 's/.*\///') 37 | local tag=$(echo "$image" | cut -d':' -f2) 38 | 39 | echo "Processing: $image" 40 | echo " Repository: $repo_name" 41 | echo " Tag: $tag" 42 | 43 | # Check if ECR repository exists, create if not 44 | if ! aws ecr-public describe-repositories --repository-names "$repo_name" --region us-east-1 > /dev/null 2>&1; then 45 | echo " Creating ECR repository: $repo_name" 46 | aws ecr-public create-repository --repository-name "$repo_name" --region us-east-1 47 | else 48 | echo " ECR repository exists: $repo_name" 49 | fi 50 | 51 | # Use buildx imagetools to copy multi-arch image with manifest list 52 | local ecr_image="public.ecr.aws/$ECR_REGISTRY_ALIAS/$repo_name:$tag" 53 | echo " Copying multi-arch image to: $ecr_image" 54 | docker buildx imagetools create --tag "$ecr_image" "$image" 55 | 56 | echo " ✓ Completed: $image" 57 | echo "" 58 | } 59 | 60 | # Process all images in the list 61 | echo "Processing ${#IMAGES[@]} images..." 62 | echo "" 63 | 64 | for image in "${IMAGES[@]}"; do 65 | process_image "$image" 66 | done 67 | 68 | echo "All images processed successfully!"
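# Example session (illustrative values only; the region, account ID, and
# registry alias below are placeholders, not defaults shipped with this script):
#   ./ecr-image-sync.sh
#   Enter AWS Region: us-west-2
#   Enter AWS Account ID: 123456789012
#   Enter Public ECR Registry Alias: my-alias
# A synced image is then pullable as, for example:
#   docker pull public.ecr.aws/my-alias/redis:8.2.1-debian-12-r0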
69 | -------------------------------------------------------------------------------- /components/o11y/mlflow/main.tf: -------------------------------------------------------------------------------- 1 | variable "region" { 2 | type = string 3 | default = "us-west-2" 4 | } 5 | variable "name" { 6 | type = string 7 | default = "genai-on-eks" 8 | } 9 | terraform { 10 | required_providers { 11 | aws = { 12 | source = "hashicorp/aws" 13 | version = "~> 5.96.0" 14 | } 15 | } 16 | } 17 | provider "aws" { 18 | region = var.region 19 | } 20 | 21 | # S3 Bucket for MLflow 22 | resource "aws_s3_bucket" "mlflow" { 23 | bucket_prefix = "${var.name}-bucket-mlflow-" 24 | force_destroy = true 25 | } 26 | 27 | # Block public access to the S3 bucket 28 | resource "aws_s3_bucket_public_access_block" "mlflow" { 29 | bucket = aws_s3_bucket.mlflow.id 30 | 31 | block_public_acls = true 32 | block_public_policy = true 33 | ignore_public_acls = true 34 | restrict_public_buckets = true 35 | } 36 | 37 | # Enable server-side encryption for the S3 bucket 38 | resource "aws_s3_bucket_server_side_encryption_configuration" "mlflow" { 39 | bucket = aws_s3_bucket.mlflow.id 40 | 41 | rule { 42 | apply_server_side_encryption_by_default { 43 | sse_algorithm = "AES256" 44 | } 45 | } 46 | } 47 | 48 | output "mlflow_bucket_name" { 49 | value = aws_s3_bucket.mlflow.id 50 | } 51 | 52 | resource "aws_iam_role" "mlflow_s3_access" { 53 | name = "${var.name}-${var.region}-mlflow-s3-access" 54 | assume_role_policy = jsonencode({ 55 | Version = "2012-10-17" 56 | Statement = [{ 57 | Effect = "Allow" 58 | Principal = { 59 | Service = "pods.eks.amazonaws.com" 60 | } 61 | Action = ["sts:AssumeRole", "sts:TagSession"] 62 | }] 63 | }) 64 | } 65 | 66 | resource "aws_iam_role_policy" "mlflow_s3_access" { 67 | role = aws_iam_role.mlflow_s3_access.name 68 | policy = jsonencode({ 69 | Version = "2012-10-17" 70 | Statement = [{ 71 | Effect = "Allow" 72 | Action = [ 73 | "s3:GetObject", 74 | "s3:PutObject", 75 | "s3:DeleteObject", 76 | "s3:ListBucket" 77 | ] 78 | Resource = [ 79 | "arn:aws:s3:::${var.name}-bucket-mlflow-*", 80 | "arn:aws:s3:::${var.name}-bucket-mlflow-*/*" 81 | ] 82 | }] 83 | }) 84 | } 85 | 86 | resource "aws_eks_pod_identity_association" "mlflow_s3" { 87 | cluster_name = var.name 88 | namespace = "mlflow" 89 | service_account = "mlflow" 90 | role_arn = aws_iam_role.mlflow_s3_access.arn 91 | } 92 | -------------------------------------------------------------------------------- /components/embedding-model/tei/model-qwen3-embedding-4b-bf16.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: qwen3-embedding-4b-bf16 5 | namespace: tei 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: qwen3-embedding-4b-bf16 11 | template: 12 | metadata: 13 | labels: 14 | app: qwen3-embedding-4b-bf16 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | {{{KARPENTER_PREFIX}}}/instance-family: g6 22 | containers: 23 | - name: tei 24 | image: ghcr.io/huggingface/text-embeddings-inference:1.8 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | args: 34 | - --model-id=Qwen/Qwen3-Embedding-4B 35 | - --huggingface-hub-cache=/root/.cache/huggingface/hub 36 | env: 37 | - name: HF_TOKEN 38 | valueFrom: 39 | 
secretKeyRef: 40 | name: hf-token 41 | key: token 42 | ports: 43 | - name: http 44 | containerPort: 80 45 | resources: 46 | requests: 47 | cpu: 3 #75% 48 | memory: 24Gi #75% 49 | nvidia.com/gpu: 1 50 | limits: 51 | nvidia.com/gpu: 1 52 | volumeMounts: 53 | - name: huggingface-cache 54 | mountPath: /root/.cache/huggingface 55 | - name: shm 56 | mountPath: /dev/shm 57 | volumes: 58 | - name: huggingface-cache 59 | persistentVolumeClaim: 60 | claimName: huggingface-cache 61 | - name: shm 62 | emptyDir: 63 | medium: Memory 64 | sizeLimit: 10Gi 65 | tolerations: 66 | - key: nvidia.com/gpu 67 | operator: Exists 68 | effect: NoSchedule 69 | --- 70 | apiVersion: v1 71 | kind: Service 72 | metadata: 73 | name: qwen3-embedding-4b-bf16 74 | namespace: tei 75 | spec: 76 | selector: 77 | app: qwen3-embedding-4b-bf16 78 | ports: 79 | - name: http 80 | port: 80 81 | -------------------------------------------------------------------------------- /components/embedding-model/tei/model-qwen3-embedding-8b-bf16.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: qwen3-embedding-8b-bf16 5 | namespace: tei 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: qwen3-embedding-8b-bf16 11 | template: 12 | metadata: 13 | labels: 14 | app: qwen3-embedding-8b-bf16 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | {{{KARPENTER_PREFIX}}}/instance-family: g6 22 | containers: 23 | - name: tei 24 | image: ghcr.io/huggingface/text-embeddings-inference:1.8 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | args: 34 | - --model-id=Qwen/Qwen3-Embedding-8B 35 | - --huggingface-hub-cache=/root/.cache/huggingface/hub 36 | env: 37 | - name: HF_TOKEN 38 | valueFrom: 39 | secretKeyRef: 40 | name: hf-token 41 | key: token 42 | ports: 43 | - name: http 44 | containerPort: 80 45 | resources: 46 | requests: 47 | cpu: 3 #75% 48 | memory: 12Gi #75% 49 | nvidia.com/gpu: 1 50 | limits: 51 | nvidia.com/gpu: 1 52 | volumeMounts: 53 | - name: huggingface-cache 54 | mountPath: /root/.cache/huggingface 55 | - name: shm 56 | mountPath: /dev/shm 57 | volumes: 58 | - name: huggingface-cache 59 | persistentVolumeClaim: 60 | claimName: huggingface-cache 61 | - name: shm 62 | emptyDir: 63 | medium: Memory 64 | sizeLimit: 10Gi 65 | tolerations: 66 | - key: nvidia.com/gpu 67 | operator: Exists 68 | effect: NoSchedule 69 | --- 70 | apiVersion: v1 71 | kind: Service 72 | metadata: 73 | name: qwen3-embedding-8b-bf16 74 | namespace: tei 75 | spec: 76 | selector: 77 | app: qwen3-embedding-8b-bf16 78 | ports: 79 | - name: http 80 | port: 80 81 | -------------------------------------------------------------------------------- /components/embedding-model/tei/model-qwen3-embedding-06b-bf16.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: qwen3-embedding-06b-bf16 5 | namespace: tei 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: qwen3-embedding-06b-bf16 11 | template: 12 | metadata: 13 | labels: 14 | app: qwen3-embedding-06b-bf16 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | 
{{{KARPENTER_PREFIX}}}/instance-family: g6 22 | containers: 23 | - name: tei 24 | image: ghcr.io/huggingface/text-embeddings-inference:1.8 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | args: 34 | - --model-id=Qwen/Qwen3-Embedding-0.6B 35 | - --huggingface-hub-cache=/root/.cache/huggingface/hub 36 | env: 37 | - name: HF_TOKEN 38 | valueFrom: 39 | secretKeyRef: 40 | name: hf-token 41 | key: token 42 | ports: 43 | - name: http 44 | containerPort: 80 45 | resources: 46 | requests: 47 | cpu: 3 #75% 48 | memory: 12Gi #75% 49 | nvidia.com/gpu: 1 50 | limits: 51 | nvidia.com/gpu: 1 52 | volumeMounts: 53 | - name: huggingface-cache 54 | mountPath: /root/.cache/huggingface 55 | - name: shm 56 | mountPath: /dev/shm 57 | volumes: 58 | - name: huggingface-cache 59 | persistentVolumeClaim: 60 | claimName: huggingface-cache 61 | - name: shm 62 | emptyDir: 63 | medium: Memory 64 | sizeLimit: 10Gi 65 | tolerations: 66 | - key: nvidia.com/gpu 67 | operator: Exists 68 | effect: NoSchedule 69 | --- 70 | apiVersion: v1 71 | kind: Service 72 | metadata: 73 | name: qwen3-embedding-06b-bf16 74 | namespace: tei 75 | spec: 76 | selector: 77 | app: qwen3-embedding-06b-bf16 78 | ports: 79 | - name: http 80 | port: 80 81 | -------------------------------------------------------------------------------- /components/vector-database/milvus/main.tf: -------------------------------------------------------------------------------- 1 | variable "region" { 2 | type = string 3 | default = "us-west-2" 4 | } 5 | variable "name" { 6 | type = string 7 | default = "genai-on-eks" 8 | } 9 | terraform { 10 | required_providers { 11 | aws = { 12 | source = "hashicorp/aws" 13 | version = "~> 5.96.0" 14 | } 15 | } 16 | } 17 | provider "aws" { 18 | region = var.region 19 | } 20 | 21 | # S3 Bucket for Milvus 22 | resource "aws_s3_bucket" "milvus" { 23 | bucket_prefix = "${var.name}-bucket-milvus-" 24 | force_destroy = true 25 | } 26 | 27 | # Block public access to the S3 bucket 28 | resource "aws_s3_bucket_public_access_block" "milvus" { 29 | bucket = aws_s3_bucket.milvus.id 30 | 31 | block_public_acls = true 32 | block_public_policy = true 33 | ignore_public_acls = true 34 | restrict_public_buckets = true 35 | } 36 | 37 | # Enable server-side encryption for the S3 bucket 38 | resource "aws_s3_bucket_server_side_encryption_configuration" "milvus" { 39 | bucket = aws_s3_bucket.milvus.id 40 | 41 | rule { 42 | apply_server_side_encryption_by_default { 43 | sse_algorithm = "AES256" 44 | } 45 | } 46 | } 47 | 48 | output "milvus_bucket_name" { 49 | value = aws_s3_bucket.milvus.id 50 | } 51 | 52 | resource "aws_iam_role" "milvus_s3_access" { 53 | name = "${var.name}-${var.region}-milvus-s3-access" 54 | assume_role_policy = jsonencode({ 55 | Version = "2012-10-17" 56 | Statement = [{ 57 | Effect = "Allow" 58 | Principal = { 59 | Service = "pods.eks.amazonaws.com" 60 | } 61 | Action = ["sts:AssumeRole", "sts:TagSession"] 62 | }] 63 | }) 64 | } 65 | 66 | resource "aws_iam_role_policy" "milvus_s3_access" { 67 | role = aws_iam_role.milvus_s3_access.name 68 | policy = jsonencode({ 69 | Version = "2012-10-17" 70 | Statement = [{ 71 | Effect = "Allow" 72 | Action = [ 73 | "s3:GetObject", 74 | "s3:PutObject", 75 | "s3:DeleteObject", 76 | "s3:ListBucket" 77 | ] 78 | Resource = [ 79 | "arn:aws:s3:::${var.name}-bucket-milvus-*", 80 | "arn:aws:s3:::${var.name}-bucket-milvus-*/*" 81 | ] 82 | }] 83 | }) 84 | } 85 | 86 
| resource "aws_eks_pod_identity_association" "milvus_s3" { 87 | cluster_name = var.name 88 | namespace = "milvus" 89 | service_account = "milvus" 90 | role_arn = aws_iam_role.milvus_s3_access.arn 91 | } 92 | -------------------------------------------------------------------------------- /components/ai-gateway/kong/values.template.yaml: -------------------------------------------------------------------------------- 1 | proxy: 2 | enabled: true 3 | type: ClusterIP 4 | ingress: 5 | enabled: true 6 | hostname: kong.{{{DOMAIN}}} 7 | path: / 8 | pathType: Prefix 9 | ingressClassName: {{#if DOMAIN}}shared-{{/if}}internet-facing-alb 10 | annotations: 11 | alb.ingress.kubernetes.io/target-type: ip 12 | alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}]' 13 | http: 14 | enabled: true 15 | tls: 16 | enabled: false 17 | 18 | # Note. Kong Enterprise RBAC is required to securely expose Kong Manager via Ingress. 19 | # Since Kong Manager call Kong Admin API directly from the browser, we cannot just put them behind Nginx Ingress. 20 | # Using: 21 | # kubectl -n kong port-forward svc/kong-kong-admin 8001:800 22 | # kubectl -n kong port-forward svc/kong-kong-manager 8002:8002 23 | manager: 24 | enabled: true 25 | type: ClusterIP 26 | # ingress: 27 | # enabled: true 28 | # hostname: kong-manager.{{{DOMAIN}}} 29 | # path: / 30 | # pathType: Prefix 31 | # ingressClassName: {{#if DOMAIN}}shared-{{/if}}internet-facing-alb 32 | # annotations: 33 | # alb.ingress.kubernetes.io/target-type: ip 34 | # alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}]' 35 | http: 36 | enabled: true 37 | tls: 38 | enabled: false 39 | 40 | admin: 41 | enabled: true 42 | type: ClusterIP 43 | # ingress: 44 | # enabled: true 45 | # hostname: kong-admin.{{{DOMAIN}}} 46 | # path: / 47 | # pathType: Prefix 48 | # ingressClassName: {{#if DOMAIN}}shared-{{/if}}internet-facing-alb 49 | # annotations: 50 | # alb.ingress.kubernetes.io/target-type: ip 51 | # alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}]' 52 | http: 53 | enabled: true 54 | tls: 55 | enabled: false 56 | 57 | postgresql: 58 | enabled: true 59 | auth: 60 | postgresPassword: Pass@123 61 | password: Pass@123 62 | image: 63 | registry: public.ecr.aws 64 | repository: agentic-ai-platforms-on-k8s/postgresql 65 | tag: 17.5.0-debian-12-r8 66 | 67 | env: 68 | database: postgres 69 | # admin_gui_url: https://kong-manager.{{{DOMAIN}}} 70 | # admin_gui_api_url: https://kong-admin.{{{DOMAIN}}} 71 | # admin_gui_session_conf: '{"secret":"secret","storage":"kong","cookie_secure":false}' 72 | # password: {{{KONG_MANAGER_PASSWORD}}} 73 | 74 | # enterprise: 75 | # rbac: 76 | # enabled: true 77 | # admin_gui_auth: basic-auth -------------------------------------------------------------------------------- /components/llm-model/vllm/model-deepseek-r1-qwen3-8b.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: deepseek-r1-qwen3-8b 5 | namespace: vllm 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: deepseek-r1-qwen3-8b 11 | template: 12 | metadata: 13 | labels: 14 | app: deepseek-r1-qwen3-8b 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | {{{KARPENTER_PREFIX}}}/instance-family: g6e 22 | containers: 23 | - name: vllm 24 | image: vllm/vllm-openai:v0.10.2 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | 
capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | command: ["vllm", "serve"] 34 | args: 35 | - deepseek-ai/DeepSeek-R1-0528-Qwen3-8B 36 | - --served-model-name=deepseek-r1-qwen3-8b 37 | - --trust-remote-code 38 | - --gpu-memory-utilization=0.90 39 | - --max-model-len=32768 # 32K 40 | # DeepSeek-R1 specific 41 | - --reasoning-parser=deepseek_r1 42 | env: 43 | - name: HUGGING_FACE_HUB_TOKEN 44 | valueFrom: 45 | secretKeyRef: 46 | name: hf-token 47 | key: token 48 | ports: 49 | - name: http 50 | containerPort: 8000 51 | resources: 52 | requests: 53 | cpu: 3 #75% 54 | memory: 24Gi #75% 55 | nvidia.com/gpu: 1 56 | limits: 57 | nvidia.com/gpu: 1 58 | volumeMounts: 59 | - name: huggingface-cache 60 | mountPath: /root/.cache/huggingface 61 | volumes: 62 | - name: huggingface-cache 63 | persistentVolumeClaim: 64 | claimName: huggingface-cache 65 | tolerations: 66 | - key: nvidia.com/gpu 67 | operator: Exists 68 | effect: NoSchedule 69 | --- 70 | apiVersion: v1 71 | kind: Service 72 | metadata: 73 | name: deepseek-r1-qwen3-8b 74 | namespace: vllm 75 | spec: 76 | selector: 77 | app: deepseek-r1-qwen3-8b 78 | ports: 79 | - name: http 80 | port: 8000 81 | -------------------------------------------------------------------------------- /components/llm-model/vllm/model-qwen3-30b-thinking-fp8.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: qwen3-30b-thinking-fp8 5 | namespace: vllm 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: qwen3-30b-thinking-fp8 11 | template: 12 | metadata: 13 | labels: 14 | app: qwen3-30b-thinking-fp8 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | {{{KARPENTER_PREFIX}}}/instance-family: g6e 22 | containers: 23 | - name: vllm 24 | image: vllm/vllm-openai:v0.10.2 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | command: ["vllm", "serve"] 34 | args: 35 | - Qwen/Qwen3-30B-A3B-Thinking-2507-FP8 36 | - --served-model-name=qwen3-30b-thinking-fp8 37 | - --trust-remote-code 38 | - --gpu-memory-utilization=0.90 39 | - --max-model-len=32768 # 32K 40 | # Qwen3 specific 41 | - --reasoning-parser=qwen3 42 | env: 43 | - name: HUGGING_FACE_HUB_TOKEN 44 | valueFrom: 45 | secretKeyRef: 46 | name: hf-token 47 | key: token 48 | ports: 49 | - name: http 50 | containerPort: 8000 51 | resources: 52 | requests: 53 | cpu: 3 #75% 54 | memory: 24Gi #75% 55 | nvidia.com/gpu: 1 56 | limits: 57 | nvidia.com/gpu: 1 58 | volumeMounts: 59 | - name: huggingface-cache 60 | mountPath: /root/.cache/huggingface 61 | volumes: 62 | - name: huggingface-cache 63 | persistentVolumeClaim: 64 | claimName: huggingface-cache 65 | tolerations: 66 | - key: nvidia.com/gpu 67 | operator: Exists 68 | effect: NoSchedule 69 | --- 70 | apiVersion: v1 71 | kind: Service 72 | metadata: 73 | name: qwen3-30b-thinking-fp8 74 | namespace: vllm 75 | spec: 76 | selector: 77 | app: qwen3-30b-thinking-fp8 78 | ports: 79 | - name: http 80 | port: 8000 81 | -------------------------------------------------------------------------------- /components/llm-model/vllm/model-gpt-oss-20b.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | 
name: gpt-oss-20b 5 | namespace: vllm 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: gpt-oss-20b 11 | template: 12 | metadata: 13 | labels: 14 | app: gpt-oss-20b 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | {{{KARPENTER_PREFIX}}}/instance-family: g6e 22 | containers: 23 | - name: vllm 24 | image: vllm/vllm-openai:gptoss 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | command: ["vllm", "serve"] 34 | args: 35 | - openai/gpt-oss-20b 36 | - --served-model-name=gpt-oss-20b 37 | - --trust-remote-code 38 | - --gpu-memory-utilization=0.90 39 | - --max-model-len=32768 # 32K 40 | # - --max-model-len=131072 # 128K 41 | env: 42 | - name: HUGGING_FACE_HUB_TOKEN 43 | valueFrom: 44 | secretKeyRef: 45 | name: hf-token 46 | key: token 47 | - name: VLLM_ATTENTION_BACKEND 48 | value: TRITON_ATTN_VLLM_V1 49 | resources: 50 | requests: 51 | cpu: 3 #75% 52 | memory: 24Gi #75% 53 | nvidia.com/gpu: 1 54 | limits: 55 | nvidia.com/gpu: 1 56 | volumeMounts: 57 | - name: huggingface-cache 58 | mountPath: /root/.cache/huggingface 59 | - name: shm 60 | mountPath: /dev/shm 61 | volumes: 62 | - name: huggingface-cache 63 | persistentVolumeClaim: 64 | claimName: huggingface-cache 65 | - name: shm 66 | emptyDir: 67 | medium: Memory 68 | tolerations: 69 | - key: nvidia.com/gpu 70 | operator: Exists 71 | effect: NoSchedule 72 | --- 73 | apiVersion: v1 74 | kind: Service 75 | metadata: 76 | name: gpt-oss-20b 77 | namespace: vllm 78 | spec: 79 | selector: 80 | app: gpt-oss-20b 81 | ports: 82 | - name: http 83 | port: 8000 84 | -------------------------------------------------------------------------------- /components/llm-model/vllm/model-qwen3-30b-instruct-fp8.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: qwen3-30b-instruct-fp8 5 | namespace: vllm 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: qwen3-30b-instruct-fp8 11 | template: 12 | metadata: 13 | labels: 14 | app: qwen3-30b-instruct-fp8 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | {{{KARPENTER_PREFIX}}}/instance-family: g6e 22 | containers: 23 | - name: vllm 24 | image: vllm/vllm-openai:v0.10.2 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | command: ["vllm", "serve"] 34 | args: 35 | - Qwen/Qwen3-30B-A3B-Instruct-2507-FP8 36 | - --served-model-name=qwen3-30b-instruct-fp8 37 | - --trust-remote-code 38 | - --gpu-memory-utilization=0.90 39 | - --max-model-len=32768 # 32K 40 | # Qwen3 specific 41 | - --enable-auto-tool-choice 42 | - --tool-call-parser=hermes 43 | env: 44 | - name: HUGGING_FACE_HUB_TOKEN 45 | valueFrom: 46 | secretKeyRef: 47 | name: hf-token 48 | key: token 49 | ports: 50 | - name: http 51 | containerPort: 8000 52 | resources: 53 | requests: 54 | cpu: 3 #75% 55 | memory: 24Gi #75% 56 | nvidia.com/gpu: 1 57 | limits: 58 | nvidia.com/gpu: 1 59 | volumeMounts: 60 | - name: huggingface-cache 61 | mountPath: /root/.cache/huggingface 62 | volumes: 63 | - name: huggingface-cache 64 | persistentVolumeClaim: 65 | claimName: 
huggingface-cache 66 | tolerations: 67 | - key: nvidia.com/gpu 68 | operator: Exists 69 | effect: NoSchedule 70 | --- 71 | apiVersion: v1 72 | kind: Service 73 | metadata: 74 | name: qwen3-30b-instruct-fp8 75 | namespace: vllm 76 | spec: 77 | selector: 78 | app: qwen3-30b-instruct-fp8 79 | ports: 80 | - name: http 81 | port: 8000 82 | -------------------------------------------------------------------------------- /components/llm-model/sglang/model-gpt-oss-20b.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: gpt-oss-20b 5 | namespace: sglang 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: gpt-oss-20b 11 | template: 12 | metadata: 13 | labels: 14 | app: gpt-oss-20b 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | {{{KARPENTER_PREFIX}}}/instance-family: g6e 22 | containers: 23 | - name: sglang 24 | image: docker.io/lmsysorg/sglang:v0.5.0rc1-cu126 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | command: ["python3", "-m", "sglang.launch_server"] 34 | args: 35 | - --model-path=openai/gpt-oss-20b 36 | - --host=0.0.0.0 37 | - --port=30000 38 | - --trust-remote-code 39 | - --mem-fraction-static=0.90 40 | - --context-length=32768 # 32K 41 | env: 42 | - name: HF_TOKEN 43 | valueFrom: 44 | secretKeyRef: 45 | name: hf-token 46 | key: token 47 | ports: 48 | - name: http 49 | containerPort: 30000 50 | resources: 51 | requests: 52 | cpu: 3 #75% 53 | memory: 24Gi #75% 54 | nvidia.com/gpu: 1 55 | limits: 56 | nvidia.com/gpu: 1 57 | volumeMounts: 58 | - name: huggingface-cache 59 | mountPath: /root/.cache/huggingface 60 | - name: shm 61 | mountPath: /dev/shm 62 | volumes: 63 | - name: huggingface-cache 64 | persistentVolumeClaim: 65 | claimName: huggingface-cache 66 | - name: shm 67 | emptyDir: 68 | medium: Memory 69 | sizeLimit: 10Gi 70 | tolerations: 71 | - key: nvidia.com/gpu 72 | operator: Exists 73 | effect: NoSchedule 74 | --- 75 | apiVersion: v1 76 | kind: Service 77 | metadata: 78 | name: gpt-oss-20b 79 | namespace: sglang 80 | spec: 81 | selector: 82 | app: gpt-oss-20b 83 | ports: 84 | - name: http 85 | port: 30000 86 | -------------------------------------------------------------------------------- /components/o11y/langfuse/main.tf: -------------------------------------------------------------------------------- 1 | variable "region" { 2 | type = string 3 | default = "us-west-2" 4 | } 5 | variable "name" { 6 | type = string 7 | default = "genai-on-eks" 8 | } 9 | terraform { 10 | required_providers { 11 | aws = { 12 | source = "hashicorp/aws" 13 | version = "~> 5.96.0" 14 | } 15 | } 16 | } 17 | provider "aws" { 18 | region = var.region 19 | } 20 | 21 | # S3 Bucket for Langfuse 22 | resource "aws_s3_bucket" "langfuse" { 23 | bucket_prefix = "${var.name}-bucket-langfuse-" 24 | force_destroy = true 25 | } 26 | 27 | # Block public access to the S3 bucket 28 | resource "aws_s3_bucket_public_access_block" "langfuse" { 29 | bucket = aws_s3_bucket.langfuse.id 30 | 31 | block_public_acls = true 32 | block_public_policy = true 33 | ignore_public_acls = true 34 | restrict_public_buckets = true 35 | } 36 | 37 | # Enable server-side encryption for the S3 bucket 38 | resource "aws_s3_bucket_server_side_encryption_configuration" "langfuse" { 39 | 
bucket = aws_s3_bucket.langfuse.id 40 | 41 | rule { 42 | apply_server_side_encryption_by_default { 43 | sse_algorithm = "AES256" 44 | } 45 | } 46 | } 47 | 48 | output "langfuse_bucket_name" { 49 | value = aws_s3_bucket.langfuse.id 50 | } 51 | 52 | output "langfuse_s3_role_arn" { 53 | value = aws_iam_role.langfuse_s3_access.arn 54 | } 55 | 56 | resource "aws_iam_role" "langfuse_s3_access" { 57 | name = "${var.name}-${var.region}-langfuse-s3-access" 58 | assume_role_policy = jsonencode({ 59 | Version = "2012-10-17" 60 | Statement = [{ 61 | Effect = "Allow" 62 | Principal = { 63 | Service = "pods.eks.amazonaws.com" 64 | } 65 | Action = ["sts:AssumeRole", "sts:TagSession"] 66 | }] 67 | }) 68 | } 69 | 70 | resource "aws_iam_role_policy" "langfuse_s3_access" { 71 | role = aws_iam_role.langfuse_s3_access.name 72 | policy = jsonencode({ 73 | Version = "2012-10-17" 74 | Statement = [{ 75 | Effect = "Allow" 76 | Action = [ 77 | "s3:GetObject", 78 | "s3:PutObject", 79 | "s3:DeleteObject", 80 | "s3:ListBucket" 81 | ] 82 | Resource = [ 83 | "arn:aws:s3:::${var.name}-bucket-langfuse-*", 84 | "arn:aws:s3:::${var.name}-bucket-langfuse-*/*" 85 | ] 86 | }] 87 | }) 88 | } 89 | 90 | resource "aws_eks_pod_identity_association" "langfuse_s3" { 91 | cluster_name = var.name 92 | namespace = "langfuse" 93 | service_account = "langfuse" 94 | role_arn = aws_iam_role.langfuse_s3_access.arn 95 | } 96 | -------------------------------------------------------------------------------- /components/llm-model/vllm/model-qwen3-32b-fp8.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: qwen3-32b-fp8 5 | namespace: vllm 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: qwen3-32b-fp8 11 | template: 12 | metadata: 13 | labels: 14 | app: qwen3-32b-fp8 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | {{{KARPENTER_PREFIX}}}/instance-family: g6e 22 | containers: 23 | - name: vllm 24 | image: vllm/vllm-openai:v0.10.2 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | command: ["vllm", "serve"] 34 | args: 35 | - Qwen/Qwen3-32B-FP8 36 | - --served-model-name=qwen3-32b-fp8 37 | - --trust-remote-code 38 | # - --gpu-memory-utilization=0.90 39 | - --gpu-memory-utilization=0.95 40 | - --max-model-len=32768 # 32K 41 | # - --max-model-len=16384 # 16K 42 | # Qwen3 specific 43 | - --enable-auto-tool-choice 44 | - --tool-call-parser=hermes 45 | - --reasoning-parser=qwen3 46 | env: 47 | - name: HUGGING_FACE_HUB_TOKEN 48 | valueFrom: 49 | secretKeyRef: 50 | name: hf-token 51 | key: token 52 | ports: 53 | - name: http 54 | containerPort: 8000 55 | resources: 56 | requests: 57 | cpu: 3 #75% 58 | memory: 24Gi #75% 59 | nvidia.com/gpu: 1 60 | limits: 61 | nvidia.com/gpu: 1 62 | volumeMounts: 63 | - name: huggingface-cache 64 | mountPath: /root/.cache/huggingface 65 | volumes: 66 | - name: huggingface-cache 67 | persistentVolumeClaim: 68 | claimName: huggingface-cache 69 | tolerations: 70 | - key: nvidia.com/gpu 71 | operator: Exists 72 | effect: NoSchedule 73 | --- 74 | apiVersion: v1 75 | kind: Service 76 | metadata: 77 | name: qwen3-32b-fp8 78 | namespace: vllm 79 | spec: 80 | selector: 81 | app: qwen3-32b-fp8 82 | ports: 83 | - name: http 84 | port: 8000 85 | 
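A quick smoke test for any of these vLLM Deployments once the pod is Ready: port-forward the Service and call the OpenAI-compatible API. A minimal sketch against the qwen3-32b-fp8 manifest above (namespace, Service name, port, and model name are all taken from that file):

kubectl -n vllm port-forward svc/qwen3-32b-fp8 8000:8000 &
# vLLM exposes the OpenAI-compatible chat endpoint; "model" must match --served-model-name
curl -s http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "qwen3-32b-fp8", "messages": [{"role": "user", "content": "Say hello"}]}'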
-------------------------------------------------------------------------------- /components/llm-model/vllm/model-qwen3-coder-30b-fp8.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: qwen3-coder-30b-fp8 5 | namespace: vllm 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: qwen3-coder-30b-fp8 11 | template: 12 | metadata: 13 | labels: 14 | app: qwen3-coder-30b-fp8 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | {{{KARPENTER_PREFIX}}}/instance-family: g6e 22 | containers: 23 | - name: vllm 24 | image: vllm/vllm-openai:v0.10.2 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | command: ["vllm", "serve"] 34 | args: 35 | - Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8 36 | - --served-model-name=qwen3-coder-30b-fp8 37 | - --trust-remote-code 38 | # - --gpu-memory-utilization=0.90 39 | - --gpu-memory-utilization=0.95 40 | # - --max-model-len=32768 # 32K 41 | - --max-model-len=131072 # 128K 42 | # Qwen3 specific 43 | - --enable-auto-tool-choice 44 | - --tool-call-parser=qwen3_coder 45 | env: 46 | - name: HUGGING_FACE_HUB_TOKEN 47 | valueFrom: 48 | secretKeyRef: 49 | name: hf-token 50 | key: token 51 | ports: 52 | - name: http 53 | containerPort: 8000 54 | resources: 55 | requests: 56 | cpu: 3 #75% 57 | memory: 24Gi #75% 58 | nvidia.com/gpu: 1 59 | limits: 60 | nvidia.com/gpu: 1 61 | volumeMounts: 62 | - name: huggingface-cache 63 | mountPath: /root/.cache/huggingface 64 | volumes: 65 | - name: huggingface-cache 66 | persistentVolumeClaim: 67 | claimName: huggingface-cache 68 | tolerations: 69 | - key: nvidia.com/gpu 70 | operator: Exists 71 | effect: NoSchedule 72 | --- 73 | apiVersion: v1 74 | kind: Service 75 | metadata: 76 | name: qwen3-coder-30b-fp8 77 | namespace: vllm 78 | spec: 79 | selector: 80 | app: qwen3-coder-30b-fp8 81 | ports: 82 | - name: http 83 | port: 8000 84 | -------------------------------------------------------------------------------- /examples/agno/calculator-agent/index.mjs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env zx 2 | 3 | import { fileURLToPath } from "url"; 4 | import path from "path"; 5 | import fs from "fs"; 6 | import handlebars from "handlebars"; 7 | import { $, cd } from "zx"; 8 | $.verbose = true; 9 | 10 | export const name = "Agno - Calculator Agent"; 11 | const __filename = fileURLToPath(import.meta.url); 12 | const DIR = path.dirname(__filename); 13 | let BASE_DIR; 14 | let config; 15 | let utils; 16 | 17 | export async function init(_BASE_DIR, _config, _utils) { 18 | BASE_DIR = _BASE_DIR; 19 | config = _config; 20 | utils = _utils; 21 | } 22 | 23 | export async function install() { 24 | const { REGION } = process.env; 25 | await utils.terraform.apply(DIR); 26 | const ecrRepoUrl = await utils.terraform.output(DIR, { outputName: "ecr_repository_url" }); 27 | cd(DIR); 28 | await $`aws ecr get-login-password --region ${REGION} | docker login --username AWS --password-stdin ${ecrRepoUrl.split("/")[0]}`; 29 | const { useBuildx, arch } = config.docker; 30 | if (useBuildx) { 31 | await $`docker buildx build --platform linux/amd64,linux/arm64 -t ${ecrRepoUrl}:latest --push .`; 32 | } else { 33 | await $`docker build -t ${ecrRepoUrl}:latest .`; 34 | 
await $`docker push ${ecrRepoUrl}:latest`; 35 | } 36 | await $`kubectl apply -f ${path.join(DIR, "..", "namespace.yaml")}`; 37 | const agentTemplatePath = path.join(DIR, "agent.template.yaml"); 38 | const agentRenderedPath = path.join(DIR, "agent.rendered.yaml"); 39 | const agentTemplateString = fs.readFileSync(agentTemplatePath, "utf8"); 40 | const agentTemplate = handlebars.compile(agentTemplateString); 41 | const { LITELLM_API_KEY } = process.env; 42 | const agentVars = { 43 | useBuildx, 44 | arch, 45 | IMAGE: `${ecrRepoUrl}:latest`, 46 | ...config["examples"]["agno"]["calculator-agent"].env, 47 | LITELLM_BASE_URL: `http://litellm.litellm:4000/v1`, 48 | LITELLM_API_KEY: LITELLM_API_KEY, 49 | }; 50 | const result = await $`kubectl get pod -n langfuse -l app=web --ignore-not-found`; 51 | if (result.stdout.includes("langfuse")) { 52 | agentVars.LANGFUSE_HOST = "http://langfuse-web.langfuse:3000"; 53 | agentVars.LANGFUSE_PUBLIC_KEY = process.env.LANGFUSE_PUBLIC_KEY; 54 | agentVars.LANGFUSE_SECRET_KEY = process.env.LANGFUSE_SECRET_KEY; 55 | } 56 | fs.writeFileSync(agentRenderedPath, agentTemplate(agentVars)); 57 | await $`kubectl apply -f ${DIR}/agent.rendered.yaml`; 58 | } 59 | 60 | export async function uninstall() { 61 | await $`kubectl delete -f ${DIR}/agent.rendered.yaml --ignore-not-found`; 62 | await utils.terraform.destroy(DIR); 63 | } 64 | -------------------------------------------------------------------------------- /examples/strands-agents/calculator-agent/index.mjs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env zx 2 | 3 | import { fileURLToPath } from "url"; 4 | import path from "path"; 5 | import fs from "fs"; 6 | import handlebars from "handlebars"; 7 | import { $, cd } from "zx"; 8 | $.verbose = true; 9 | 10 | export const name = "Calculator Agent"; 11 | const __filename = fileURLToPath(import.meta.url); 12 | const DIR = path.dirname(__filename); 13 | let BASE_DIR; 14 | let config; 15 | let utils; 16 | 17 | export async function init(_BASE_DIR, _config, _utils) { 18 | BASE_DIR = _BASE_DIR; 19 | config = _config; 20 | utils = _utils; 21 | } 22 | 23 | export async function install() { 24 | const { REGION } = process.env; 25 | await utils.terraform.apply(DIR); 26 | const ecrRepoUrl = await utils.terraform.output(DIR, { outputName: "ecr_repository_url" }); 27 | cd(DIR); 28 | await $`aws ecr get-login-password --region ${REGION} | docker login --username AWS --password-stdin ${ecrRepoUrl.split("/")[0]}`; 29 | const { useBuildx, arch } = config.docker; 30 | if (useBuildx) { 31 | await $`docker buildx build --platform linux/amd64,linux/arm64 -t ${ecrRepoUrl}:latest --push .`; 32 | } else { 33 | await $`docker build -t ${ecrRepoUrl}:latest .`; 34 | await $`docker push ${ecrRepoUrl}:latest`; 35 | } 36 | await $`kubectl apply -f ${path.join(DIR, "..", "namespace.yaml")}`; 37 | const agentTemplatePath = path.join(DIR, "agent.template.yaml"); 38 | const agentRenderedPath = path.join(DIR, "agent.rendered.yaml"); 39 | const agentTemplateString = fs.readFileSync(agentTemplatePath, "utf8"); 40 | const agentTemplate = handlebars.compile(agentTemplateString); 41 | const { LITELLM_API_KEY } = process.env; 42 | const agentVars = { 43 | useBuildx, 44 | arch, 45 | IMAGE: `${ecrRepoUrl}:latest`, 46 | ...config["examples"]["strands-agents"]["calculator-agent"].env, 47 | LITELLM_BASE_URL: `http://litellm.litellm:4000`, 48 | LITELLM_API_KEY: LITELLM_API_KEY, 49 | }; 50 | const result = await $`kubectl get pod -n langfuse -l app=web 
--ignore-not-found`; 51 | if (result.stdout.includes("langfuse")) { 52 | agentVars.LANGFUSE_HOST = "http://langfuse-web.langfuse:3000"; 53 | agentVars.LANGFUSE_PUBLIC_KEY = process.env.LANGFUSE_PUBLIC_KEY; 54 | agentVars.LANGFUSE_SECRET_KEY = process.env.LANGFUSE_SECRET_KEY; 55 | } 56 | fs.writeFileSync(agentRenderedPath, agentTemplate(agentVars)); 57 | await $`kubectl apply -f ${DIR}/agent.rendered.yaml`; 58 | } 59 | 60 | export async function uninstall() { 61 | await $`kubectl delete -f ${DIR}/agent.rendered.yaml --ignore-not-found`; 62 | await utils.terraform.destroy(DIR); 63 | } 64 | -------------------------------------------------------------------------------- /components/llm-model/vllm/model-gpt-oss-120b.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: gpt-oss-120b 5 | namespace: vllm 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: gpt-oss-120b 11 | template: 12 | metadata: 13 | labels: 14 | app: gpt-oss-120b 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | # {{{KARPENTER_PREFIX}}}/instance-family: g6e 22 | {{{KARPENTER_PREFIX}}}/instance-category: p 23 | {{{KARPENTER_PREFIX}}}/instance-generation: "5" 24 | containers: 25 | - name: vllm 26 | image: vllm/vllm-openai:gptoss 27 | imagePullPolicy: IfNotPresent 28 | securityContext: 29 | allowPrivilegeEscalation: false 30 | capabilities: 31 | drop: 32 | - NET_RAW 33 | seccompProfile: 34 | type: RuntimeDefault 35 | command: ["vllm", "serve"] 36 | args: 37 | - openai/gpt-oss-120b 38 | - --served-model-name=gpt-oss-120b 39 | - --trust-remote-code 40 | - --gpu-memory-utilization=0.90 41 | - --max-model-len=32768 # 32K 42 | # - --max-model-len=131072 # 128K 43 | - --tensor-parallel-size=8 44 | - --async-scheduling 45 | env: 46 | - name: HUGGING_FACE_HUB_TOKEN 47 | valueFrom: 48 | secretKeyRef: 49 | name: hf-token 50 | key: token 51 | ports: 52 | - name: http 53 | containerPort: 8000 54 | resources: 55 | requests: 56 | nvidia.com/gpu: 8 57 | limits: 58 | nvidia.com/gpu: 8 59 | volumeMounts: 60 | - name: huggingface-cache 61 | mountPath: /root/.cache/huggingface 62 | - name: shm 63 | mountPath: /dev/shm 64 | volumes: 65 | - name: huggingface-cache 66 | persistentVolumeClaim: 67 | claimName: huggingface-cache 68 | - name: shm 69 | emptyDir: 70 | medium: Memory 71 | tolerations: 72 | - key: nvidia.com/gpu 73 | operator: Exists 74 | effect: NoSchedule 75 | --- 76 | apiVersion: v1 77 | kind: Service 78 | metadata: 79 | name: gpt-oss-120b 80 | namespace: vllm 81 | spec: 82 | selector: 83 | app: gpt-oss-120b 84 | ports: 85 | - name: http 86 | port: 8000 87 | -------------------------------------------------------------------------------- /components/llm-model/vllm/model-qwen3-8b-neuron.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: qwen3-8b-neuron 5 | namespace: vllm 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: qwen3-8b-neuron 11 | template: 12 | metadata: 13 | labels: 14 | app: qwen3-8b-neuron 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | node.kubernetes.io/instance-type: inf2.xlarge 22 | containers: 23 | - name: vllm 24 | image: 
public.ecr.aws/agentic-ai-platforms-on-k8s/vllm-neuron:qwen3-8b-optimum-neuron 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | command: ["vllm", "serve"] 34 | args: 35 | - /root/.cache/neuron/Qwen/Qwen3-8B 36 | - --served-model-name=qwen3-8b-neuron 37 | - --trust-remote-code 38 | - --gpu-memory-utilization=0.90 39 | # - --max-model-len=32768 # 32K 40 | # Qwen3 specific 41 | - --enable-auto-tool-choice 42 | - --tool-call-parser=hermes 43 | - --reasoning-parser=qwen3 44 | # Neuron specific 45 | - --tensor-parallel-size=2 46 | # - --gpu-memory-utilization=0.95 47 | - --max-num-seqs=2 48 | - --max-model-len=8192 49 | env: 50 | - name: HF_HOME 51 | value: /root/.cache/huggingface 52 | - name: HF_HUB_CACHE 53 | value: /root/.cache/huggingface/hub 54 | - name: NEURON_RT_NUM_CORES 55 | value: "2" 56 | - name: NEURON_RT_VISIBLE_CORES 57 | value: "0-1" 58 | ports: 59 | - name: http 60 | containerPort: 8000 61 | resources: 62 | requests: 63 | cpu: 3 #75% 64 | memory: 12Gi #75% 65 | aws.amazon.com/neuroncore: 2 66 | limits: 67 | aws.amazon.com/neuroncore: 2 68 | tolerations: 69 | - key: aws.amazon.com/neuron 70 | operator: Exists 71 | effect: NoSchedule 72 | --- 73 | apiVersion: v1 74 | kind: Service 75 | metadata: 76 | name: qwen3-8b-neuron 77 | namespace: vllm 78 | spec: 79 | selector: 80 | app: qwen3-8b-neuron 81 | ports: 82 | - name: http 83 | port: 8000 84 | -------------------------------------------------------------------------------- /components/llm-model/sglang/model-qwen3-32b-fp8.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: qwen3-32b-fp8 5 | namespace: sglang 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: qwen3-32b-fp8 11 | template: 12 | metadata: 13 | labels: 14 | app: qwen3-32b-fp8 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | {{{KARPENTER_PREFIX}}}/instance-family: g6e 22 | containers: 23 | - name: sglang 24 | image: docker.io/lmsysorg/sglang:v0.4.10.post2-cu126 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | command: ["python3", "-m", "sglang.launch_server"] 34 | args: 35 | - --model-path=Qwen/Qwen3-32B-FP8 36 | - --host=0.0.0.0 37 | - --port=30000 38 | - --trust-remote-code 39 | - --mem-fraction-static=0.90 40 | - --context-length=32768 # 32K 41 | # Qwen3 specific 42 | - --tool-call-parser=qwen25 43 | - --reasoning-parser=qwen3 44 | env: 45 | - name: HF_TOKEN 46 | valueFrom: 47 | secretKeyRef: 48 | name: hf-token 49 | key: token 50 | ports: 51 | - name: http 52 | containerPort: 30000 53 | resources: 54 | requests: 55 | cpu: 3 #75% 56 | memory: 24Gi #75% 57 | nvidia.com/gpu: 1 58 | limits: 59 | nvidia.com/gpu: 1 60 | volumeMounts: 61 | - name: huggingface-cache 62 | mountPath: /root/.cache/huggingface 63 | - name: shm 64 | mountPath: /dev/shm 65 | volumes: 66 | - name: huggingface-cache 67 | persistentVolumeClaim: 68 | claimName: huggingface-cache 69 | - name: shm 70 | emptyDir: 71 | medium: Memory 72 | sizeLimit: 10Gi 73 | tolerations: 74 | - key: nvidia.com/gpu 75 | operator: Exists 76 | effect: NoSchedule 77 | --- 78 | apiVersion: v1 79 | kind: Service 80 | metadata: 
81 | name: qwen3-32b-fp8 82 | namespace: sglang 83 | spec: 84 | selector: 85 | app: qwen3-32b-fp8 86 | ports: 87 | - name: http 88 | port: 30000 89 | -------------------------------------------------------------------------------- /components/llm-model/vllm/model-magistral-24b-fp8.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: magistral-24b-fp8 5 | namespace: vllm 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: magistral-24b-fp8 11 | template: 12 | metadata: 13 | labels: 14 | app: magistral-24b-fp8 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | {{{KARPENTER_PREFIX}}}/instance-family: g6e 22 | containers: 23 | - name: vllm 24 | image: vllm/vllm-openai:v0.10.2 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | command: ["vllm", "serve"] 34 | args: 35 | - RedHatAI/Magistral-Small-2506-FP8 36 | - --served-model-name=magistral-24b-fp8 37 | - --trust-remote-code 38 | - --gpu-memory-utilization=0.90 39 | - --max-model-len=32768 # 32K 40 | # Mistral specific 41 | - --enable-auto-tool-choice 42 | - --tool-call-parser=mistral 43 | - --chat-template=examples/tool_chat_template_mistral.jinja 44 | - --tokenizer-mode=mistral 45 | - --load-format=mistral 46 | - --config-format=mistral 47 | env: 48 | - name: HUGGING_FACE_HUB_TOKEN 49 | valueFrom: 50 | secretKeyRef: 51 | name: hf-token 52 | key: token 53 | ports: 54 | - name: http 55 | containerPort: 8000 56 | resources: 57 | requests: 58 | cpu: 3 #75% 59 | memory: 24Gi #75% 60 | nvidia.com/gpu: 1 61 | limits: 62 | nvidia.com/gpu: 1 63 | volumeMounts: 64 | - name: huggingface-cache 65 | mountPath: /root/.cache/huggingface 66 | volumes: 67 | - name: huggingface-cache 68 | persistentVolumeClaim: 69 | claimName: huggingface-cache 70 | tolerations: 71 | - key: nvidia.com/gpu 72 | operator: Exists 73 | effect: NoSchedule 74 | --- 75 | apiVersion: v1 76 | kind: Service 77 | metadata: 78 | name: magistral-24b-fp8 79 | namespace: vllm 80 | spec: 81 | selector: 82 | app: magistral-24b-fp8 83 | ports: 84 | - name: http 85 | port: 8000 86 | -------------------------------------------------------------------------------- /components/llm-model/sglang/model-qwen3-30b-instruct-fp8.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: qwen3-30b-instruct-fp8 5 | namespace: sglang 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: qwen3-30b-instruct-fp8 11 | template: 12 | metadata: 13 | labels: 14 | app: qwen3-30b-instruct-fp8 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | {{{KARPENTER_PREFIX}}}/instance-family: g6e 22 | containers: 23 | - name: sglang 24 | image: docker.io/lmsysorg/sglang:v0.4.10.post2-cu126 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | command: ["python3", "-m", "sglang.launch_server"] 34 | args: 35 | - --model-path=Qwen/Qwen3-30B-A3B-Instruct-2507-FP8 36 | - --host=0.0.0.0 37 | - --port=30000 38 | - --trust-remote-code 39 
| - --mem-fraction-static=0.90 40 | - --context-length=32768 # 32K 41 | # Qwen3 specific 42 | - --tool-call-parser=qwen25 43 | env: 44 | - name: HF_TOKEN 45 | valueFrom: 46 | secretKeyRef: 47 | name: hf-token 48 | key: token 49 | ports: 50 | - name: http 51 | containerPort: 30000 52 | resources: 53 | requests: 54 | cpu: 3 #75% 55 | memory: 24Gi #75% 56 | nvidia.com/gpu: 1 57 | limits: 58 | nvidia.com/gpu: 1 59 | volumeMounts: 60 | - name: huggingface-cache 61 | mountPath: /root/.cache/huggingface 62 | - name: shm 63 | mountPath: /dev/shm 64 | volumes: 65 | - name: huggingface-cache 66 | persistentVolumeClaim: 67 | claimName: huggingface-cache 68 | - name: shm 69 | emptyDir: 70 | medium: Memory 71 | sizeLimit: 10Gi 72 | tolerations: 73 | - key: nvidia.com/gpu 74 | operator: Exists 75 | effect: NoSchedule 76 | --- 77 | apiVersion: v1 78 | kind: Service 79 | metadata: 80 | name: qwen3-30b-instruct-fp8 81 | namespace: sglang 82 | spec: 83 | selector: 84 | app: qwen3-30b-instruct-fp8 85 | ports: 86 | - name: http 87 | port: 30000 88 | -------------------------------------------------------------------------------- /components/llm-model/sglang/model-qwen3-30b-thinking-fp8.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: qwen3-30b-thinking-fp8 5 | namespace: sglang 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: qwen3-30b-thinking-fp8 11 | template: 12 | metadata: 13 | labels: 14 | app: qwen3-30b-thinking-fp8 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | {{{KARPENTER_PREFIX}}}/instance-family: g6e 22 | containers: 23 | - name: sglang 24 | image: docker.io/lmsysorg/sglang:v0.4.10.post2-cu126 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | command: ["python3", "-m", "sglang.launch_server"] 34 | args: 35 | - --model-path=Qwen/Qwen3-30B-A3B-Thinking-2507-FP8 36 | - --host=0.0.0.0 37 | - --port=30000 38 | - --trust-remote-code 39 | - --mem-fraction-static=0.90 40 | - --context-length=32768 # 32K 41 | # Qwen3 specific 42 | - --reasoning-parser=qwen3 43 | env: 44 | - name: HF_TOKEN 45 | valueFrom: 46 | secretKeyRef: 47 | name: hf-token 48 | key: token 49 | ports: 50 | - name: http 51 | containerPort: 30000 52 | resources: 53 | requests: 54 | cpu: 3 #75% 55 | memory: 24Gi #75% 56 | nvidia.com/gpu: 1 57 | limits: 58 | nvidia.com/gpu: 1 59 | volumeMounts: 60 | - name: huggingface-cache 61 | mountPath: /root/.cache/huggingface 62 | - name: shm 63 | mountPath: /dev/shm 64 | volumes: 65 | - name: huggingface-cache 66 | persistentVolumeClaim: 67 | claimName: huggingface-cache 68 | - name: shm 69 | emptyDir: 70 | medium: Memory 71 | sizeLimit: 10Gi 72 | tolerations: 73 | - key: nvidia.com/gpu 74 | operator: Exists 75 | effect: NoSchedule 76 | --- 77 | apiVersion: v1 78 | kind: Service 79 | metadata: 80 | name: qwen3-30b-thinking-fp8 81 | namespace: sglang 82 | spec: 83 | selector: 84 | app: qwen3-30b-thinking-fp8 85 | ports: 86 | - name: http 87 | port: 30000 88 | -------------------------------------------------------------------------------- /components/llm-model/vllm/model-deepseek-r1-qwen3-8b-neuron.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 
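# Serves the DeepSeek-R1 Qwen3-8B distill on AWS Inferentia2 (inf2.xlarge, 2 NeuronCores)
# from a pre-compiled optimum-neuron image, so no on-node compilation is required;
# the Neuron-specific args below (tensor-parallel-size=2, max-num-seqs=2, max-model-len=8192)
# are sized for that instance type.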
3 | metadata: 4 | name: deepseek-r1-qwen3-8b-neuron 5 | namespace: vllm 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: deepseek-r1-qwen3-8b-neuron 11 | template: 12 | metadata: 13 | labels: 14 | app: deepseek-r1-qwen3-8b-neuron 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | node.kubernetes.io/instance-type: inf2.xlarge 22 | containers: 23 | - name: vllm 24 | image: public.ecr.aws/agentic-ai-platforms-on-k8s/vllm-neuron:deepseek-r1-qwen3-8b-optimum-neuron 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | command: ["vllm", "serve"] 34 | args: 35 | - /root/.cache/neuron/deepseek-ai/DeepSeek-R1-0528-Qwen3-8B 36 | - --served-model-name=deepseek-r1-qwen3-8b-neuron 37 | - --trust-remote-code 38 | - --gpu-memory-utilization=0.90 39 | # - --max-model-len=32768 # 32K 40 | # DeepSeek-R1 specific 41 | - --reasoning-parser=deepseek_r1 42 | # Neuron specific 43 | - --tensor-parallel-size=2 44 | # - --gpu-memory-utilization=0.95 45 | - --max-num-seqs=2 46 | - --max-model-len=8192 47 | env: 48 | - name: HF_HOME 49 | value: /root/.cache/huggingface 50 | - name: HF_HUB_CACHE 51 | value: /root/.cache/huggingface/hub 52 | - name: NEURON_RT_NUM_CORES 53 | value: "2" 54 | - name: NEURON_RT_VISIBLE_CORES 55 | value: "0-1" 56 | ports: 57 | - name: http 58 | containerPort: 8000 59 | resources: 60 | requests: 61 | cpu: 3 #75% 62 | memory: 12Gi #75% 63 | aws.amazon.com/neuroncore: 2 64 | limits: 65 | aws.amazon.com/neuroncore: 2 66 | tolerations: 67 | - key: aws.amazon.com/neuron 68 | operator: Exists 69 | effect: NoSchedule 70 | --- 71 | apiVersion: v1 72 | kind: Service 73 | metadata: 74 | name: deepseek-r1-qwen3-8b-neuron 75 | namespace: vllm 76 | spec: 77 | selector: 78 | app: deepseek-r1-qwen3-8b-neuron 79 | ports: 80 | - name: http 81 | port: 8000 82 | -------------------------------------------------------------------------------- /components/llm-model/ollama/index.mjs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env zx 2 | 3 | import { fileURLToPath } from "url"; 4 | import path from "path"; 5 | import { $ } from "zx"; 6 | $.verbose = true; 7 | 8 | export const name = "Ollama"; 9 | const __filename = fileURLToPath(import.meta.url); 10 | const DIR = path.dirname(__filename); 11 | let BASE_DIR; 12 | let config; 13 | let utils; 14 | 15 | export async function init(_BASE_DIR, _config, _utils) { 16 | BASE_DIR = _BASE_DIR; 17 | config = _config; 18 | utils = _utils; 19 | } 20 | 21 | export async function install() { 22 | // const requiredEnvVars = []; 23 | // utils.checkRequiredEnvVars(requiredEnvVars); 24 | await $`kubectl apply -f ${path.join(DIR, "namespace.yaml")}`; 25 | await $`kubectl apply -f ${path.join(DIR, "pvc.yaml")}`; 26 | const configmapTemplatePath = path.join(DIR, "configmap.template.yaml"); 27 | const configmapRenderedPath = path.join(DIR, "configmap.rendered.yaml"); 28 | const { models } = config["llm-model"]["ollama"]; 29 | const configmapVars = { 30 | models: models.map((model) => `"${model}"`).join(" "), 31 | }; 32 | utils.renderTemplate(configmapTemplatePath, configmapRenderedPath, configmapVars); 33 | await $`kubectl apply -f ${configmapRenderedPath}`; 34 | const deploymentTemplatePath = path.join(DIR, "deployment.template.yaml"); 35 | const deploymentRenderedPath = path.join(DIR, 
"deployment.rendered.yaml"); 36 | const { EKS_MODE } = process.env; 37 | const deploymentVars = { 38 | KARPENTER_PREFIX: EKS_MODE === "auto" ? "eks.amazonaws.com" : "karpenter.k8s.aws", 39 | }; 40 | utils.renderTemplate(deploymentTemplatePath, deploymentRenderedPath, deploymentVars); 41 | await $`kubectl apply -f ${deploymentRenderedPath}`; 42 | await $`kubectl apply -f ${path.join(DIR, "service.yaml")}`; 43 | const ingressTemplatePath = path.join(DIR, "ingress.template.yaml"); 44 | const ingressRenderedPath = path.join(DIR, "ingress.rendered.yaml"); 45 | const ingressVars = { 46 | DOMAIN: process.env.DOMAIN, 47 | }; 48 | utils.renderTemplate(ingressTemplatePath, ingressRenderedPath, ingressVars); 49 | await $`kubectl apply -f ${ingressRenderedPath}`; 50 | } 51 | 52 | export async function uninstall() { 53 | await $`kubectl delete -f ${path.join(DIR, "ingress.rendered.yaml")} --ignore-not-found`; 54 | await $`kubectl delete -f ${path.join(DIR, "service.yaml")} --ignore-not-found`; 55 | await $`kubectl delete -f ${path.join(DIR, "deployment.rendered.yaml")} --ignore-not-found`; 56 | await $`kubectl delete -f ${path.join(DIR, "configmap.rendered.yaml")} --ignore-not-found`; 57 | await $`kubectl delete -f ${path.join(DIR, "pvc.yaml")} --ignore-not-found`; 58 | await $`kubectl delete -f ${path.join(DIR, "namespace.yaml")} --ignore-not-found`; 59 | } 60 | -------------------------------------------------------------------------------- /components/ai-gateway/litellm/main.tf: -------------------------------------------------------------------------------- 1 | variable "region" { 2 | type = string 3 | default = "us-west-2" 4 | } 5 | variable "bedrock_region" { 6 | type = string 7 | default = "us-west-2" 8 | } 9 | variable "name" { 10 | type = string 11 | default = "genai-on-eks" 12 | } 13 | variable "enable_bedrock_guardrail" { 14 | type = bool 15 | default = false 16 | } 17 | terraform { 18 | required_providers { 19 | aws = { 20 | source = "hashicorp/aws" 21 | version = "~> 5.96.0" 22 | } 23 | } 24 | } 25 | provider "aws" { 26 | region = var.region 27 | } 28 | provider "aws" { 29 | alias = "bedrock" 30 | region = var.bedrock_region 31 | } 32 | 33 | module "pod_identity" { 34 | source = "terraform-aws-modules/eks-pod-identity/aws" 35 | version = "1.12.0" 36 | 37 | name = "${var.name}-${var.region}-litellm" 38 | use_name_prefix = false 39 | attach_custom_policy = true 40 | policy_statements = [ 41 | { 42 | sid = "Bedrock" 43 | actions = [ 44 | "bedrock:InvokeModel", 45 | "bedrock:InvokeModelWithResponseStream", 46 | "aws-marketplace:Subscribe", 47 | "aws-marketplace:ViewSubscriptions", 48 | "bedrock:ApplyGuardrail" 49 | ] 50 | resources = ["*"] 51 | } 52 | ] 53 | associations = { 54 | litellm = { 55 | service_account = "litellm" 56 | namespace = "litellm" 57 | cluster_name = var.name 58 | } 59 | } 60 | } 61 | 62 | resource "aws_bedrock_guardrail" "this" { 63 | count = var.enable_bedrock_guardrail ? 1 : 0 64 | provider = aws.bedrock 65 | name = var.name 66 | blocked_input_messaging = "Sorry, the model cannot answer this question." 67 | blocked_outputs_messaging = "Sorry, the model cannot answer this question." 
68 | description = var.name 69 | contextual_grounding_policy_config { 70 | filters_config { 71 | threshold = 0.7 72 | type = "GROUNDING" 73 | } 74 | filters_config { 75 | threshold = 0.7 76 | type = "RELEVANCE" 77 | } 78 | } 79 | word_policy_config { 80 | managed_word_lists_config { 81 | type = "PROFANITY" 82 | } 83 | } 84 | } 85 | output "bedrock_guardrail_id" { 86 | value = var.enable_bedrock_guardrail ? aws_bedrock_guardrail.this[0].guardrail_id : "" 87 | } 88 | resource "aws_bedrock_guardrail_version" "this" { 89 | count = var.enable_bedrock_guardrail ? 1 : 0 90 | provider = aws.bedrock 91 | description = var.name 92 | guardrail_arn = aws_bedrock_guardrail.this[0].guardrail_arn 93 | } 94 | output "bedrock_guardrail_version" { 95 | value = var.enable_bedrock_guardrail ? aws_bedrock_guardrail_version.this[0].version : "" 96 | } -------------------------------------------------------------------------------- /components/embedding-model/tei/model-qwen3-embedding-06b-bf16-cpu.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: qwen3-embedding-06b-bf16-cpu 5 | namespace: tei 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: qwen3-embedding-06b-bf16-cpu 11 | template: 12 | metadata: 13 | labels: 14 | app: qwen3-embedding-06b-bf16-cpu 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | kubernetes.io/arch: amd64 22 | # karpenter.sh/capacity-type: on-demand 23 | # {{{KARPENTER_PREFIX}}}/instance-category: m 24 | # {{{KARPENTER_PREFIX}}}/instance-generation: "7" 25 | {{{KARPENTER_PREFIX}}}/instance-family: r7i 26 | # node.kubernetes.io/instance-type: r7i.xlarge 27 | containers: 28 | - name: tei 29 | image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.8.2 30 | imagePullPolicy: IfNotPresent 31 | securityContext: 32 | allowPrivilegeEscalation: false 33 | capabilities: 34 | drop: 35 | - NET_RAW 36 | seccompProfile: 37 | type: RuntimeDefault 38 | args: 39 | - --model-id=Qwen/Qwen3-Embedding-0.6B 40 | - --huggingface-hub-cache=/root/.cache/huggingface/hub 41 | - --max-batch-tokens=8192 42 | env: 43 | - name: HF_TOKEN 44 | valueFrom: 45 | secretKeyRef: 46 | name: hf-token 47 | key: token 48 | ports: 49 | - name: http 50 | containerPort: 80 51 | resources: 52 | requests: 53 | cpu: 3.6 #90% 54 | memory: 29Gi #90% 55 | limits: 56 | cpu: 3.6 #90% 57 | memory: 29Gi #90% 58 | volumeMounts: 59 | - name: huggingface-cache 60 | mountPath: /root/.cache/huggingface 61 | - name: shm 62 | mountPath: /dev/shm 63 | volumes: 64 | - name: huggingface-cache 65 | persistentVolumeClaim: 66 | claimName: huggingface-cache 67 | - name: shm 68 | emptyDir: 69 | medium: Memory 70 | sizeLimit: 10Gi 71 | tolerations: 72 | - key: nvidia.com/gpu 73 | operator: Exists 74 | effect: NoSchedule 75 | --- 76 | apiVersion: v1 77 | kind: Service 78 | metadata: 79 | name: qwen3-embedding-06b-bf16-cpu 80 | namespace: tei 81 | spec: 82 | selector: 83 | app: qwen3-embedding-06b-bf16-cpu 84 | ports: 85 | - name: http 86 | port: 80 87 | -------------------------------------------------------------------------------- /components/llm-model/sglang/model-qwen3-coder-30b-fp8.template.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: qwen3-coder-30b-fp8 5 | namespace: sglang 6 | spec: 7 | replicas: 1 8 | selector: 9 | 
matchLabels: 10 | app: qwen3-coder-30b-fp8 11 | template: 12 | metadata: 13 | labels: 14 | app: qwen3-coder-30b-fp8 15 | spec: 16 | securityContext: 17 | seccompProfile: 18 | type: RuntimeDefault 19 | automountServiceAccountToken: false 20 | nodeSelector: 21 | {{{KARPENTER_PREFIX}}}/instance-family: g6e 22 | containers: 23 | - name: sglang 24 | image: docker.io/lmsysorg/sglang:v0.4.10.post2-cu126 25 | imagePullPolicy: IfNotPresent 26 | securityContext: 27 | allowPrivilegeEscalation: false 28 | capabilities: 29 | drop: 30 | - NET_RAW 31 | seccompProfile: 32 | type: RuntimeDefault 33 | command: ["python3", "-m", "sglang.launch_server"] 34 | args: 35 | - --model-path=Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8 36 | - --host=0.0.0.0 37 | - --port=30000 38 | - --trust-remote-code 39 | - --mem-fraction-static=0.90 40 | # - --context-length=32768 # 32K 41 | - --context-length=131072 # 128K 42 | # Qwen3 specific 43 | - --tool-call-parser=qwen25 44 | - --reasoning-parser=qwen3 45 | env: 46 | - name: HF_TOKEN 47 | valueFrom: 48 | secretKeyRef: 49 | name: hf-token 50 | key: token 51 | ports: 52 | - name: http 53 | containerPort: 30000 54 | resources: 55 | requests: 56 | cpu: 3 #75% 57 | memory: 24Gi #75% 58 | nvidia.com/gpu: 1 59 | limits: 60 | nvidia.com/gpu: 1 61 | volumeMounts: 62 | - name: huggingface-cache 63 | mountPath: /root/.cache/huggingface 64 | - name: shm 65 | mountPath: /dev/shm 66 | volumes: 67 | - name: huggingface-cache 68 | persistentVolumeClaim: 69 | claimName: huggingface-cache 70 | - name: shm 71 | emptyDir: 72 | medium: Memory 73 | sizeLimit: 10Gi 74 | tolerations: 75 | - key: nvidia.com/gpu 76 | operator: Exists 77 | effect: NoSchedule 78 | --- 79 | apiVersion: v1 80 | kind: Service 81 | metadata: 82 | name: qwen3-coder-30b-fp8 83 | namespace: sglang 84 | spec: 85 | selector: 86 | app: qwen3-coder-30b-fp8 87 | ports: 88 | - name: http 89 | port: 30000 90 | --------------------------------------------------------------------------------
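The SGLang deployments expose the same OpenAI-compatible surface via sglang.launch_server, so an equivalent smoke test works. A sketch for the qwen3-coder-30b-fp8 manifest above; since the args set no served model name, the model id should default to the --model-path value:

kubectl -n sglang port-forward svc/qwen3-coder-30b-fp8 30000:30000 &
curl -s http://localhost:30000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8", "messages": [{"role": "user", "content": "Write a binary search in Python"}]}'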