├── agentic-apps
│   ├── strandsdk_agentic_rag_opensearch
│   │   ├── src
│   │   │   ├── __init__.py
│   │   │   ├── tools
│   │   │   │   └── __init__.py
│   │   │   ├── scripts
│   │   │   │   ├── __init__.py
│   │   │   │   └── embed_knowledge.py
│   │   │   ├── agents
│   │   │   │   ├── rag_agent.py
│   │   │   │   └── __init__.py
│   │   │   ├── utils
│   │   │   │   ├── __init__.py
│   │   │   │   ├── logging.py
│   │   │   │   ├── model_providers.py
│   │   │   │   ├── opensearch_client.py
│   │   │   │   ├── async_cleanup.py
│   │   │   │   └── langfuse_config.py
│   │   │   ├── config.py
│   │   │   └── test_agents.py
│   │   ├── k8s
│   │   │   ├── service-account.yaml
│   │   │   └── tavily-mcp-deployment.yaml
│   │   ├── images
│   │   │   └── arch.png
│   │   ├── healthcheck-mcp.sh
│   │   ├── healthcheck-main.sh
│   │   ├── update-k8s-config.sh
│   │   ├── requirements.txt
│   │   ├── Dockerfile.mcp
│   │   ├── startup-mcp.sh
│   │   ├── startup-main.sh
│   │   ├── pyproject.toml
│   │   ├── .env.example
│   │   ├── Dockerfile.main
│   │   ├── run_main_clean.py
│   │   ├── scripts
│   │   │   └── start_tavily_server.py
│   │   └── run_single_query_clean.py
│   ├── agentic_rag_milvus
│   │   ├── output
│   │   │   └── test.md
│   │   ├── images
│   │   │   ├── image.png
│   │   │   ├── image1.png
│   │   │   ├── image2.png
│   │   │   ├── image3.png
│   │   │   └── image4.png
│   │   ├── src
│   │   │   ├── VectorStore.ts
│   │   │   ├── utils.ts
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── embedKnowledge.ts
│   │   │   ├── updateRAG.ts
│   │   │   ├── embedCSV.ts
│   │   │   ├── rebuildCollection.ts
│   │   │   ├── MCPClient.ts
│   │   │   └── EmbeddingRetriever.ts
│   │   ├── tsconfig.json
│   │   ├── knowledge
│   │   │   ├── user_9.md
│   │   │   ├── user_7.md
│   │   │   ├── user_5.md
│   │   │   ├── user_1.md
│   │   │   ├── user_10.md
│   │   │   ├── user_2.md
│   │   │   ├── user_3.md
│   │   │   ├── user_8.md
│   │   │   ├── user_4.md
│   │   │   └── user_6.md
│   │   ├── test-file-write.js
│   │   ├── list_collections.py
│   │   ├── package.json
│   │   ├── debug-auth.js
│   │   ├── debug.js
│   │   ├── debug-no-tools.js
│   │   ├── test-mcp-server.js
│   │   ├── test-endpoint.js
│   │   ├── explore_collection.py
│   │   ├── debug-tools.js
│   │   ├── test-endpoint-with-tools.js
│   │   ├── AmazonQ.md
│   │   └── README.md
│   ├── agentic-idp
│   │   ├── requirements.txt
│   │   ├── birth_cert.png
│   │   ├── doc_reader.py
│   │   ├── mcp.py
│   │   ├── .env.example
│   │   ├── decision.py
│   │   └── storage.py
│   └── agentic_rag_opensearch
│       ├── src
│       │   ├── scripts
│       │   │   ├── index.ts
│       │   │   └── embedKnowledge.ts
│       │   ├── agents
│       │   │   └── index.ts
│       │   ├── VectorStore.ts
│       │   ├── utils.ts
│       │   ├── MCPClient.ts
│       │   ├── embedKnowledge.ts
│       │   ├── LangfuseConfig.ts
│       │   ├── test-agents.ts
│       │   ├── embedCSV.ts
│       │   ├── test-langfuse.ts
│       │   ├── index.ts
│       │   └── Agent.ts
│       ├── images
│       │   ├── arch.png
│       │   ├── image.png
│       │   ├── image1.png
│       │   ├── image2.png
│       │   ├── image3.png
│       │   └── image4.png
│       ├── tsconfig.json
│       ├── update-policy.json
│       ├── package.json
│       ├── .env.example
│       ├── output
│       │   └── antonette.md
│       ├── MULTI_AGENT_GUIDE.md
│       └── AmazonQ.md
├── model-hosting
│   ├── ray-server
│   │   ├── local-requirements.txt
│   │   └── llamacpp.py
│   ├── ray-services
│   │   └── ingress
│   │       ├── add-sg-lb-eks.sh
│   │       ├── ingress-embedding.yaml
│   │       └── ingress-cpu.yaml
│   ├── standalone-llamacpp-embedding.yaml
│   ├── standalone-vllm-reasoning.yaml
│   └── standalone-vllm-vision.yaml
├── milvus
│   ├── milvus-update.yaml
│   ├── ebs-storage-class.yaml
│   ├── milvus-nlb-service.yaml
│   ├── milvus-standalone.yaml
│   └── README.md
├── image
│   ├── Inference_GenAI_architecture_EKS.jpg
│   └── Inference_GenAI_app_architecture_EKS.jpg
├── CODE_OF_CONDUCT.md
├── base_eks_setup
│   ├── gp3.yaml
│   ├── tracking_stack.yaml
│   ├── prometheus-monitoring.yaml
│   └── karpenter_nodepool
│       ├── graviton-nodepool.yaml
│       ├── x86-nodepool.yaml
│       ├── inf2-nodepool.yaml
│       └── gpu-nodepool.yaml
├── model-observability
│   ├── langfuse-secret.yaml
│   ├── langfuse-redis-port-patch.yaml
│   ├── langfuse-web-ingress.yaml
│   ├── langfuse-value.yaml
│   └── values.yaml.DEPRECATED
├── benchmark
│   ├── Dockerfile
│   └── prompts.txt
├── model-gateway
│   └── litellm-ingress.yaml
├── LICENSE
├── .gitignore
└── CONTRIBUTING.md

/agentic-apps/strandsdk_agentic_rag_opensearch/src/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/agentic-apps/strandsdk_agentic_rag_opensearch/src/tools/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/agentic-apps/strandsdk_agentic_rag_opensearch/src/scripts/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/agentic-apps/agentic_rag_milvus/output/test.md:
--------------------------------------------------------------------------------
1 | # Test File
2 | 
3 | This is a test.
--------------------------------------------------------------------------------
/agentic-apps/agentic-idp/requirements.txt:
--------------------------------------------------------------------------------
1 | langfuse
2 | openai
3 | langgraph
4 | 
5 | 
--------------------------------------------------------------------------------
/model-hosting/ray-server/local-requirements.txt:
--------------------------------------------------------------------------------
1 | llama_cpp_python==0.3.2
2 | transformers==4.46.0
3 | fastapi[all]
4 | ray==2.39.0
5 | starlette==0.41.3
--------------------------------------------------------------------------------
/agentic-apps/agentic_rag_opensearch/src/scripts/index.ts:
--------------------------------------------------------------------------------
1 | // Export scripts for programmatic usage if needed
2 | export * from './embedKnowledge';
3 | 
4 | 
--------------------------------------------------------------------------------
/agentic-apps/strandsdk_agentic_rag_opensearch/k8s/service-account.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: ServiceAccount
3 | metadata:
4 |   name: strandsdk-rag-service-account
5 |   namespace: default
6 | 
7 | 
--------------------------------------------------------------------------------
/milvus/milvus-update.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: milvus.io/v1beta1
2 | kind: Milvus
3 | metadata:
4 |   name: my-release
5 |   namespace: default
6 | spec:
7 |   components:
8 |     image: milvusdb/milvus:v2.5.12
9 | 
--------------------------------------------------------------------------------
/agentic-apps/agentic-idp/birth_cert.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-scalable-model-inference-and-agentic-ai-on-amazon-eks/HEAD/agentic-apps/agentic-idp/birth_cert.png
--------------------------------------------------------------------------------
/image/Inference_GenAI_architecture_EKS.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-scalable-model-inference-and-agentic-ai-on-amazon-eks/HEAD/image/Inference_GenAI_architecture_EKS.jpg
--------------------------------------------------------------------------------
/agentic-apps/agentic_rag_milvus/images/image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-scalable-model-inference-and-agentic-ai-on-amazon-eks/HEAD/agentic-apps/agentic_rag_milvus/images/image.png -------------------------------------------------------------------------------- /image/Inference_GenAI_app_architecture_EKS.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-scalable-model-inference-and-agentic-ai-on-amazon-eks/HEAD/image/Inference_GenAI_app_architecture_EKS.jpg -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/images/image1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-scalable-model-inference-and-agentic-ai-on-amazon-eks/HEAD/agentic-apps/agentic_rag_milvus/images/image1.png -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/images/image2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-scalable-model-inference-and-agentic-ai-on-amazon-eks/HEAD/agentic-apps/agentic_rag_milvus/images/image2.png -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/images/image3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-scalable-model-inference-and-agentic-ai-on-amazon-eks/HEAD/agentic-apps/agentic_rag_milvus/images/image3.png -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/images/image4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-scalable-model-inference-and-agentic-ai-on-amazon-eks/HEAD/agentic-apps/agentic_rag_milvus/images/image4.png -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_opensearch/images/arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-scalable-model-inference-and-agentic-ai-on-amazon-eks/HEAD/agentic-apps/agentic_rag_opensearch/images/arch.png -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_opensearch/images/image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-scalable-model-inference-and-agentic-ai-on-amazon-eks/HEAD/agentic-apps/agentic_rag_opensearch/images/image.png -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_opensearch/images/image1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-scalable-model-inference-and-agentic-ai-on-amazon-eks/HEAD/agentic-apps/agentic_rag_opensearch/images/image1.png -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_opensearch/images/image2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-scalable-model-inference-and-agentic-ai-on-amazon-eks/HEAD/agentic-apps/agentic_rag_opensearch/images/image2.png -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_opensearch/images/image3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-scalable-model-inference-and-agentic-ai-on-amazon-eks/HEAD/agentic-apps/agentic_rag_opensearch/images/image3.png -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_opensearch/images/image4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-scalable-model-inference-and-agentic-ai-on-amazon-eks/HEAD/agentic-apps/agentic_rag_opensearch/images/image4.png -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/images/arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-scalable-model-inference-and-agentic-ai-on-amazon-eks/HEAD/agentic-apps/strandsdk_agentic_rag_opensearch/images/arch.png -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/src/agents/rag_agent.py: -------------------------------------------------------------------------------- 1 | # This file has been removed - RAG functionality is now integrated into the supervisor_agent.py 2 | # The supervisor agent now directly uses the search_knowledge_base tool for RAG operations 3 | -------------------------------------------------------------------------------- /milvus/ebs-storage-class.yaml: -------------------------------------------------------------------------------- 1 | # ebs-storage-class.yaml 2 | apiVersion: storage.k8s.io/v1 3 | kind: StorageClass 4 | metadata: 5 | name: ebs-sc 6 | provisioner: ebs.csi.aws.com 7 | volumeBindingMode: WaitForFirstConsumer 8 | parameters: 9 | type: gp3 10 | encrypted: "true" 11 | -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/src/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """Utility functions and helpers.""" 2 | 3 | from .logging import log_title, setup_logging 4 | from .langfuse_config import LangfuseConfig, langfuse_config 5 | 6 | __all__ = ["log_title", "setup_logging", "LangfuseConfig", "langfuse_config"] 7 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 
5 | 
--------------------------------------------------------------------------------
/base_eks_setup/gp3.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: storage.k8s.io/v1
2 | kind: StorageClass
3 | metadata:
4 |   name: gp3
5 |   annotations:
6 |     storageclass.kubernetes.io/is-default-class: "true"
7 | provisioner: ebs.csi.aws.com
8 | parameters:
9 |   type: gp3
10 |   fsType: ext4
11 | reclaimPolicy: Delete
12 | volumeBindingMode: Immediate
--------------------------------------------------------------------------------
/agentic-apps/agentic_rag_opensearch/src/agents/index.ts:
--------------------------------------------------------------------------------
1 | export { default as SupervisorAgent } from './SupervisorAgent';
2 | export { default as KnowledgeAgent } from './KnowledgeAgent';
3 | export { default as RAGAgent } from './RAGAgent';
4 | export { default as MCPAgent } from './MCPAgent';
5 | 
6 | export type { AgentTask, AgentResult } from './SupervisorAgent';
7 | 
--------------------------------------------------------------------------------
/agentic-apps/agentic_rag_milvus/src/VectorStore.ts:
--------------------------------------------------------------------------------
1 | export interface VectorStoreItem {
2 |   embedding: number[];
3 |   document: string;
4 | }
5 | 
6 | export interface VectorStore {
7 |   addEmbedding(embedding: number[], document: string): Promise<void>;
8 |   search(queryEmbedding: number[], topK: number): Promise<VectorStoreItem[]>;
9 |   close?(): Promise<void>;
10 | }
11 | 
--------------------------------------------------------------------------------
/agentic-apps/agentic_rag_opensearch/src/VectorStore.ts:
--------------------------------------------------------------------------------
1 | export interface VectorStoreItem {
2 |   embedding: number[];
3 |   document: string;
4 | }
5 | 
6 | export interface VectorStore {
7 |   addEmbedding(embedding: number[], document: string): Promise<void>;
8 |   search(queryEmbedding: number[], topK: number): Promise<VectorStoreItem[]>;
9 |   close?(): Promise<void>;
10 | }
11 | 
--------------------------------------------------------------------------------
/agentic-apps/strandsdk_agentic_rag_opensearch/src/agents/__init__.py:
--------------------------------------------------------------------------------
1 | """Multi-agent system using Strands SDK with built-in tracing."""
2 | 
3 | from .supervisor_agent import supervisor_agent
4 | from .knowledge_agent import knowledge_agent
5 | from .mcp_agent import mcp_agent
6 | 
7 | __all__ = [
8 |     "supervisor_agent",
9 |     "knowledge_agent",
10 |     "mcp_agent"
11 | ]
12 | 
--------------------------------------------------------------------------------
/agentic-apps/agentic_rag_milvus/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 |   "compilerOptions": {
3 |     "target": "ES2020",
4 |     "module": "ESNext",
5 |     "moduleResolution": "Bundler",
6 |     "esModuleInterop": true,
7 |     "outDir": "./dist",
8 |     "rootDir": "./src",
9 |     "strict": true
10 |   },
11 |   "include": [
12 |     "src/**/*"
13 |   ],
14 |   "exclude": [
15 |     "node_modules"
16 |   ]
17 | }
--------------------------------------------------------------------------------
/agentic-apps/agentic_rag_opensearch/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 |   "compilerOptions": {
3 |     "target": "ES2020",
4 |     "module": "ESNext",
5 |     "moduleResolution": "Bundler",
6 |     "esModuleInterop": true,
7 |     "outDir": "./dist",
8 |     "rootDir": "./src",
9 |     "strict": true
10 |   },
11 |   "include": [
12 |     "src/**/*"
13 |   ],
14 |   "exclude": [
15 |     "node_modules"
16 |   ]
17 | }
--------------------------------------------------------------------------------
/base_eks_setup/tracking_stack.yaml:
--------------------------------------------------------------------------------
1 | AWSTemplateFormatVersion: '2010-09-09'
2 | Description: '(SO9150) - Guidance for Scalable Model Inference and Agentic AI on Amazon EKS'
3 | Resources:
4 |   EmptyResource:
5 |     Type: 'AWS::CloudFormation::WaitConditionHandle'
6 | 
7 | Outputs:
8 |   ProjectStatus:
9 |     Description: 'Project initialization status'
10 |     Value: 'Starting the Agentic AI project'
11 | 
--------------------------------------------------------------------------------
/agentic-apps/agentic_rag_milvus/src/utils.ts:
--------------------------------------------------------------------------------
1 | import chalk from "chalk";
2 | 
3 | export function logTitle(message: string) {
4 |     const totalLength = 80;
5 |     const messageLength = message.length;
6 |     const padding = Math.max(0, totalLength - messageLength - 2); // 2 for the spaces around the message
7 |     const paddedMessage = `${'='.repeat(Math.floor(padding / 2))} ${message} ${'='.repeat(Math.ceil(padding / 2))}`;
8 |     console.log(chalk.bold.cyanBright(paddedMessage));
9 | }
--------------------------------------------------------------------------------
/agentic-apps/agentic_rag_opensearch/src/utils.ts:
--------------------------------------------------------------------------------
1 | import chalk from "chalk";
2 | 
3 | export function logTitle(message: string) {
4 |     const totalLength = 80;
5 |     const messageLength = message.length;
6 |     const padding = Math.max(0, totalLength - messageLength - 2); // 2 for the spaces around the message
7 |     const paddedMessage = `${'='.repeat(Math.floor(padding / 2))} ${message} ${'='.repeat(Math.ceil(padding / 2))}`;
8 |     console.log(chalk.bold.cyanBright(paddedMessage));
9 | }
--------------------------------------------------------------------------------
/model-observability/langfuse-secret.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Secret
3 | type: Opaque
4 | metadata:
5 |   name: langfuse
6 | stringData:
7 |   salt: changeme
8 |   encryption-key: 0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef # Generate with `openssl rand -hex 32`
9 |   nextauth-secret: changeme
10 |   postgresql-password: postgres
11 |   clickhouse-password: changeme
12 |   redis-password: changeme
13 |   s3-user: minio
14 |   s3-password: miniosecret
--------------------------------------------------------------------------------
/benchmark/Dockerfile:
--------------------------------------------------------------------------------
1 | # Build stage
2 | FROM golang:1.21-alpine AS builder
3 | 
4 | WORKDIR /app
5 | COPY . .
6 | RUN CGO_ENABLED=0 GOOS=linux go build -o benchmark perf_benchmark.go
7 | 
8 | # Run stage
9 | FROM alpine:latest
10 | 
11 | WORKDIR /app
12 | COPY --from=builder /app/benchmark .
13 | COPY prompts.txt /app 14 | 15 | ENV URL="http://localhost:8000/v1/chat/completions" 16 | ENV REQUESTS_PER_PROMPT=10 17 | ENV NUM_WARMUP_REQUESTS=3 18 | 19 | CMD ["./benchmark"] 20 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/knowledge/user_9.md: -------------------------------------------------------------------------------- 1 | # Glenna Reichert 2 | 3 | - **Username**: Delphine 4 | - **Email**: Chaim_McDermott@dana.io 5 | - **Address**: 6 | - Street: Dayna Park 7 | - Suite: Suite 449 8 | - City: Bartholomebury 9 | - Zipcode: 76495-3109 10 | - Geo: 11 | - Latitude: 24.6463 12 | - Longitude: -168.8889 13 | - **Phone**: (775)976-6794 x41206 14 | - **Website**: conrad.com 15 | - **Company**: 16 | - Name: Yost and Sons 17 | - Catchphrase: Switchable contextual benchmark -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/knowledge/user_7.md: -------------------------------------------------------------------------------- 1 | # Kurtis Weissnat 2 | 3 | - **Username**: Elwyn.Skiles 4 | - **Email**: Telly.Hoeger@billy.biz 5 | - **Address**: 6 | - Street: Rex Trail 7 | - Suite: Suite 280 8 | - City: Howemouth 9 | - Zipcode: 58804-1099 10 | - Geo: 11 | - Latitude: 24.8918 12 | - Longitude: 21.8984 13 | - **Phone**: 210.067.6132 14 | - **Website**: elvis.io 15 | - **Company**: 16 | - Name: Johns Group 17 | - Catchphrase: Configurable multimedia task-force 18 | - BS: generate enterprise e-tailers -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/knowledge/user_5.md: -------------------------------------------------------------------------------- 1 | # Chelsey Dietrich 2 | 3 | - **Username**: Kamren 4 | - **Email**: Lucio_Hettinger@annie.ca 5 | - **Address**: 6 | - Street: Skiles Walks 7 | - Suite: Suite 351 8 | - City: Roscoeview 9 | - Zipcode: 33263 10 | - Geo: 11 | - Latitude: -31.8129 12 | - Longitude: 62.5342 13 | - **Phone**: (254)954-1289 14 | - **Website**: demarco.info 15 | - **Company**: 16 | - Name: Keebler LLC 17 | - Catchphrase: User-centric fault-tolerant solution 18 | - BS: revolutionize end-to-end systems -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/knowledge/user_1.md: -------------------------------------------------------------------------------- 1 | # Leanne Graham 2 | 3 | - **Username**: Bret 4 | - **Email**: Sincere@april.biz 5 | - **Address**: 6 | - Street: Kulas Light 7 | - Suite: Apt. 
556 8 | - City: Gwenborough 9 | - Zipcode: 92998-3874 10 | - Geo: 11 | - Latitude: -37.3159 12 | - Longitude: 81.1496 13 | - **Phone**: 1-770-736-8031 x56442 14 | - **Website**: hildegard.org 15 | - **Company**: 16 | - Name: Romaguera-Crona 17 | - Catchphrase: Multi-layered client-server neural-net 18 | - BS: harness real-time e-markets -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/knowledge/user_10.md: -------------------------------------------------------------------------------- 1 | # Clementina DuBuque 2 | 3 | - **Username**: Moriah.Stanton 4 | - **Email**: Rey.Padberg@karina.biz 5 | - **Address**: 6 | - Street: Kattie Turnpike 7 | - Suite: Suite 198 8 | - City: Lebsackbury 9 | - Zipcode: 31428-2261 10 | - Geo: 11 | - Latitude: -38.2386 12 | - Longitude: 57.2232 13 | - **Phone**: 024-648-3804 14 | - **Website**: ambrose.net 15 | - **Company**: 16 | - Name: Hoeger LLC 17 | - Catchphrase: Centralized empowering task-force 18 | - BS: target end-to-end models -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/knowledge/user_2.md: -------------------------------------------------------------------------------- 1 | # Ervin Howell 2 | 3 | - **Username**: Antonette 4 | - **Email**: Shanna@melissa.tv 5 | - **Address**: 6 | - Street: Victor Plains 7 | - Suite: Suite 879 8 | - City: Wisokyburgh 9 | - Zipcode: 90566-7771 10 | - Geo: 11 | - Latitude: -43.9509 12 | - Longitude: -34.4618 13 | - **Phone**: 010-692-6593 x09125 14 | - **Website**: anastasia.net 15 | - **Company**: 16 | - Name: Deckow-Crist 17 | - Catchphrase: Proactive didactic contingency 18 | - BS: synergize scalable supply-chains -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/knowledge/user_3.md: -------------------------------------------------------------------------------- 1 | # Clementine Bauch 2 | 3 | - **Username**: Samantha 4 | - **Email**: Nathan@yesenia.net 5 | - **Address**: 6 | - Street: Douglas Extension 7 | - Suite: Suite 847 8 | - City: McKenziehaven 9 | - Zipcode: 59590-4157 10 | - Geo: 11 | - Latitude: -68.6102 12 | - Longitude: -47.0653 13 | - **Phone**: 1-463-123-4447 14 | - **Website**: ramiro.info 15 | - **Company**: 16 | - Name: Romaguera-Jacobson 17 | - Catchphrase: Face to face bifurcated interface 18 | - BS: e-enable strategic applications -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/knowledge/user_8.md: -------------------------------------------------------------------------------- 1 | # Nicholas Runolfsdottir V 2 | 3 | - **Username**: Maxime_Nienow 4 | - **Email**: Sherwood@rosamond.me 5 | - **Address**: 6 | - Street: Ellsworth Summit 7 | - Suite: Suite 729 8 | - City: Aliyaview 9 | - Zipcode: 45169 10 | - Geo: 11 | - Latitude: -14.3990 12 | - Longitude: -120.7677 13 | - **Phone**: 586.493.6943 x140 14 | - **Website**: jacynthe.com 15 | - **Company**: 16 | - Name: Abernathy Group 17 | - Catchphrase: Implemented secondary concept 18 | - BS: e-enable extensible e-tailers -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/knowledge/user_4.md: -------------------------------------------------------------------------------- 1 | # Patricia Lebsack 2 | 3 | - **Username**: Karianne 4 | - **Email**: Julianne.OConner@kory.org 5 | - **Address**: 6 | - Street: Hoeger Mall 7 | - Suite: Apt. 
692 8 | - City: South Elvis 9 | - Zipcode: 53919-4257 10 | - Geo: 11 | - Latitude: 29.4572 12 | - Longitude: -164.2990 13 | - **Phone**: 493-170-9623 x156 14 | - **Website**: kale.biz 15 | - **Company**: 16 | - Name: Robel-Corkery 17 | - Catchphrase: Multi-tiered zero tolerance productivity 18 | - BS: transition cutting-edge web services -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/knowledge/user_6.md: -------------------------------------------------------------------------------- 1 | # Mrs. Dennis Schulist 2 | 3 | - **Username**: Leopoldo_Corkery 4 | - **Email**: Karley_Dach@jasper.info 5 | - **Address**: 6 | - Street: Norberto Crossing 7 | - Suite: Apt. 950 8 | - City: South Christy 9 | - Zipcode: 23505-1337 10 | - Geo: 11 | - Latitude: -71.4197 12 | - Longitude: 71.7478 13 | - **Phone**: 1-477-935-8478 x6430 14 | - **Website**: ola.org 15 | - **Company**: 16 | - Name: Considine-Lockman 17 | - Catchphrase: Synchronised bottom-line interface 18 | - BS: e-enable innovative applications -------------------------------------------------------------------------------- /model-observability/langfuse-redis-port-patch.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: langfuse-web 5 | namespace: default 6 | spec: 7 | template: 8 | spec: 9 | containers: 10 | - name: langfuse-web 11 | env: 12 | - name: REDIS_PORT 13 | value: "6379" 14 | --- 15 | apiVersion: apps/v1 16 | kind: Deployment 17 | metadata: 18 | name: langfuse-worker 19 | namespace: default 20 | spec: 21 | template: 22 | spec: 23 | containers: 24 | - name: langfuse-worker 25 | env: 26 | - name: REDIS_PORT 27 | value: "6379" 28 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/test-file-write.js: -------------------------------------------------------------------------------- 1 | // Simple test script to verify file writing works 2 | import fs from 'fs'; 3 | import path from 'path'; 4 | 5 | const outPath = path.resolve(process.cwd(), 'output'); 6 | const filePath = path.join(outPath, 'test.md'); 7 | 8 | console.log(`Attempting to write to: ${filePath}`); 9 | console.log(`Output directory exists: ${fs.existsSync(outPath)}`); 10 | 11 | try { 12 | fs.writeFileSync(filePath, '# Test File\n\nThis is a test.'); 13 | console.log(`Successfully wrote file to ${filePath}`); 14 | } catch (error) { 15 | console.error(`Failed to write file: ${error}`); 16 | } 17 | -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/healthcheck-mcp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Health check script for MCP Server 4 | 5 | # Load environment variables from ConfigMap or local file 6 | if [ -f "/app/config/.env" ]; then 7 | export $(grep -v '^#' /app/config/.env | xargs) 2>/dev/null || true 8 | elif [ -f "/app/.env" ]; then 9 | export $(grep -v '^#' /app/.env | xargs) 2>/dev/null || true 10 | fi 11 | 12 | # Check if the MCP server is responding 13 | if curl -f -s http://localhost:8001/mcp/ > /dev/null 2>&1; then 14 | echo "MCP Server: Running" 15 | exit 0 16 | else 17 | echo "MCP Server: Not responding" 18 | exit 1 19 | fi 20 | -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/healthcheck-main.sh: 
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Health check script for Main Application (FastAPI Server)
4 | 
5 | # Load environment variables from ConfigMap or local file
6 | if [ -f "/app/config/.env" ]; then
7 |     export $(grep -v '^#' /app/config/.env | xargs) 2>/dev/null || true
8 | elif [ -f "/app/.env" ]; then
9 |     export $(grep -v '^#' /app/.env | xargs) 2>/dev/null || true
10 | fi
11 | 
12 | # Check if the FastAPI server is responding
13 | if curl -f -s http://localhost:8000/health > /dev/null 2>&1; then
14 |     echo "FastAPI Server: Running"
15 |     exit 0
16 | else
17 |     echo "FastAPI Server: Not responding"
18 |     exit 1
19 | fi
20 | 
--------------------------------------------------------------------------------
/milvus/milvus-nlb-service.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Service
3 | metadata:
4 |   name: milvus-nlb
5 |   namespace: default
6 |   annotations:
7 |     service.beta.kubernetes.io/aws-load-balancer-type: nlb
8 |     service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
9 | spec:
10 |   type: LoadBalancer
11 |   ports:
12 |     - name: milvus
13 |       port: 19530
14 |       targetPort: 19530
15 |       protocol: TCP
16 |     - name: metrics
17 |       port: 9091
18 |       targetPort: 9091
19 |       protocol: TCP
20 |   selector:
21 |     app.kubernetes.io/instance: my-release
22 |     app.kubernetes.io/managed-by: milvus-operator
23 |     app.kubernetes.io/name: milvus
24 |     "milvus.io/service": "true"
25 | 
--------------------------------------------------------------------------------
/model-hosting/ray-services/ingress/add-sg-lb-eks.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | 
4 | REGION=ap-southeast-2
5 | CLUSTER_NAME=llm-eks-cluster
6 | ALB_NAME=llama-cpp-cpu-lb
7 | export AWS_DEFAULT_REGION=$REGION
8 | 
9 | CLUSTER_SG=$(aws eks describe-cluster \
10 |   --name $CLUSTER_NAME \
11 |   --query 'cluster.resourcesVpcConfig.clusterSecurityGroupId' \
12 |   --output text)
13 | 
14 | ALB_SG=$(aws elbv2 describe-load-balancers \
15 |   --names $ALB_NAME \
16 |   --query 'LoadBalancers[0].SecurityGroups[0]' \
17 |   --output text)
18 | 
19 | aws ec2 authorize-security-group-ingress \
20 |   --group-id $CLUSTER_SG \
21 |   --source-group $ALB_SG \
22 |   --protocol tcp \
23 |   --port 0-65535
--------------------------------------------------------------------------------
/model-gateway/litellm-ingress.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: networking.k8s.io/v1
2 | kind: Ingress
3 | metadata:
4 |   name: litellm-ingress-alb
5 |   annotations:
6 |     kubernetes.io/ingress.class: alb
7 |     alb.ingress.kubernetes.io/scheme: internet-facing
8 |     alb.ingress.kubernetes.io/target-type: ip
9 |     alb.ingress.kubernetes.io/healthcheck-path: /
10 |     alb.ingress.kubernetes.io/healthcheck-port: "4000"
11 |     alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}]'
12 | spec:
13 |   rules:
14 |     - http:
15 |         paths:
16 |           - path: /
17 |             pathType: Prefix
18 |             backend:
19 |               service:
20 |                 name: litellm
21 |                 port:
22 |                   number: 4000
23 | 
--------------------------------------------------------------------------------
/model-observability/langfuse-web-ingress.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: networking.k8s.io/v1
2 | kind: Ingress
3 | metadata:
4 |   name: langfuse-web-ingress-alb
5 |   annotations:
6 |     kubernetes.io/ingress.class: alb
7 | 
alb.ingress.kubernetes.io/scheme: internet-facing 8 | alb.ingress.kubernetes.io/target-type: ip 9 | alb.ingress.kubernetes.io/healthcheck-path: / 10 | alb.ingress.kubernetes.io/healthcheck-port: '3000' 11 | alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}]' 12 | spec: 13 | rules: 14 | - http: 15 | paths: 16 | - path: / 17 | pathType: Prefix 18 | backend: 19 | service: 20 | name: langfuse-web 21 | port: 22 | number: 3000 23 | -------------------------------------------------------------------------------- /agentic-apps/agentic-idp/doc_reader.py: -------------------------------------------------------------------------------- 1 | from PyPDF2 import PdfReader 2 | 3 | from pathlib import Path 4 | import logging, base64 5 | 6 | 7 | def encode_image(image_path): 8 | """Encode image to base64 string""" 9 | with open(image_path, "rb") as image_file: 10 | return base64.b64encode(image_file.read()).decode("utf-8") 11 | 12 | # Add this function to handle PDF processing 13 | def process_pdf(pdf_path: str) -> str: 14 | """Process PDF and return its content""" 15 | try: 16 | reader = PdfReader(pdf_path) 17 | text = "" 18 | for page in reader.pages: 19 | text += page.extract_text() 20 | return text 21 | except Exception as e: 22 | logging.error(f"PDF processing error: {str(e)}") 23 | return "" -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/src/utils/logging.py: -------------------------------------------------------------------------------- 1 | """Logging utilities for the application.""" 2 | 3 | import logging 4 | import sys 5 | from typing import Optional 6 | 7 | def setup_logging(level: str = "INFO") -> None: 8 | """Setup logging configuration.""" 9 | logging.basicConfig( 10 | level=getattr(logging, level.upper()), 11 | format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", 12 | handlers=[ 13 | logging.StreamHandler(sys.stdout), 14 | ] 15 | ) 16 | 17 | def log_title(title: str, width: int = 60) -> None: 18 | """Print a formatted title for logging.""" 19 | border = "=" * width 20 | padding = (width - len(title) - 2) // 2 21 | formatted_title = f"{border}\n{' ' * padding} {title} {' ' * padding}\n{border}" 22 | print(formatted_title) 23 | -------------------------------------------------------------------------------- /model-hosting/ray-services/ingress/ingress-embedding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: networking.k8s.io/v1 2 | kind: Ingress 3 | metadata: 4 | name: embedding-ray-service-ingress 5 | annotations: 6 | alb.ingress.kubernetes.io/scheme: internet-facing 7 | alb.ingress.kubernetes.io/target-type: ip 8 | alb.ingress.kubernetes.io/load-balancer-name: "embedding-ray-lb" 9 | alb.ingress.kubernetes.io/healthcheck-protocol: HTTP 10 | alb.ingress.kubernetes.io/healthcheck-port: "8265" 11 | alb.ingress.kubernetes.io/healthcheck-path: "/" 12 | alb.ingress.kubernetes.io/success-codes: "200" 13 | spec: 14 | ingressClassName: alb 15 | rules: 16 | - http: 17 | paths: 18 | - path: / 19 | pathType: Prefix 20 | backend: 21 | service: 22 | name: ray-service-llamacpp-serve-svc 23 | port: 24 | number: 8000 25 | -------------------------------------------------------------------------------- /model-hosting/ray-services/ingress/ingress-cpu.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: networking.k8s.io/v1 2 | kind: Ingress 3 | metadata: 4 | name: llama-3-8b-cpu-llama-serve-ingress 5 | 
annotations: 6 | alb.ingress.kubernetes.io/scheme: internet-facing 7 | alb.ingress.kubernetes.io/target-type: ip 8 | alb.ingress.kubernetes.io/load-balancer-name: "llama-cpp-cpu-lb" 9 | alb.ingress.kubernetes.io/healthcheck-protocol: HTTP 10 | alb.ingress.kubernetes.io/healthcheck-port: "8265" 11 | alb.ingress.kubernetes.io/healthcheck-path: "/" 12 | alb.ingress.kubernetes.io/success-codes: "200" 13 | spec: 14 | ingressClassName: alb 15 | rules: 16 | - http: 17 | paths: 18 | - path: / 19 | pathType: Prefix 20 | backend: 21 | service: 22 | name: ray-service-llamacpp-serve-svc 23 | port: 24 | number: 8000 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /agentic-apps/agentic-idp/mcp.py: -------------------------------------------------------------------------------- 1 | # # https://modelcontextprotocol.io/quickstart/server 2 | # from typing import Any 3 | # import httpx 4 | # from mcp.server.fastmcp import FastMCP 5 | 6 | # # Initialize FastMCP server 7 | # mcp = FastMCP("weather") 8 | 9 | 10 | # @mcp.tool() 11 | # async def get_personal_info(state: str) -> str: 12 | # """Get weather alerts for a US state. 13 | 14 | # Args: 15 | # state: Two-letter US state code (e.g. CA, NY) 16 | # """ 17 | # url = f"{NWS_API_BASE}/alerts/active/area/{state}" 18 | # data = await make_nws_request(url) 19 | 20 | # if not data or "features" not in data: 21 | # return "Unable to fetch alerts or no alerts found." 22 | 23 | # if not data["features"]: 24 | # return "No active alerts for this state." 25 | 26 | # alerts = [format_alert(feature) for feature in data["features"]] 27 | # return "\n---\n".join(alerts) -------------------------------------------------------------------------------- /agentic-apps/agentic-idp/.env.example: -------------------------------------------------------------------------------- 1 | # Model Configuration 2 | # API key for accessing the LLM vision model through the gateway 3 | LLAMA_VISION_MODEL_KEY=your-api-key-here 4 | 5 | # URL of the LiteLLM API gateway deployed in your EKS cluster 6 | # This should be the load balancer URL from your model-gateway deployment 7 | API_GATEWAY_URL=http://your-litellm-gateway-url 8 | 9 | # Langfuse Configuration for LLM Observability 10 | # URL of the Langfuse service deployed in your EKS cluster 11 | # This should be the load balancer URL from your model-observability deployment 12 | LANGFUSE_HOST=http://your-langfuse-host-url 13 | 14 | # Langfuse public key for authentication 15 | # Get this from your Langfuse dashboard after creating a project 16 | LANGFUSE_PUBLIC_KEY=pk-lf-your-public-key 17 | 18 | # Langfuse secret key for authentication 19 | # Get this from your Langfuse dashboard after creating a project 20 | LANGFUSE_SECRET_KEY=sk-lf-your-secret-key 21 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/list_collections.py: -------------------------------------------------------------------------------- 1 | import os 2 | from dotenv import load_dotenv 3 | from pymilvus import connections, utility 4 | 5 | # Load environment variables 6 | load_dotenv() 7 | 8 | # Get Milvus address from environment 9 | milvus_address = os.getenv("MILVUS_ADDRESS", "localhost:19530") 10 | milvus_host, milvus_port = milvus_address.split(":") 11 | 12 | # Connect to Milvus 13 | print(f"Connecting to Milvus at {milvus_host}:{milvus_port}...") 14 | try: 15 | connections.connect( 16 | alias="default", 17 | host=milvus_host, 18 | port=milvus_port 19 | ) 
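# NOTE: the split above assumes MILVUS_ADDRESS is a bare "host:port" pair;
# a URI such as "http://host:19530" has extra ":" segments and would need
# urllib.parse instead.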
20 | 21 | # List all collections 22 | collections = utility.list_collections() 23 | print("Available collections:") 24 | for collection in collections: 25 | print(f"- {collection}") 26 | 27 | # Close connection 28 | connections.disconnect("default") 29 | 30 | except Exception as e: 31 | print(f"Error connecting to Milvus: {e}") 32 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT No Attribution 2 | 3 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so. 10 | 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 13 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 14 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 15 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 16 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 17 | 18 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_opensearch/update-policy.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "Rules": [ 4 | { 5 | "Resource": [ 6 | "collection/vectordb" 7 | ], 8 | "Permission": [ 9 | "aoss:CreateCollectionItems", 10 | "aoss:DeleteCollectionItems", 11 | "aoss:UpdateCollectionItems", 12 | "aoss:DescribeCollectionItems" 13 | ], 14 | "ResourceType": "collection" 15 | }, 16 | { 17 | "Resource": [ 18 | "index/vectordb/*" 19 | ], 20 | "Permission": [ 21 | "aoss:CreateIndex", 22 | "aoss:DeleteIndex", 23 | "aoss:UpdateIndex", 24 | "aoss:DescribeIndex", 25 | "aoss:ReadDocument", 26 | "aoss:WriteDocument" 27 | ], 28 | "ResourceType": "index" 29 | } 30 | ], 31 | "Principal": [ 32 | "arn:aws:iam::412381761882:user/admin", 33 | "arn:aws:iam::412381761882:root" 34 | ], 35 | "Description": "Rule 1" 36 | } 37 | ] 38 | -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/update-k8s-config.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Interactive Kubernetes ConfigMap Update Script 4 | # Updates k8s/configmap.yaml with your actual service endpoints and API keys 5 | 6 | echo "🚀 Starting Kubernetes ConfigMap Update Tool..." 7 | echo "" 8 | 9 | # Check if Python 3 is available 10 | if ! command -v python3 &> /dev/null; then 11 | echo "❌ Python 3 is required but not found" 12 | echo "Please install Python 3 and try again" 13 | exit 1 14 | fi 15 | 16 | # Check if k8s directory exists 17 | if [ ! -d "k8s" ]; then 18 | echo "❌ k8s directory not found" 19 | echo "Please run this script from the project root directory" 20 | exit 1 21 | fi 22 | 23 | # Check if configmap.yaml exists 24 | if [ ! 
-f "k8s/configmap.yaml" ]; then 25 | echo "❌ k8s/configmap.yaml not found" 26 | echo "Please ensure the ConfigMap file exists" 27 | exit 1 28 | fi 29 | 30 | # Run the Python script 31 | python3 ./update_k8s_config.py 32 | 33 | echo "" 34 | echo "✅ ConfigMap update completed!" 35 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/src/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "agentic-rag-opensearch", 3 | "version": "1.0.0", 4 | "description": "Agentic RAG system with MCP and custom embedding", 5 | "main": "index.js", 6 | "type": "module", 7 | "scripts": { 8 | "dev": "tsx index.ts", 9 | "embed-knowledge": "tsx embedKnowledge.ts", 10 | "embed-csv": "tsx embedCSV.ts", 11 | "update-rag": "tsx updateRAG.ts", 12 | "test": "echo \"Error: no test specified\" && exit 1" 13 | }, 14 | "keywords": [ 15 | "rag", 16 | "mcp", 17 | "llm", 18 | "agent" 19 | ], 20 | "author": "", 21 | "license": "ISC", 22 | "dependencies": { 23 | "@aws-sdk/client-bedrock-runtime": "^3.525.0", 24 | "@modelcontextprotocol/sdk": "^1.10.1", 25 | "@modelcontextprotocol/server-filesystem": "^2025.3.28", 26 | "@zilliz/milvus2-sdk-node": "^2.5.8", 27 | "chalk": "^5.4.1", 28 | "csv-parse": "^5.5.5", 29 | "dotenv": "^16.4.5", 30 | "node-fetch": "^3.3.2", 31 | "openai": "^4.28.4", 32 | "tsx": "^4.7.1", 33 | "typescript": "^5.3.3" 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_opensearch/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "agentic-rag-opensearch", 3 | "version": "1.0.0", 4 | "description": "Multi-Agent RAG system with MCP and OpenSearch", 5 | "main": "index.js", 6 | "type": "module", 7 | "scripts": { 8 | "dev": "tsx src/index.ts", 9 | "embed-knowledge": "tsx src/scripts/embedKnowledge.ts", 10 | "test-agents": "tsx src/test-agents.ts", 11 | "test": "echo \"Error: no test specified\" && exit 1" 12 | }, 13 | "keywords": [ 14 | "rag", 15 | "mcp", 16 | "llm", 17 | "agent", 18 | "opensearch", 19 | "multi-agent" 20 | ], 21 | "author": "", 22 | "license": "ISC", 23 | "dependencies": { 24 | "@modelcontextprotocol/sdk": "^1.10.1", 25 | "@modelcontextprotocol/server-filesystem": "^2025.3.28", 26 | "@opensearch-project/opensearch": "^3.5.1", 27 | "aws-sdk": "^2.1574.0", 28 | "chalk": "^5.4.1", 29 | "csv-parse": "^5.5.5", 30 | "dotenv": "^16.4.5", 31 | "langfuse": "^3.30.0", 32 | "node-fetch": "^3.3.2", 33 | "openai": "^4.28.4", 34 | "tsx": "^4.7.1", 35 | "typescript": "^5.3.3" 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/src/utils/model_providers.py: -------------------------------------------------------------------------------- 1 | """Model provider configurations for Strands agents.""" 2 | 3 | from strands.models.openai import OpenAIModel 4 | from ..config import config 5 | 6 | def create_openai_reasoning_model(): 7 | """Create an OpenAI model instance for reasoning tasks.""" 8 | return OpenAIModel( 9 | client_args={ 10 | "api_key": config.LITELLM_API_KEY, 11 | "base_url": config.LITELLM_BASE_URL, 12 | }, 13 | model_id=config.REASONING_MODEL, 14 | params={ 15 | "temperature": 0.7, 16 | "max_tokens": 4096, 17 | } 18 | ) 19 | 20 | def get_reasoning_model(): 21 | """Get the configured reasoning model for agents.""" 22 | try: 23 | # Try to use OpenAI client 24 | 
return create_openai_reasoning_model() 25 | except ImportError: 26 | # Fallback to string model ID 27 | return config.REASONING_MODEL 28 | except Exception as e: 29 | print(f"Warning: Failed to create OpenAI model, falling back to string ID: {e}") 30 | return config.REASONING_MODEL 31 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "agentic-rag-opensearch", 3 | "version": "1.0.0", 4 | "description": "Agentic RAG system with MCP and custom embedding", 5 | "main": "index.js", 6 | "type": "module", 7 | "scripts": { 8 | "dev": "tsx src/index.ts", 9 | "embed-knowledge": "tsx src/embedKnowledge.ts", 10 | "embed-csv": "tsx src/embedCSV.ts", 11 | "update-rag": "tsx src/updateRAG.ts", 12 | "rebuild-collection": "tsx src/rebuildCollection.ts", 13 | "test": "echo \"Error: no test specified\" && exit 1" 14 | }, 15 | "keywords": [ 16 | "rag", 17 | "mcp", 18 | "llm", 19 | "agent" 20 | ], 21 | "author": "", 22 | "license": "ISC", 23 | "dependencies": { 24 | "@aws-sdk/client-bedrock-runtime": "^3.525.0", 25 | "@modelcontextprotocol/sdk": "^1.10.1", 26 | "@modelcontextprotocol/server-filesystem": "^2025.3.28", 27 | "@zilliz/milvus2-sdk-node": "^2.5.8", 28 | "chalk": "^5.4.1", 29 | "csv-parse": "^5.5.5", 30 | "dotenv": "^16.4.5", 31 | "node-fetch": "^3.3.2", 32 | "openai": "^4.28.4", 33 | "tsx": "^4.7.1", 34 | "typescript": "^5.3.3" 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/requirements.txt: -------------------------------------------------------------------------------- 1 | # Strands SDK and core dependencies 2 | strands-agents>=0.1.0 3 | strands-agents-tools>=0.1.0 4 | strands-agents[litellm]>=0.1.0 5 | 6 | # OpenAI and LLM dependencies 7 | openai>=1.0.0 8 | litellm>=1.0.0 9 | 10 | # MCP dependencies 11 | mcp>=1.0.0 12 | fastmcp>=0.9.0 13 | fastapi>=0.104.0 14 | uvicorn>=0.24.0 15 | 16 | # AWS and OpenSearch dependencies 17 | boto3>=1.34.0 18 | opensearch-py>=2.4.0 19 | aws-requests-auth>=0.4.3 20 | langchain-aws>=0.1.0 21 | 22 | # Fix dependency conflicts by using older compatible versions 23 | dill==0.3.7 24 | datasets==2.14.0 25 | pyarrow>=12.0.0,<15.0.0 26 | 27 | # RAGAs evaluation dependencies - use compatible versions 28 | ragas>=0.1.0,<0.2.0 29 | langchain>=0.1.0,<0.3.0 30 | langchain-core>=0.1.0,<0.3.0 31 | 32 | # Vector embeddings and ML 33 | numpy>=1.24.0,<2.0.0 34 | scikit-learn>=1.3.0 35 | 36 | # Data processing 37 | pandas>=2.0.0 38 | python-dotenv>=1.0.0 39 | requests>=2.31.0 40 | httpx>=0.25.0 41 | 42 | # Observability 43 | langfuse>=2.0.0 44 | 45 | # Utilities 46 | pydantic>=2.0.0 47 | aiofiles>=23.0.0 48 | 49 | # Additional FastAPI server dependencies 50 | python-multipart>=0.0.6 51 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Node.js dependencies 2 | node_modules/ 3 | npm-debug.log 4 | yarn-debug.log 5 | yarn-error.log 6 | .pnpm-debug.log 7 | 8 | # Environment variables 9 | .env 10 | .env.local 11 | .env.development.local 12 | .env.test.local 13 | .env.production.local 14 | 15 | # Build outputs 16 | dist/ 17 | build/ 18 | out/ 19 | .next/ 20 | 21 | # Cache directories 22 | .npm/ 23 | .pnpm-store/ 24 | .yarn/cache 25 | .yarn/unplugged 26 | .yarn/build-state.yml 27 | 
.yarn/install-state.gz 28 | 29 | # Python virtual environments 30 | venv/ 31 | env/ 32 | .venv/ 33 | .env/ 34 | ENV/ 35 | 36 | # Python cache files 37 | __pycache__/ 38 | *.py[cod] 39 | *$py.class 40 | *.so 41 | 42 | # Editor directories and files 43 | .idea/ 44 | .vscode/ 45 | *.swp 46 | *.swo 47 | *~ 48 | 49 | # OS generated files 50 | .DS_Store 51 | .DS_Store? 52 | ._* 53 | .Spotlight-V100 54 | .Trashes 55 | ehthumbs.db 56 | Thumbs.db 57 | 58 | # Project specific files 59 | agentic-apps/agentic_rag_opensearch/pnpm-lock.yaml 60 | agentic-apps/agentic_rag_opensearch/.knowledge-metadata.json 61 | 62 | # Agentic apps output directories 63 | agentic-apps/strandsdk_agentic_rag_opensearch/output/ 64 | # Local backup files with real secrets 65 | **/*.bak 66 | **/configmap.yaml.bak 67 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_opensearch/.env.example: -------------------------------------------------------------------------------- 1 | # OpenAI API Configuration 2 | # API key for accessing the LLM model 3 | # This should match the key configured in your LiteLLM gateway 4 | OPENAI_API_KEY=your-openai-api-key 5 | 6 | # Base URL for the OpenAI-compatible API 7 | # This should be the URL of your model hosting service (Ray service or LiteLLM gateway) 8 | OPENAI_BASE_URL=http://your-model-endpoint/v1 9 | 10 | # OpenSearch Configuration 11 | # Endpoint URL of your OpenSearch cluster 12 | # This will be automatically set by the setup-opensearch.sh script 13 | OPENSEARCH_ENDPOINT=https://your-opensearch-domain-endpoint 14 | 15 | # AWS region where your OpenSearch cluster is deployed 16 | AWS_REGION=us-east-1 17 | 18 | # Langfuse Configuration for LLM Observability 19 | # URL of the Langfuse service deployed in your EKS cluster 20 | # This should be the load balancer URL from your model-observability deployment 21 | LANGFUSE_HOST=http://your-langfuse-host-url 22 | 23 | # Langfuse public key for authentication 24 | # Get this from your Langfuse dashboard after creating a project 25 | LANGFUSE_PUBLIC_KEY=pk-lf-your-public-key 26 | 27 | # Langfuse secret key for authentication 28 | # Get this from your Langfuse dashboard after creating a project 29 | LANGFUSE_SECRET_KEY=sk-lf-your-secret-key 30 | -------------------------------------------------------------------------------- /model-observability/langfuse-value.yaml: -------------------------------------------------------------------------------- 1 | # This values.yaml file demonstrates how to use the basic chart with a single, pre-created secret. 2 | # Secrets must be set manually or via External Secrets Operator like https://external-secrets.io/latest or any other secret management tool. 
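# A typical install flow with this file (Helm repo URL and chart name are
# assumed from the public langfuse-k8s distribution):
#   kubectl apply -f model-observability/langfuse-secret.yaml
#   helm repo add langfuse https://langfuse.github.io/langfuse-k8s
#   helm upgrade --install langfuse langfuse/langfuse -f model-observability/langfuse-value.yaml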
3 | langfuse:
4 |   encryptionKey:
5 |     secretKeyRef:
6 |       name: langfuse
7 |       key: encryption-key
8 | 
9 |   salt:
10 |     secretKeyRef:
11 |       name: langfuse
12 |       key: salt
13 | 
14 |   nextauth:
15 |     secret:
16 |       secretKeyRef:
17 |         name: langfuse
18 |         key: nextauth-secret
19 | 
20 |   # Add environment variables for web and worker deployments
21 |   web:
22 |     extraEnvVars:
23 |       - name: REDIS_PORT
24 |         value: "6379"
25 | 
26 |   worker:
27 |     extraEnvVars:
28 |       - name: REDIS_PORT
29 |         value: "6379"
30 | 
31 | postgresql:
32 |   auth:
33 |     existingSecret: langfuse
34 |     secretKeys:
35 |       adminPasswordKey: postgresql-password
36 |       userPasswordKey: postgresql-password
37 | 
38 | clickhouse:
39 |   auth:
40 |     existingSecret: langfuse
41 |     existingSecretKey: clickhouse-password
42 | 
43 | redis:
44 |   auth:
45 |     existingSecret: langfuse
46 |     existingSecretPasswordKey: redis-password
47 | 
48 | s3:
49 |   auth:
50 |     existingSecret: langfuse
51 |     rootUserSecretKey: s3-user
52 |     rootPasswordSecretKey: s3-password
53 | 
--------------------------------------------------------------------------------
/agentic-apps/strandsdk_agentic_rag_opensearch/Dockerfile.mcp:
--------------------------------------------------------------------------------
1 | # Dockerfile for MCP Server (Tavily Search Server)
2 | FROM python:3.11-slim AS base
3 | 
4 | # Set environment variables
5 | ENV PYTHONUNBUFFERED=1
6 | ENV PYTHONDONTWRITEBYTECODE=1
7 | ENV DEBIAN_FRONTEND=noninteractive
8 | 
9 | # Install system dependencies
10 | RUN apt-get update && apt-get install -y \
11 |     curl \
12 |     git \
13 |     build-essential \
14 |     && rm -rf /var/lib/apt/lists/*
15 | 
16 | # Set working directory
17 | WORKDIR /app
18 | 
19 | # Install only minimal dependencies needed for MCP server
20 | RUN pip install --no-cache-dir \
21 |     "fastmcp>=0.9.0" \
22 |     "python-dotenv>=1.0.0" \
23 |     "httpx>=0.25.0" \
24 |     "requests>=2.31.0" \
25 |     "pydantic>=2.0.0"
26 | 
27 | # Copy the entire application
28 | COPY . .
29 | 
30 | # Create necessary directories
31 | RUN mkdir -p logs
32 | 
33 | # Set proper permissions
34 | RUN chmod +x scripts/*.py 2>/dev/null || true
35 | 
36 | # Copy startup and health check scripts
37 | COPY startup-mcp.sh /app/startup-mcp.sh
38 | COPY healthcheck-mcp.sh /app/healthcheck-mcp.sh
39 | 
40 | # Make startup and health check scripts executable
41 | RUN chmod +x /app/startup-mcp.sh /app/healthcheck-mcp.sh
42 | 
43 | # Expose the MCP server port
44 | EXPOSE 8001
45 | 
46 | # Add health check
47 | HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
48 |     CMD /app/healthcheck-mcp.sh
49 | 
50 | # Set the default command
51 | CMD ["/app/startup-mcp.sh"]
52 | 
--------------------------------------------------------------------------------
/agentic-apps/agentic_rag_opensearch/output/antonette.md:
--------------------------------------------------------------------------------
1 | # Antonette's Journey with Bell's Palsy
2 | 
3 | ## Basic Information
4 | - **Name:** Antonette M. Rivera
5 | - **Age:** 34
6 | - **Occupation:** Graphic Designer
7 | - **Condition:** Bell's Palsy (idiopathic facial paralysis)
8 | - **Key Treatment:** Eye protection to prevent corneal damage
9 | 
10 | ## The Story
11 | Antonette, a creative and detail-oriented graphic designer, woke up one morning with sudden facial numbness and drooping on her right side. She noticed she couldn’t blink her right eye fully and struggled to smile. Panicked, she rushed to the clinic, where her doctor confirmed Bell’s palsy—a diagnosis of exclusion requiring careful evaluation to rule out other causes. 
12 | 13 | The physician emphasized the **most critical aspect of initial treatment**: **eye protection**. Antonette learned that her inability to close her right eyelid left her vulnerable to corneal drying and injury. To safeguard her vision, she was prescribed: 14 | - Artificial tears every 2 hours during the day 15 | - An eye patch at night 16 | - Lubricating ointment before sleep 17 | 18 | Though frustrated by the limitations, Antonette religiously followed the regimen. Over weeks, her facial muscles slowly regained strength. Her diligence paid off—no corneal damage occurred, and by 6 months, her smile was nearly back to normal. 19 | 20 | Antonette’s story underscores how **early, proactive eye care** can prevent irreversible harm in Bell’s palsy. Today, she advocates for awareness, sharing how prioritizing small daily steps saved her vision and quality of life. -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/debug-auth.js: -------------------------------------------------------------------------------- 1 | import OpenAI from 'openai'; 2 | import 'dotenv/config'; 3 | 4 | // Create a simple test script to debug the API connection with authentication 5 | async function testConnection() { 6 | console.log('Testing OpenAI API connection with authentication...'); 7 | console.log('Base URL:', process.env.OPENAI_BASE_URL); 8 | console.log('API Key:', process.env.OPENAI_API_KEY ? 'Set (masked)' : 'Not set'); 9 | 10 | const openai = new OpenAI({ 11 | apiKey: process.env.OPENAI_API_KEY, 12 | baseURL: process.env.OPENAI_BASE_URL, 13 | defaultHeaders: { 14 | "api-key": process.env.OPENAI_API_KEY, 15 | "Authorization": `Bearer ${process.env.OPENAI_API_KEY}` 16 | } 17 | }); 18 | 19 | try { 20 | // Try a simple chat completion without streaming 21 | console.log('\nAttempting a simple chat completion...'); 22 | const completion = await openai.chat.completions.create({ 23 | model: 'Qwen/QwQ-32B-AWQ', 24 | messages: [{ role: 'user', content: 'Hello, how are you?' }], 25 | stream: false, 26 | }); 27 | console.log('Chat completion successful:', completion); 28 | } catch (error) { 29 | console.error('Error with chat completion:', error); 30 | 31 | // Print more detailed error information 32 | if (error.response) { 33 | console.log('Response status:', error.response.status); 34 | console.log('Response headers:', error.response.headers); 35 | console.log('Response data:', error.response.data); 36 | } 37 | } 38 | } 39 | 40 | testConnection().catch(console.error); 41 | -------------------------------------------------------------------------------- /benchmark/prompts.txt: -------------------------------------------------------------------------------- 1 | In about 150 tokens, explain how generative AI models create new content from training data. 2 | Using approximately 150 tokens, describe the key differences between GPT-3 and GPT-4 architectures. 3 | In around 150 tokens, explain how temperature affects AI model output diversity. 4 | Describe, in about 150 tokens, the role attention mechanisms play in transformer models. 5 | In approximately 150 tokens, outline the process of fine-tuning a language model. 6 | Using about 150 tokens, explain how RAG improves AI model accuracy and knowledge. 7 | In roughly 150 tokens, discuss the main challenges in prompt engineering. 8 | Explain the concept of zero-shot learning in AI models, using about 150 tokens. 9 | In approximately 150 tokens, describe how embeddings represent text in vector space. 
10 | Discuss the significance of context length in LLMs, using about 150 tokens. 11 | In around 150 tokens, describe the token limitation problem in language models. 12 | Explain how few-shot learning works in generative AI, using approximately 150 tokens. 13 | In about 150 tokens, outline the benefits of model quantization. 14 | Using roughly 150 tokens, explain the concept of knowledge distillation in AI. 15 | In approximately 150 tokens, describe how RLHF improves AI model outputs. 16 | Discuss the key metrics for evaluating GenAI models, using about 150 tokens. 17 | In around 150 tokens, describe the challenges of AI model hallucination. 18 | Explain how prompt injection affects model security, using approximately 150 tokens. 19 | In about 150 tokens, discuss the role of tokenization in language models. 20 | Using approximately 150 tokens, explain the concept of model alignment in AI. -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/startup-mcp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | echo "Starting Strands SDK Tavily MCP Server (Clean Mode)..." 5 | 6 | # Load environment variables from ConfigMap or local file 7 | if [ -f "/app/config/.env" ]; then 8 | echo "Loading environment variables from ConfigMap .env file..." 9 | export $(grep -v '^#' /app/config/.env | xargs) 10 | echo "Environment variables loaded successfully from ConfigMap" 11 | elif [ -f "/app/.env" ]; then 12 | echo "Loading environment variables from local .env file..." 13 | export $(grep -v '^#' /app/.env | xargs) 14 | echo "Environment variables loaded successfully from local file" 15 | else 16 | echo "WARNING: No .env file found. Using environment variables from Kubernetes." 17 | fi 18 | 19 | # Verify critical environment variables 20 | echo "Verifying critical environment variables..." 21 | if [ -z "$TAVILY_API_KEY" ]; then 22 | echo "ERROR: TAVILY_API_KEY is not set" 23 | exit 1 24 | fi 25 | 26 | echo "Critical environment variables verified" 27 | 28 | echo "Starting Tavily MCP Server..." 29 | echo "Server will be available on port 8001" 30 | 31 | # Load .env and run the MCP server 32 | python -c " 33 | from dotenv import load_dotenv 34 | import os 35 | 36 | # Try to load from ConfigMap first, then fallback to local 37 | if os.path.exists('/app/config/.env'): 38 | load_dotenv('/app/config/.env') 39 | print('Loaded environment from ConfigMap') 40 | elif os.path.exists('/app/.env'): 41 | load_dotenv('/app/.env') 42 | print('Loaded environment from local file') 43 | 44 | # Now run the MCP server 45 | from src.mcp_server_standalone import main 46 | main() 47 | " 48 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/src/README.md: -------------------------------------------------------------------------------- 1 | # RAG System with Custom Embedding Model 2 | 3 | This project implements a Retrieval-Augmented Generation (RAG) system using a custom embedding model and Milvus vector database. 4 | 5 | ## Setup 6 | 7 | 1. Install dependencies: 8 | ```bash 9 | npm install 10 | ``` 11 | 12 | 2. 
Create a `.env` file with the following variables: 13 | ``` 14 | MILVUS_ADDRESS=your_milvus_address 15 | MILVUS_USERNAME=your_milvus_username 16 | MILVUS_PASSWORD=your_milvus_password 17 | AWS_REGION=your_aws_region 18 | ``` 19 | 20 | ## Embedding the CSV Data 21 | 22 | To embed the `q_c_data.csv` file from the knowledge folder: 23 | 24 | ```bash 25 | npm run embed-csv 26 | ``` 27 | 28 | This will process the CSV file, extract question-context pairs, and embed them using the custom embedding endpoint. 29 | 30 | ## Running the Application 31 | 32 | To run the main application: 33 | 34 | ```bash 35 | npm start 36 | ``` 37 | 38 | ## Files 39 | 40 | - `index.ts`: Main application entry point 41 | - `EmbeddingRetriever.ts`: Handles embedding generation using the custom endpoint 42 | - `MilvusVectorStore.ts`: Manages vector storage and retrieval in Milvus 43 | - `Agent.ts`: Implements the agent that uses the RAG system 44 | - `updateRAG.ts`: Script to process and embed the CSV data 45 | 46 | ## Custom Embedding Endpoint 47 | 48 | The system uses a custom embedding endpoint at http://18.232.167.163:8080/v1/embeddings instead of AWS Bedrock. 49 | 50 | Example request: 51 | ```bash 52 | curl --request POST \ 53 | --url http://18.232.167.163:8080/v1/embeddings \ 54 | --header "Content-Type: application/json" \ 55 | --data '{"content": "Your text here"}' 56 | ``` 57 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/debug.js: -------------------------------------------------------------------------------- 1 | import OpenAI from 'openai'; 2 | import 'dotenv/config'; 3 | 4 | // Create a simple test script to debug the API connection 5 | async function testConnection() { 6 | console.log('Testing OpenAI API connection...'); 7 | console.log('Base URL:', process.env.OPENAI_BASE_URL); 8 | console.log('API Key:', process.env.OPENAI_API_KEY ? 'Set (masked)' : 'Not set'); 9 | 10 | const openai = new OpenAI({ 11 | apiKey: process.env.OPENAI_API_KEY, 12 | baseURL: process.env.OPENAI_BASE_URL, 13 | }); 14 | 15 | try { 16 | // First try to list models 17 | console.log('\nAttempting to list models...'); 18 | const models = await openai.models.list(); 19 | console.log('Models available:', models.data.map(m => m.id)); 20 | } catch (error) { 21 | console.error('Error listing models:', error); 22 | } 23 | 24 | try { 25 | // Try a simple chat completion without streaming 26 | console.log('\nAttempting a simple chat completion...'); 27 | const completion = await openai.chat.completions.create({ 28 | model: 'Qwen/QwQ-32B-AWQ', 29 | messages: [{ role: 'user', content: 'Hello, how are you?' 
}], 30 | stream: false, 31 | }); 32 | console.log('Chat completion successful:', completion); 33 | } catch (error) { 34 | console.error('Error with chat completion:', error); 35 | 36 | // Print more detailed error information 37 | if (error.response) { 38 | console.log('Response status:', error.response.status); 39 | console.log('Response headers:', error.response.headers); 40 | console.log('Response data:', error.response.data); 41 | } 42 | } 43 | } 44 | 45 | testConnection().catch(console.error); 46 | -------------------------------------------------------------------------------- /base_eks_setup/prometheus-monitoring.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: monitoring.coreos.com/v1 3 | kind: ServiceMonitor 4 | metadata: 5 | name: vllm 6 | namespace: monitoring 7 | labels: 8 | release: kube-prometheus-stack 9 | spec: 10 | endpoints: 11 | - path: '/metrics/' 12 | port: metrics 13 | selector: 14 | matchLabels: 15 | app.kubernetes.io/name: kuberay 16 | --- 17 | apiVersion: monitoring.coreos.com/v1 18 | kind: PodMonitor 19 | metadata: 20 | labels: 21 | release: prometheus 22 | name: kuberay-cluster 23 | namespace: monitoring # ns where prometheus is deployed 24 | spec: 25 | podMetricsEndpoints: 26 | - port: metrics 27 | path: '/metrics/' 28 | namespaceSelector: 29 | matchNames: 30 | - kuberay-system # ns where Ray cluster is deployed 31 | selector: 32 | matchLabels: 33 | app.kubernetes.io/name: kuberay 34 | --- 35 | apiVersion: monitoring.coreos.com/v1 36 | kind: PodMonitor 37 | metadata: 38 | name: ray-workers-monitor 39 | namespace: monitoring 40 | labels: 41 | # `release: $HELM_RELEASE`: Prometheus can only detect PodMonitor with this label. 42 | release: prometheus 43 | spec: 44 | jobLabel: ray-workers 45 | # Only select Kubernetes Pods in the "default" namespace. 46 | namespaceSelector: 47 | matchNames: 48 | - kuberay-system 49 | # Only select Kubernetes Pods with "matchLabels". 50 | selector: 51 | matchLabels: 52 | ray.io/node-type: worker 53 | # A list of endpoints allowed as part of this PodMonitor. 
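# The relabeling below copies each worker pod's `ray.io/cluster` label onto its
# scraped series as `ray_io_cluster`, enabling per-cluster aggregation. An
# illustrative PromQL sanity check (an assumption -- adapt to the metrics your
# Ray workers actually expose):
#   count by (ray_io_cluster) (up{namespace="kuberay-system"})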
54 | podMetricsEndpoints: 55 | - port: metrics 56 | path: '/metrics/' 57 | relabelings: 58 | - sourceLabels: [__meta_kubernetes_pod_label_ray_io_cluster] 59 | targetLabel: ray_io_cluster 60 | -------------------------------------------------------------------------------- /model-observability/values.yaml.DEPRECATED: -------------------------------------------------------------------------------- 1 | # FOR DEMO USE ONLY 2 | langfuse: 3 | additionalEnv: 4 | - name: REDIS_PORT 5 | value: "6379" 6 | # - name: LANGFUSE_S3_EVENT_UPLOAD_SECRET_ACCESS_KEY 7 | # value: "changeme" 8 | # valueFrom: 9 | # secretKeyRef: 10 | # name: langfuse-s3 11 | # key: root-password 12 | # - name: LANGFUSE_S3_BATCH_EXPORT_SECRET_ACCESS_KEY 13 | # value: "changeme" 14 | # valueFrom: 15 | # secretKeyRef: 16 | # name: langfuse-s3 17 | # key: root-password 18 | salt: 19 | value: TOO_SALTY 20 | nextauth: 21 | secret: 22 | value: "changeme" 23 | 24 | postgresql: 25 | auth: 26 | password: "changeme" 27 | 28 | clickhouse: 29 | auth: 30 | password: "changeme" 31 | 32 | redis: 33 | auth: 34 | password: "changeme" 35 | 36 | s3: 37 | secretAccessKey: 38 | value: "changeme" 39 | 40 | # FOR NON-DEMO USE, YOU SHOULD DEFINE THE SENSITIVE DATA IN SECRETS and/or use cloud storage services such as S3 41 | # langfuse: 42 | # salt: 43 | # secretKeyRef: 44 | # name: langfuse-general 45 | # key: salt 46 | # nextauth: 47 | # secret: 48 | # secretKeyRef: 49 | # name: langfuse-nextauth-secret 50 | # key: nextauth-secret 51 | 52 | # postgresql: 53 | # auth: 54 | # existingSecret: langfuse-postgresql-auth 55 | # secretKeys: 56 | # userPasswordKey: password 57 | 58 | # clickhouse: 59 | # auth: 60 | # existingSecret: langfuse-clickhouse-auth 61 | # secretKeys: 62 | # userPasswordKey: password 63 | 64 | # redis: 65 | # auth: 66 | # existingSecret: langfuse-redis-auth 67 | # secretKeys: 68 | # userPasswordKey: password -------------------------------------------------------------------------------- /agentic-apps/agentic-idp/decision.py: -------------------------------------------------------------------------------- 1 | 2 | from langchain_openai import ChatOpenAI 3 | from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder 4 | from langchain_core.messages import AIMessage, HumanMessage 5 | from langgraph.graph import StateGraph, START, END 6 | from langgraph.graph.message import add_messages 7 | from langgraph.checkpoint.memory import MemorySaver 8 | from langchain_core.messages import SystemMessage, HumanMessage 9 | 10 | from typing import Annotated, List 11 | from langchain.prompts.chat import HumanMessagePromptTemplate 12 | 13 | 14 | from typing_extensions import TypedDict 15 | 16 | import requests 17 | import json 18 | import base64 19 | 20 | import logging 21 | 22 | from langfuse import Langfuse 23 | from datetime import datetime, timedelta 24 | import os 25 | import math 26 | import openai 27 | 28 | from PyPDF2 import PdfReader 29 | 30 | from pathlib import Path 31 | 32 | from langgraph.pregel import RetryPolicy 33 | 34 | 35 | 36 | class State(TypedDict): 37 | messages: Annotated[list, add_messages] 38 | 39 | # Add new node for external processing 40 | async def external_automation_node(state: State) -> State: 41 | """ 42 | Node that calls an external auto-approval service 43 | """ 44 | # Get the last message content 45 | last_message = state["messages"][-1].content 46 | 47 | # Call external auto approve service 48 | 49 | return {"messages": [last_message]} 50 | 51 | async def external_human_node(state: State) -> State: 52 | """
53 | Node that calls external processing service 54 | """ 55 | # Get the last message content 56 | last_message = state["messages"][-1].content 57 | 58 | # Call external auto approve service 59 | 60 | return {"messages": [last_message]} -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_opensearch/src/scripts/embedKnowledge.ts: -------------------------------------------------------------------------------- 1 | import 'dotenv/config'; 2 | import { KnowledgeAgent } from "../agents"; 3 | import { logTitle } from "../utils"; 4 | 5 | // Function to run knowledge embedding as a standalone script 6 | async function runKnowledgeEmbedding() { 7 | logTitle('STANDALONE KNOWLEDGE EMBEDDING'); 8 | 9 | let knowledgeAgent: KnowledgeAgent | null = null; 10 | 11 | try { 12 | // Initialize the knowledge agent 13 | knowledgeAgent = new KnowledgeAgent(); 14 | await knowledgeAgent.init(); 15 | 16 | // Check for changes and embed if needed 17 | const hasChanges = await knowledgeAgent.checkForChanges(); 18 | 19 | if (hasChanges) { 20 | console.log('Changes detected, embedding all knowledge files (including CSV)...'); 21 | 22 | // Embed all knowledge files (markdown, text, JSON, and CSV) 23 | const result = await knowledgeAgent.embedKnowledge(); 24 | console.log(`Knowledge embedding result: ${result ? 'SUCCESS' : 'FAILED'}`); 25 | 26 | if (result) { 27 | console.log('Knowledge embedding completed successfully'); 28 | } else { 29 | console.error('Knowledge embedding failed'); 30 | process.exit(1); 31 | } 32 | } else { 33 | console.log('No changes detected in knowledge files'); 34 | } 35 | 36 | } catch (error) { 37 | console.error('Error in knowledge embedding:', error); 38 | process.exit(1); 39 | } finally { 40 | // Clean up 41 | if (knowledgeAgent) { 42 | await knowledgeAgent.close(); 43 | } 44 | } 45 | } 46 | 47 | // Main execution 48 | (async () => { 49 | await runKnowledgeEmbedding(); 50 | })(); 51 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/debug-no-tools.js: -------------------------------------------------------------------------------- 1 | import OpenAI from 'openai'; 2 | import 'dotenv/config'; 3 | 4 | // Create a test script to debug the API connection with streaming 5 | async function testConnection() { 6 | console.log('Testing OpenAI API connection with streaming...'); 7 | console.log('Base URL:', process.env.OPENAI_BASE_URL); 8 | console.log('API Key:', process.env.OPENAI_API_KEY ? 
'Set (masked)' : 'Not set'); 9 | 10 | const openai = new OpenAI({ 11 | apiKey: process.env.OPENAI_API_KEY, 12 | baseURL: process.env.OPENAI_BASE_URL, 13 | defaultHeaders: { 14 | "api-key": process.env.OPENAI_API_KEY, 15 | "Authorization": `Bearer ${process.env.OPENAI_API_KEY}` 16 | } 17 | }); 18 | 19 | try { 20 | // Try a simple chat completion with streaming 21 | console.log('\nAttempting a chat completion with streaming...'); 22 | const stream = await openai.chat.completions.create({ 23 | model: 'Qwen/QwQ-32B-AWQ', 24 | messages: [{ role: 'user', content: 'Tell me a short joke' }], 25 | stream: true, 26 | }); 27 | 28 | console.log('Stream response started:'); 29 | for await (const chunk of stream) { 30 | process.stdout.write(chunk.choices[0]?.delta?.content || ''); 31 | } 32 | console.log('\nStream completed successfully'); 33 | } catch (error) { 34 | console.error('Error with chat completion:', error); 35 | 36 | // Print more detailed error information 37 | if (error.response) { 38 | console.log('Response status:', error.response.status); 39 | console.log('Response headers:', error.response.headers); 40 | try { 41 | console.log('Response data:', await error.response.text()); 42 | } catch (e) { 43 | console.log('Could not read response data'); 44 | } 45 | } 46 | } 47 | } 48 | 49 | testConnection().catch(console.error); 50 | -------------------------------------------------------------------------------- /milvus/milvus-standalone.yaml: -------------------------------------------------------------------------------- 1 | # kubectl apply -f https://github.com/jetstack/cert-manager/releases/download/v1.5.3/cert-manager.yaml 2 | # kubectl get pods -n cert-manager 3 | # kubectl apply -f https://raw.githubusercontent.com/zilliztech/milvus-operator/main/deploy/manifests/deployment.yaml 4 | # kubectl get pods -n milvus-operator 5 | # deploy Milvus in standalone mode 6 | # kubectl apply -f milvus-standalone.yaml 7 | # Uninstall milvus 8 | # kubectl delete milvus my-release 9 | # This is a sample to deploy a standalone milvus in milvus-operator's default configurations. 10 | # kubectl port-forward service/my-release-milvus 19530:19530 11 | # kubectl port-forward service/llama-cpp 8080:8080 12 | # curl --request POST --url http://localhost:8080/completion --header "Content-Type: application/json" --data '{"prompt": "What is llama3.2?","n_predict": 128}' 13 | apiVersion: milvus.io/v1beta1 14 | kind: Milvus 15 | metadata: 16 | name: my-release 17 | spec: 18 | mode: standalone 19 | components: 20 | nodeSelector: 21 | kubernetes.io/arch: arm64 22 | resources: 23 | requests: 24 | cpu: "1" 25 | dependencies: 26 | etcd: 27 | inCluster: 28 | deletionPolicy: Delete 29 | pvcDeletion: true 30 | values: 31 | nodeSelector: 32 | kubernetes.io/arch: arm64 33 | resources: 34 | requests: 35 | cpu: '1' 36 | pulsar: 37 | inCluster: 38 | deletionPolicy: Delete 39 | pvcDeletion: true 40 | values: 41 | nodeSelector: 42 | kubernetes.io/arch: arm64 43 | resources: 44 | requests: 45 | cpu: '1' 46 | storage: 47 | inCluster: 48 | deletionPolicy: Delete 49 | pvcDeletion: true 50 | values: 51 | nodeSelector: 52 | kubernetes.io/arch: arm64 53 | resources: 54 | requests: 55 | cpu: '1' -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/startup-main.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | echo "Starting Strands SDK Agentic RAG Main Application (Clean Mode)..." 
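# Portability note on the env loading below: `export $(grep -v '^#' FILE | xargs)`
# is a common sketch, but it word-splits on whitespace, so values containing
# spaces or quotes will be mangled. A more robust alternative (assuming the
# file holds plain KEY=VALUE lines) would be:
#   set -a; . /app/config/.env; set +a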
5 | 6 | # Load environment variables from ConfigMap or local file 7 | if [ -f "/app/config/.env" ]; then 8 | echo "Loading environment variables from ConfigMap .env file..." 9 | export $(grep -v '^#' /app/config/.env | xargs) 10 | echo "Environment variables loaded successfully from ConfigMap" 11 | elif [ -f "/app/.env" ]; then 12 | echo "Loading environment variables from local .env file..." 13 | export $(grep -v '^#' /app/.env | xargs) 14 | echo "Environment variables loaded successfully from local file" 15 | else 16 | echo "WARNING: No .env file found. Using environment variables from Kubernetes." 17 | fi 18 | 19 | # Verify critical environment variables 20 | echo "Verifying critical environment variables..." 21 | if [ -z "$LITELLM_API_KEY" ] && [ -z "$OPENAI_API_KEY" ]; then 22 | echo "ERROR: Neither LITELLM_API_KEY nor OPENAI_API_KEY is set" 23 | exit 1 24 | fi 25 | 26 | if [ -z "$OPENSEARCH_ENDPOINT" ]; then 27 | echo "ERROR: OPENSEARCH_ENDPOINT is not set" 28 | exit 1 29 | fi 30 | 31 | if [ -z "$AWS_REGION" ]; then 32 | echo "ERROR: AWS_REGION is not set" 33 | exit 1 34 | fi 35 | 36 | echo "Critical environment variables verified" 37 | 38 | echo "Starting FastAPI server with clean mode..." 39 | echo "Server will be available on port 8000" 40 | echo "API Documentation available at http://localhost:8000/docs" 41 | 42 | # Load .env and run the server 43 | python -c " 44 | from dotenv import load_dotenv 45 | import os 46 | 47 | # Try to load from ConfigMap first, then fallback to local 48 | if os.path.exists('/app/config/.env'): 49 | load_dotenv('/app/config/.env') 50 | print('Loaded environment from ConfigMap') 51 | elif os.path.exists('/app/.env'): 52 | load_dotenv('/app/.env') 53 | print('Loaded environment from local file') 54 | 55 | # Now run the server 56 | from src.server import run_server 57 | run_server() 58 | " 59 | -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=61.0", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "strandsdk-agentic-rag-opensearch" 7 | version = "1.0.0" 8 | description = "Multi-Agent RAG system with MCP and OpenSearch using Strands SDK" 9 | authors = [ 10 | {name = "Your Name", email = "your.email@example.com"} 11 | ] 12 | readme = "README.md" 13 | requires-python = ">=3.9" 14 | classifiers = [ 15 | "Development Status :: 4 - Beta", 16 | "Intended Audience :: Developers", 17 | "License :: OSI Approved :: MIT License", 18 | "Programming Language :: Python :: 3", 19 | "Programming Language :: Python :: 3.9", 20 | "Programming Language :: Python :: 3.10", 21 | "Programming Language :: Python :: 3.11", 22 | "Programming Language :: Python :: 3.12", 23 | ] 24 | dependencies = [ 25 | "strands-agents>=0.1.0", 26 | "strands-tools>=0.1.0", 27 | "openai>=1.0.0", 28 | "mcp>=1.0.0", 29 | "fastmcp>=0.9.0", 30 | "boto3>=1.34.0", 31 | "opensearch-py>=2.4.0", 32 | "aws-requests-auth>=0.4.3", 33 | "numpy>=1.24.0", 34 | "scikit-learn>=1.3.0", 35 | "pandas>=2.0.0", 36 | "python-dotenv>=1.0.0", 37 | "langfuse>=2.0.0", 38 | "pydantic>=2.0.0", 39 | "aiofiles>=23.0.0", 40 | ] 41 | 42 | [project.optional-dependencies] 43 | dev = [ 44 | "pytest>=7.0.0", 45 | "pytest-asyncio>=0.21.0", 46 | "black>=23.0.0", 47 | "isort>=5.12.0", 48 | "flake8>=6.0.0", 49 | "mypy>=1.0.0", 50 | ] 51 | 52 | [project.scripts] 53 | embed-knowledge = 
"src.scripts.embed_knowledge:main" 54 | run-agents = "src.main:main" 55 | test-agents = "src.test_agents:main" 56 | 57 | [tool.setuptools.packages.find] 58 | where = ["."] 59 | include = ["src*"] 60 | 61 | [tool.black] 62 | line-length = 88 63 | target-version = ['py39'] 64 | 65 | [tool.isort] 66 | profile = "black" 67 | line_length = 88 68 | 69 | [tool.mypy] 70 | python_version = "3.9" 71 | warn_return_any = true 72 | warn_unused_configs = true 73 | disallow_untyped_defs = true 74 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_opensearch/src/MCPClient.ts: -------------------------------------------------------------------------------- 1 | import { Client } from "@modelcontextprotocol/sdk/client/index.js"; 2 | import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js"; 3 | import { Tool } from "@modelcontextprotocol/sdk/types.js"; 4 | 5 | export default class MCPClient { 6 | public mcp: Client; 7 | private command: string; 8 | private args: string[] 9 | private transport: StdioClientTransport | null = null; 10 | private tools: Tool[] = []; 11 | 12 | constructor(name: string, command: string, args: string[], version?: string) { 13 | this.mcp = new Client({ name, version: version || "0.0.1" }); 14 | this.command = command; 15 | this.args = args; 16 | } 17 | 18 | public async init() { 19 | await this.connectToServer(); 20 | } 21 | 22 | public async close() { 23 | await this.mcp.close(); 24 | } 25 | 26 | public getTools() { 27 | return this.tools; 28 | } 29 | 30 | public callTool(name: string, params: Record) { 31 | return this.mcp.callTool({ 32 | name, 33 | arguments: params, 34 | }); 35 | } 36 | 37 | private async connectToServer() { 38 | try { 39 | this.transport = new StdioClientTransport({ 40 | command: this.command, 41 | args: this.args, 42 | }); 43 | await this.mcp.connect(this.transport); 44 | 45 | const toolsResult = await this.mcp.listTools(); 46 | this.tools = toolsResult.tools.map((tool) => { 47 | return { 48 | name: tool.name, 49 | description: tool.description, 50 | inputSchema: tool.inputSchema, 51 | }; 52 | }); 53 | console.log( 54 | "Connected to server with tools:", 55 | this.tools.map(({ name }) => name) 56 | ); 57 | } catch (e) { 58 | console.log("Failed to connect to MCP server: ", e); 59 | throw e; 60 | } 61 | } 62 | } -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/test-mcp-server.js: -------------------------------------------------------------------------------- 1 | // Test script to check MCP server functionality 2 | import { spawn } from 'child_process'; 3 | import path from 'path'; 4 | 5 | const outPath = path.resolve(process.cwd(), 'output'); 6 | console.log(`Testing MCP server with output path: ${outPath}`); 7 | 8 | // Start the MCP server process 9 | const mcpProcess = spawn('npx', ['-y', '@modelcontextprotocol/server-filesystem', outPath], { 10 | stdio: ['pipe', 'pipe', 'pipe'] 11 | }); 12 | 13 | // Log server output 14 | mcpProcess.stdout.on('data', (data) => { 15 | console.log(`MCP Server stdout: ${data}`); 16 | }); 17 | 18 | mcpProcess.stderr.on('data', (data) => { 19 | console.error(`MCP Server stderr: ${data}`); 20 | }); 21 | 22 | // Send a test message to the server after it starts 23 | setTimeout(() => { 24 | try { 25 | console.log('Sending test message to MCP server...'); 26 | const message = { 27 | jsonrpc: '2.0', 28 | id: '1', 29 | method: 'listTools', 30 | params: {} 31 | }; 32 | 33 | 
mcpProcess.stdin.write(JSON.stringify(message) + '\n'); 34 | 35 | // Wait for response and then try to call the write_file tool 36 | setTimeout(() => { 37 | console.log('Attempting to call write_file tool...'); 38 | const writeFileMessage = { 39 | jsonrpc: '2.0', 40 | id: '2', 41 | method: 'callTool', 42 | params: { 43 | name: 'write_file', 44 | arguments: { 45 | path: 'mcp-test.md', 46 | content: '# MCP Test\n\nThis file was created using the MCP server.' 47 | } 48 | } 49 | }; 50 | 51 | mcpProcess.stdin.write(JSON.stringify(writeFileMessage) + '\n'); 52 | 53 | // Close the server after testing 54 | setTimeout(() => { 55 | console.log('Closing MCP server...'); 56 | mcpProcess.kill(); 57 | process.exit(0); 58 | }, 2000); 59 | }, 1000); 60 | } catch (error) { 61 | console.error(`Error sending message to MCP server: ${error}`); 62 | mcpProcess.kill(); 63 | process.exit(1); 64 | } 65 | }, 2000); 66 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/test-endpoint.js: -------------------------------------------------------------------------------- 1 | // Simple test script to check LLM endpoint connectivity 2 | import 'dotenv/config'; 3 | import { OpenAI } from 'openai'; 4 | 5 | async function testEndpoint() { 6 | console.log('Testing LLM endpoint with a simple request'); 7 | console.log('Base URL:', process.env.OPENAI_BASE_URL); 8 | 9 | // Create OpenAI client with simplified configuration 10 | const openai = new OpenAI({ 11 | apiKey: process.env.OPENAI_API_KEY, 12 | baseURL: process.env.OPENAI_BASE_URL, 13 | defaultHeaders: { 14 | // Using only one authentication method 15 | "Authorization": `Bearer ${process.env.OPENAI_API_KEY}` 16 | } 17 | }); 18 | 19 | // Simple message without tools 20 | const messages = [ 21 | { role: "user", content: "Hello, can you respond with a simple greeting?" } 22 | ]; 23 | 24 | try { 25 | console.log('Sending request...'); 26 | 27 | // Try with the original model name format 28 | const modelName = 'Qwen/QwQ-32B-AWQ'; 29 | console.log('Using model:', modelName); 30 | 31 | const response = await openai.chat.completions.create({ 32 | model: modelName, 33 | messages: messages, 34 | stream: false 35 | }); 36 | 37 | console.log('Success! 
Response:'); 38 | console.log(JSON.stringify(response, null, 2)); 39 | return response; 40 | } catch (error) { 41 | console.error('Error occurred:'); 42 | console.error('Status:', error.status); 43 | console.error('Headers:', error.headers); 44 | 45 | if (error.response) { 46 | try { 47 | const responseText = await error.response.text(); 48 | console.error('Response body:', responseText); 49 | try { 50 | const responseJson = JSON.parse(responseText); 51 | console.error('Response JSON:', JSON.stringify(responseJson, null, 2)); 52 | } catch (parseError) { 53 | // If not valid JSON, the text version is already logged 54 | } 55 | } catch (e) { 56 | console.error('Could not read response data:', e); 57 | } 58 | } 59 | 60 | throw error; 61 | } 62 | } 63 | 64 | // Run the test 65 | testEndpoint() 66 | .then(() => console.log('Test completed successfully')) 67 | .catch(err => console.error('Test failed:', err.message)); 68 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/explore_collection.py: -------------------------------------------------------------------------------- 1 | import os 2 | from dotenv import load_dotenv 3 | from pymilvus import connections, Collection 4 | 5 | # Load environment variables 6 | load_dotenv() 7 | 8 | # Get Milvus address from environment 9 | milvus_address = os.getenv("MILVUS_ADDRESS", "localhost:19530") 10 | milvus_host, milvus_port = milvus_address.split(":") 11 | 12 | # Collection name 13 | collection_name = "rag_documents" 14 | 15 | # Connect to Milvus 16 | print(f"Connecting to Milvus at {milvus_host}:{milvus_port}...") 17 | try: 18 | connections.connect( 19 | alias="default", 20 | host=milvus_host, 21 | port=milvus_port 22 | ) 23 | 24 | # Get collection 25 | collection = Collection(collection_name) 26 | 27 | # 1. Check the schema 28 | schema = collection.schema 29 | print("\n1. Collection Schema:") 30 | print(f"Collection name: {collection_name}") 31 | print("Fields:") 32 | for field in schema.fields: 33 | print(f" - {field.name}: {field.dtype} (is_primary_key: {field.is_primary})") 34 | if hasattr(field, 'params') and field.params: 35 | print(f" Vector dimension: {field.params.get('dim')}") 36 | 37 | # 2. Get number of entities 38 | num_entities = collection.num_entities 39 | print(f"\n2. Number of entities: {num_entities}") 40 | 41 | # 3. Examine index information 42 | print("\n3. 
Index Information:") 43 | try: 44 | index_info = collection.index().params 45 | print(f"Index params: {index_info}") 46 | except Exception as e: 47 | print(f"Could not get index params: {e}") 48 | 49 | # Get more detailed information about vector field indexes 50 | try: 51 | for field_name in collection.index_info.keys(): 52 | field_index = collection.index_info.get(field_name) 53 | if field_index: 54 | print(f"\nField '{field_name}' index:") 55 | print(f" Index type: {field_index.get('index_type')}") 56 | print(f" Metric type: {field_index.get('metric_type')}") 57 | print(f" Params: {field_index.get('params')}") 58 | except Exception as e: 59 | print(f"Could not get detailed index info: {e}") 60 | 61 | # Close connection 62 | connections.disconnect("default") 63 | 64 | except Exception as e: 65 | print(f"Error exploring Milvus collection: {e}") 66 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/src/embedKnowledge.ts: -------------------------------------------------------------------------------- 1 | import fs from "fs"; 2 | import path from "path"; 3 | import EmbeddingRetriever from "./EmbeddingRetriever"; 4 | import { logTitle } from "./utils"; 5 | 6 | // Function to process and embed knowledge files 7 | async function embedKnowledgeFiles() { 8 | logTitle('EMBEDDING KNOWLEDGE FILES'); 9 | 10 | const knowledgePath = path.join(process.cwd(), 'knowledge'); 11 | 12 | if (!fs.existsSync(knowledgePath)) { 13 | console.error(`Knowledge directory not found: ${knowledgePath}`); 14 | return false; 15 | } 16 | 17 | try { 18 | // Get all markdown files in the knowledge directory 19 | const files = fs.readdirSync(knowledgePath) 20 | .filter(file => file.endsWith('.md')); 21 | 22 | console.log(`Found ${files.length} markdown files in the knowledge directory`); 23 | 24 | // Initialize the embedding retriever 25 | const embeddingRetriever = new EmbeddingRetriever("custom-embedding-model"); 26 | 27 | // Process each file 28 | for (const file of files) { 29 | const filePath = path.join(knowledgePath, file); 30 | console.log(`Processing file: ${file}`); 31 | 32 | // Read the file content 33 | const content = fs.readFileSync(filePath, 'utf-8'); 34 | 35 | // Embed the document 36 | await embeddingRetriever.embedDocument(content); 37 | } 38 | 39 | console.log(`Successfully embedded ${files.length} knowledge files`); 40 | 41 | // Close Milvus connection when done 42 | // @ts-ignore - Access private property for cleanup 43 | if (embeddingRetriever.vectorStore && typeof embeddingRetriever.vectorStore.close === 'function') { 44 | // @ts-ignore 45 | await embeddingRetriever.vectorStore.close(); 46 | } 47 | 48 | return true; 49 | } catch (error) { 50 | console.error("Error embedding knowledge files:", error); 51 | return false; 52 | } 53 | } 54 | 55 | // Main function 56 | (async () => { 57 | const success = await embedKnowledgeFiles(); 58 | 59 | if (success) { 60 | console.log("Knowledge embedding completed successfully"); 61 | } else { 62 | console.error("Knowledge embedding failed"); 63 | process.exit(1); 64 | } 65 | })(); 66 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_opensearch/src/embedKnowledge.ts: -------------------------------------------------------------------------------- 1 | import fs from "fs"; 2 | import path from "path"; 3 | import EmbeddingRetriever from "./EmbeddingRetriever"; 4 | import { logTitle } from "./utils"; 5 | 6 | // Function to process and embed 
knowledge files 7 | async function embedKnowledgeFiles() { 8 | logTitle('EMBEDDING KNOWLEDGE FILES'); 9 | 10 | const knowledgePath = path.join(process.cwd(), '..', 'knowledge'); 11 | 12 | if (!fs.existsSync(knowledgePath)) { 13 | console.error(`Knowledge directory not found: ${knowledgePath}`); 14 | return false; 15 | } 16 | 17 | try { 18 | // Get all markdown files in the knowledge directory 19 | const files = fs.readdirSync(knowledgePath) 20 | .filter(file => file.endsWith('.md')); 21 | 22 | console.log(`Found ${files.length} markdown files in the knowledge directory`); 23 | 24 | // Initialize the embedding retriever with llamacpp-embedding model 25 | const embeddingRetriever = new EmbeddingRetriever("llamacpp-embedding"); 26 | 27 | // Process each file 28 | for (const file of files) { 29 | const filePath = path.join(knowledgePath, file); 30 | console.log(`Processing file: ${file}`); 31 | 32 | // Read the file content 33 | const content = fs.readFileSync(filePath, 'utf-8'); 34 | 35 | // Embed the document 36 | await embeddingRetriever.embedDocument(content); 37 | } 38 | 39 | console.log(`Successfully embedded ${files.length} knowledge files`); 40 | 41 | // Close OpenSearch connection when done 42 | // @ts-ignore - Access private property for cleanup 43 | if (embeddingRetriever.vectorStore && typeof embeddingRetriever.vectorStore.close === 'function') { 44 | // @ts-ignore 45 | await embeddingRetriever.vectorStore.close(); 46 | } 47 | 48 | return true; 49 | } catch (error) { 50 | console.error("Error embedding knowledge files:", error); 51 | return false; 52 | } 53 | } 54 | 55 | // Main function 56 | (async () => { 57 | const success = await embedKnowledgeFiles(); 58 | 59 | if (success) { 60 | console.log("Knowledge embedding completed successfully"); 61 | } else { 62 | console.error("Knowledge embedding failed"); 63 | process.exit(1); 64 | } 65 | })(); 66 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/debug-tools.js: -------------------------------------------------------------------------------- 1 | import OpenAI from 'openai'; 2 | import 'dotenv/config'; 3 | 4 | // Create a test script to debug the API connection with tools 5 | async function testConnection() { 6 | console.log('Testing OpenAI API connection with tools...'); 7 | console.log('Base URL:', process.env.OPENAI_BASE_URL); 8 | console.log('API Key:', process.env.OPENAI_API_KEY ? 'Set (masked)' : 'Not set'); 9 | 10 | const openai = new OpenAI({ 11 | apiKey: process.env.OPENAI_API_KEY, 12 | baseURL: process.env.OPENAI_BASE_URL, 13 | defaultHeaders: { 14 | "api-key": process.env.OPENAI_API_KEY, 15 | "Authorization": `Bearer ${process.env.OPENAI_API_KEY}` 16 | } 17 | }); 18 | 19 | // Define a simple tool 20 | const tools = [ 21 | { 22 | type: "function", 23 | function: { 24 | name: "get_current_weather", 25 | description: "Get the current weather in a given location", 26 | parameters: { 27 | type: "object", 28 | properties: { 29 | location: { 30 | type: "string", 31 | description: "The city and state, e.g. 
San Francisco, CA", 32 | }, 33 | unit: { 34 | type: "string", 35 | enum: ["celsius", "fahrenheit"], 36 | description: "The temperature unit to use", 37 | }, 38 | }, 39 | required: ["location"], 40 | }, 41 | }, 42 | } 43 | ]; 44 | 45 | try { 46 | // Try a simple chat completion without streaming 47 | console.log('\nAttempting a chat completion with tools...'); 48 | const completion = await openai.chat.completions.create({ 49 | model: 'Qwen/QwQ-32B-AWQ', 50 | messages: [{ role: 'user', content: 'What\'s the weather like in Seattle?' }], 51 | stream: false, 52 | tools: tools, 53 | }); 54 | console.log('Chat completion successful:', JSON.stringify(completion, null, 2)); 55 | } catch (error) { 56 | console.error('Error with chat completion:', error); 57 | 58 | // Print more detailed error information 59 | if (error.response) { 60 | console.log('Response status:', error.response.status); 61 | console.log('Response headers:', error.response.headers); 62 | try { 63 | console.log('Response data:', await error.response.text()); 64 | } catch (e) { 65 | console.log('Could not read response data'); 66 | } 67 | } 68 | } 69 | } 70 | 71 | testConnection().catch(console.error); 72 | -------------------------------------------------------------------------------- /base_eks_setup/karpenter_nodepool/graviton-nodepool.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Graviton Node Pool for ARM-based workloads 3 | apiVersion: karpenter.sh/v1 4 | kind: NodePool 5 | metadata: 6 | name: graviton-inference 7 | spec: 8 | limits: 9 | cpu: 512 10 | memory: 4096Gi 11 | disruption: 12 | consolidationPolicy: WhenEmptyOrUnderutilized 13 | consolidateAfter: 30m 14 | template: 15 | metadata: 16 | labels: 17 | model-inferencing: "graviton-inference" 18 | ray-control-plane: "false" 19 | spec: 20 | nodeClassRef: 21 | group: karpenter.k8s.aws 22 | kind: EC2NodeClass 23 | name: graviton-inference 24 | expireAfter: 8h 25 | requirements: 26 | - key: karpenter.k8s.aws/instance-category 27 | operator: In 28 | values: 29 | - c 30 | - m 31 | - r 32 | - key: karpenter.k8s.aws/instance-generation 33 | operator: Gt 34 | values: ["6"] 35 | - key: kubernetes.io/arch 36 | operator: In 37 | values: ["arm64"] 38 | - key: kubernetes.io/os 39 | operator: In 40 | values: ["linux"] 41 | - key: karpenter.sh/capacity-type 42 | operator: In 43 | values: ["on-demand"] 44 | --- 45 | apiVersion: karpenter.k8s.aws/v1 46 | kind: EC2NodeClass 47 | metadata: 48 | name: graviton-inference 49 | spec: 50 | kubelet: 51 | podsPerCore: 2 52 | maxPods: 20 53 | systemReserved: 54 | cpu: 100m 55 | memory: 100Mi 56 | subnetSelectorTerms: 57 | - tags: 58 | Environment: dev 59 | Name: ${CLUSTER_NAME}-private-* 60 | karpenter.sh/discovery: ${CLUSTER_NAME} 61 | securityGroupSelectorTerms: 62 | - tags: 63 | Environment: dev 64 | karpenter.sh/discovery: ${CLUSTER_NAME} 65 | amiFamily: "AL2023" 66 | amiSelectorTerms: 67 | - name: "amazon-eks-node-al2023-arm64-standard-1.33-*" 68 | role: KarpenterNode-${CLUSTER_NAME} 69 | tags: 70 | Environment: dev 71 | karpenter.sh/discovery: ${CLUSTER_NAME} 72 | model-inferencing: "graviton-inference" 73 | ray-control-plane: "false" 74 | Provisioned-By: aws-solutions-library-samples/guidance-for-automated-provisioning-of-application-ready-amazon-eks-clusters 75 | blockDeviceMappings: 76 | - deviceName: /dev/xvda 77 | ebs: 78 | volumeSize: 100Gi 79 | volumeType: gp3 80 | iops: 10000 81 | encrypted: false 82 | deleteOnTermination: true 83 | throughput: 256 84 | 
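# Scheduling onto this pool: the NodePool above labels its nodes and, unlike
# the x86/inf2 pools, applies no taint, so a nodeSelector is enough. A sketch
# of a pod spec fragment (illustrative only, not part of this manifest):
#   nodeSelector:
#     model-inferencing: "graviton-inference"
#     kubernetes.io/arch: "arm64"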
-------------------------------------------------------------------------------- /base_eks_setup/karpenter_nodepool/x86-nodepool.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # x86 Node Pool for x86-based workloads 3 | apiVersion: karpenter.sh/v1 4 | kind: NodePool 5 | metadata: 6 | name: x86-inference 7 | spec: 8 | limits: 9 | cpu: 512 10 | memory: 4096Gi 11 | disruption: 12 | consolidationPolicy: WhenEmptyOrUnderutilized 13 | consolidateAfter: 30s 14 | template: 15 | metadata: 16 | labels: 17 | model-inferencing: "x86-inference" 18 | ray-control-plane: "false" 19 | spec: 20 | nodeClassRef: 21 | group: karpenter.k8s.aws 22 | kind: EC2NodeClass 23 | name: x86-inference 24 | taints: 25 | - key: "model-inferencing" 26 | value: "x86-inference" 27 | effect: NoSchedule 28 | expireAfter: 8h 29 | requirements: 30 | - key: karpenter.k8s.aws/instance-category 31 | operator: In 32 | values: 33 | - c 34 | - m 35 | - r 36 | - key: karpenter.k8s.aws/instance-generation 37 | operator: Gt 38 | values: ["6"] 39 | - key: kubernetes.io/arch 40 | operator: In 41 | values: ["amd64"] 42 | - key: kubernetes.io/os 43 | operator: In 44 | values: ["linux"] 45 | - key: karpenter.sh/capacity-type 46 | operator: In 47 | values: ["on-demand"] 48 | --- 49 | apiVersion: karpenter.k8s.aws/v1 50 | kind: EC2NodeClass 51 | metadata: 52 | name: x86-inference 53 | spec: 54 | kubelet: 55 | podsPerCore: 2 56 | maxPods: 20 57 | systemReserved: 58 | cpu: 100m 59 | memory: 100Mi 60 | subnetSelectorTerms: 61 | - tags: 62 | Environment: dev 63 | Name: ${CLUSTER_NAME}-private-* 64 | karpenter.sh/discovery: ${CLUSTER_NAME} 65 | securityGroupSelectorTerms: 66 | - tags: 67 | Environment: dev 68 | karpenter.sh/discovery: ${CLUSTER_NAME} 69 | amiFamily: "AL2023" 70 | amiSelectorTerms: 71 | - name: "amazon-eks-node-al2023-x86_64-standard-1.29-*" 72 | role: KarpenterNode-${CLUSTER_NAME} 73 | tags: 74 | Environment: dev 75 | karpenter.sh/discovery: ${CLUSTER_NAME} 76 | model-inferencing: "x86-inference" 77 | ray-control-plane: "false" 78 | Provisioned-By: aws-solutions-library-samples/guidance-for-automated-provisioning-of-application-ready-amazon-eks-clusters 79 | blockDeviceMappings: 80 | - deviceName: /dev/xvda 81 | ebs: 82 | volumeSize: 100Gi 83 | volumeType: gp3 84 | encrypted: false 85 | deleteOnTermination: true 86 | -------------------------------------------------------------------------------- /base_eks_setup/karpenter_nodepool/inf2-nodepool.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Inf2 Node Pool for ML inference workloads 3 | apiVersion: karpenter.sh/v1 4 | kind: NodePool 5 | metadata: 6 | name: inf2-inference 7 | spec: 8 | limits: 9 | cpu: 512 10 | memory: 4096Gi 11 | disruption: 12 | consolidationPolicy: WhenEmptyOrUnderutilized 13 | consolidateAfter: 30m 14 | template: 15 | metadata: 16 | labels: 17 | model-inferencing: "inf2-inference" 18 | ray-control-plane: "false" 19 | spec: 20 | nodeClassRef: 21 | group: karpenter.k8s.aws 22 | kind: EC2NodeClass 23 | name: inf2-inference 24 | taints: 25 | - key: "model-inferencing" 26 | value: "inf2-inference" 27 | effect: NoSchedule 28 | expireAfter: 8h 29 | requirements: 30 | - key: karpenter.k8s.aws/instance-family 31 | operator: In 32 | values: ["inf2"] 33 | - key: kubernetes.io/arch 34 | operator: In 35 | values: ["amd64"] 36 | - key: kubernetes.io/os 37 | operator: In 38 | values: ["linux"] 39 | - key: karpenter.sh/capacity-type 40 | operator: In 41 | values: ["on-demand"] 
42 | - key: karpenter.k8s.aws/instance-accelerator-manufacturer 43 | operator: In 44 | values: ["aws"] 45 | --- 46 | apiVersion: karpenter.k8s.aws/v1 47 | kind: EC2NodeClass 48 | metadata: 49 | name: inf2-inference 50 | spec: 51 | kubelet: 52 | podsPerCore: 2 53 | maxPods: 20 54 | systemReserved: 55 | cpu: 500m 56 | memory: 900Mi 57 | subnetSelectorTerms: 58 | - tags: 59 | Environment: dev 60 | Name: ${CLUSTER_NAME}-private-* 61 | karpenter.sh/discovery: ${CLUSTER_NAME} 62 | securityGroupSelectorTerms: 63 | - tags: 64 | Environment: dev 65 | karpenter.sh/discovery: ${CLUSTER_NAME} 66 | amiFamily: "AL2023" 67 | amiSelectorTerms: 68 | - name: "amazon-eks-node-al2023-x86_64-neuron-1.33-v*" 69 | role: KarpenterNode-${CLUSTER_NAME} 70 | tags: 71 | Environment: dev 72 | karpenter.sh/discovery: ${CLUSTER_NAME} 73 | model-inferencing: "inf2-inference" 74 | ray-control-plane: "false" 75 | Provisioned-By: aws-solutions-library-samples/guidance-for-automated-provisioning-of-application-ready-amazon-eks-clusters 76 | blockDeviceMappings: 77 | - deviceName: /dev/xvda 78 | ebs: 79 | volumeSize: 300Gi 80 | volumeType: gp3 81 | iops: 8000 82 | encrypted: false 83 | deleteOnTermination: true 84 | throughput: 256 85 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_opensearch/src/LangfuseConfig.ts: -------------------------------------------------------------------------------- 1 | import { Langfuse } from 'langfuse'; 2 | import 'dotenv/config'; 3 | 4 | // Validate environment variables 5 | const publicKey = process.env.LANGFUSE_PUBLIC_KEY; 6 | const secretKey = process.env.LANGFUSE_SECRET_KEY; 7 | const host = process.env.LANGFUSE_HOST; 8 | 9 | if (!publicKey || !secretKey || !host) { 10 | console.warn('Langfuse configuration incomplete. Some environment variables are missing:'); 11 | console.warn('- LANGFUSE_PUBLIC_KEY:', publicKey ? '✓' : '✗'); 12 | console.warn('- LANGFUSE_SECRET_KEY:', secretKey ? '✓' : '✗'); 13 | console.warn('- LANGFUSE_HOST:', host ? '✓' : '✗'); 14 | console.warn('Langfuse tracing will be disabled.'); 15 | } 16 | 17 | // Initialize Langfuse client 18 | export const langfuse = publicKey && secretKey && host ? 
new Langfuse({ 19 | secretKey, 20 | publicKey, 21 | baseUrl: host, 22 | flushAt: 1, // Send traces immediately for development 23 | }) : null; 24 | 25 | // Helper function to create a trace 26 | export function createTrace(name: string, input?: any, metadata?: any) { 27 | if (!langfuse) { 28 | console.warn('Langfuse not configured, skipping trace creation'); 29 | return null; 30 | } 31 | 32 | return langfuse.trace({ 33 | name, 34 | input, 35 | metadata: { 36 | ...metadata, 37 | timestamp: new Date().toISOString(), 38 | environment: 'development' 39 | } 40 | }); 41 | } 42 | 43 | // Helper function to create a span within a trace 44 | export function createSpan(trace: any, name: string, input?: any, metadata?: any) { 45 | if (!trace) { 46 | return null; 47 | } 48 | 49 | return trace.span({ 50 | name, 51 | input, 52 | metadata: { 53 | ...metadata, 54 | timestamp: new Date().toISOString() 55 | } 56 | }); 57 | } 58 | 59 | // Helper function to create a generation (LLM call) within a trace 60 | export function createGeneration(trace: any, name: string, input?: any, model?: string, metadata?: any) { 61 | if (!trace) { 62 | return null; 63 | } 64 | 65 | return trace.generation({ 66 | name, 67 | input, 68 | model, 69 | metadata: { 70 | ...metadata, 71 | timestamp: new Date().toISOString() 72 | } 73 | }); 74 | } 75 | 76 | // Helper function to flush traces (useful for cleanup) 77 | export async function flushLangfuse() { 78 | if (langfuse) { 79 | await langfuse.flushAsync(); 80 | } 81 | } 82 | 83 | // Export configuration status 84 | export const isLangfuseEnabled = !!langfuse; 85 | 86 | console.log(`Langfuse tracing: ${isLangfuseEnabled ? 'ENABLED' : 'DISABLED'}`); 87 | -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/.env.example: -------------------------------------------------------------------------------- 1 | # LiteLLM Configuration for Reasoning Models 2 | LITELLM_API_KEY=your-litellm-api-key 3 | LITELLM_BASE_URL=http://your-litellm-server:8080/v1 4 | REASONING_MODEL=qwen-qwq-32b-preview 5 | 6 | # Embedding Configuration (separate from reasoning) 7 | EMBEDDING_API_KEY=your-embedding-api-key 8 | EMBEDDING_BASE_URL=http://your-embedding-server:8080/v1 9 | EMBEDDING_MODEL=llamacpp-embedding 10 | 11 | 12 | # AWS Configuration 13 | AWS_REGION=us-east-1 14 | OPENSEARCH_ENDPOINT=https://your-opensearch-domain.region.es.amazonaws.com 15 | 16 | # Tavily Web Search Configuration 17 | TAVILY_API_KEY=your-tavily-api-key 18 | 19 | # Optional: Langfuse for observability 20 | LANGFUSE_HOST=https://cloud.langfuse.com 21 | LANGFUSE_PUBLIC_KEY=your-public-key 22 | LANGFUSE_SECRET_KEY=your-secret-key 23 | 24 | # Application Settings 25 | KNOWLEDGE_DIR=knowledge 26 | OUTPUT_DIR=output 27 | VECTOR_INDEX_NAME=knowledge-embeddings 28 | TOP_K_RESULTS=5 29 | BYPASS_TOOL_CONSENT=true 30 | 31 | # Configuration Notes: 32 | # 33 | # LITELLM_API_KEY: API key for your LiteLLM server hosting reasoning models 34 | # LITELLM_BASE_URL: Endpoint for LiteLLM server (e.g., http://localhost:8080/v1) 35 | # REASONING_MODEL: Model name for agent reasoning tasks (e.g., qwen-qwq-32b-preview) 36 | # 37 | # EMBEDDING_API_KEY: API key for embedding service (can be same as LITELLM_API_KEY) 38 | # EMBEDDING_BASE_URL: Endpoint for embedding generation (can be same as LITELLM_BASE_URL) 39 | # EMBEDDING_MODEL: Model name for generating embeddings (e.g., llamacpp-embedding) 40 | # 41 | # OPENAI_*: Legacy configuration for backward compatibility 42 | # 
DEFAULT_MODEL: Fallback model ID if LiteLLM fails 43 | # 44 | # AWS_REGION: AWS region for OpenSearch and other AWS services 45 | # OPENSEARCH_ENDPOINT: Your AWS OpenSearch domain endpoint 46 | # 47 | # TAVILY_API_KEY: API key for Tavily web search service (get from https://tavily.com) 48 | # 49 | # LANGFUSE_*: Optional observability tracking (leave empty to disable) 50 | # 51 | # KNOWLEDGE_DIR: Directory containing knowledge files to embed 52 | # OUTPUT_DIR: Directory for generated outputs and reports 53 | # VECTOR_INDEX_NAME: OpenSearch index name for vector storage 54 | # TOP_K_RESULTS: Default number of search results to return 55 | # 56 | # Model Usage: 57 | # - Reasoning Tasks (All Agents): Uses REASONING_MODEL via LiteLLM 58 | # - Embedding Tasks (RAG): Uses EMBEDDING_MODEL via embedding endpoint 59 | # - Web Search: Uses Tavily API for real-time information when RAG relevance is low 60 | # - Both LLM models can point to the same LiteLLM server with different model names 61 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_opensearch/src/test-agents.ts: -------------------------------------------------------------------------------- 1 | import 'dotenv/config'; 2 | import { KnowledgeAgent, RAGAgent, MCPAgent, SupervisorAgent } from "./agents"; 3 | import MCPClient from "./MCPClient"; 4 | import { logTitle } from "./utils"; 5 | import path from "path"; 6 | 7 | // Simple test to verify all agents can be initialized 8 | async function testAgents() { 9 | logTitle('TESTING MULTI-AGENT SYSTEM'); 10 | 11 | try { 12 | // Test Knowledge Agent 13 | console.log('Testing Knowledge Agent...'); 14 | const knowledgeAgent = new KnowledgeAgent(); 15 | await knowledgeAgent.init(); 16 | const hasChanges = await knowledgeAgent.checkForChanges(); 17 | console.log(`Knowledge changes detected: ${hasChanges}`); 18 | await knowledgeAgent.close(); 19 | console.log('✓ Knowledge Agent test passed'); 20 | 21 | // Test RAG Agent 22 | console.log('\nTesting RAG Agent...'); 23 | const ragAgent = new RAGAgent(); 24 | await ragAgent.init(); 25 | const stats = ragAgent.getStats(); 26 | console.log('RAG Agent stats:', stats); 27 | await ragAgent.close(); 28 | console.log('✓ RAG Agent test passed'); 29 | 30 | // Test MCP Agent 31 | console.log('\nTesting MCP Agent...'); 32 | const outPath = path.resolve(process.cwd(), 'output'); 33 | const fileMCP = new MCPClient("filesystem", "npx", ['-y', '@modelcontextprotocol/server-filesystem', outPath]); 34 | const mcpAgent = new MCPAgent([fileMCP]); 35 | await mcpAgent.init(); 36 | const mcpStats = mcpAgent.getStats(); 37 | console.log('MCP Agent stats:', mcpStats); 38 | const tools = await mcpAgent.listAvailableTools(); 39 | console.log(`Available tools: ${tools.length}`); 40 | await mcpAgent.close(); 41 | console.log('✓ MCP Agent test passed'); 42 | 43 | // Test Supervisor Agent 44 | console.log('\nTesting Supervisor Agent...'); 45 | const fileMCP2 = new MCPClient("filesystem", "npx", ['-y', '@modelcontextprotocol/server-filesystem', outPath]); 46 | const supervisor = new SupervisorAgent([fileMCP2]); 47 | await supervisor.init(); 48 | console.log('Supervisor Agent initialized successfully'); 49 | await supervisor.close(); 50 | console.log('✓ Supervisor Agent test passed'); 51 | 52 | console.log('\n🎉 All agent tests passed successfully!'); 53 | 54 | } catch (error) { 55 | console.error('❌ Agent test failed:', error); 56 | process.exit(1); 57 | } 58 | } 59 | 60 | // Run the test 61 | (async () => { 62 | await 
testAgents(); 63 | })(); 64 | -------------------------------------------------------------------------------- /base_eks_setup/karpenter_nodepool/gpu-nodepool.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # GPU Node Pool for ML inference workloads 3 | apiVersion: karpenter.sh/v1 4 | kind: NodePool 5 | metadata: 6 | name: gpu-inference 7 | spec: 8 | limits: 9 | cpu: 1024 10 | memory: 8192Gi 11 | disruption: 12 | consolidationPolicy: WhenEmptyOrUnderutilized 13 | consolidateAfter: 30m 14 | template: 15 | metadata: 16 | labels: 17 | model-inferencing: "gpu-inference" 18 | ray-control-plane: "false" 19 | nvidia.com/gpu: "present" 20 | spec: 21 | nodeClassRef: 22 | group: karpenter.k8s.aws 23 | kind: EC2NodeClass 24 | name: gpu-inference 25 | expireAfter: 8h 26 | requirements: 27 | - key: karpenter.k8s.aws/instance-category 28 | operator: In 29 | values: 30 | - g 31 | - key: karpenter.k8s.aws/instance-family 32 | operator: In 33 | values: ["g5", "g6"] 34 | - key: kubernetes.io/arch 35 | operator: In 36 | values: ["amd64"] 37 | - key: kubernetes.io/os 38 | operator: In 39 | values: ["linux"] 40 | - key: karpenter.sh/capacity-type 41 | operator: In 42 | values: ["on-demand"] 43 | - key: karpenter.k8s.aws/instance-gpu-manufacturer 44 | operator: In 45 | values: ["nvidia"] 46 | - key: karpenter.k8s.aws/instance-gpu-count 47 | operator: In 48 | values: ["4"] 49 | --- 50 | apiVersion: karpenter.k8s.aws/v1 51 | kind: EC2NodeClass 52 | metadata: 53 | name: gpu-inference 54 | spec: 55 | kubelet: 56 | podsPerCore: 2 57 | maxPods: 20 58 | systemReserved: 59 | cpu: 500m 60 | memory: 900Mi 61 | subnetSelectorTerms: 62 | - tags: 63 | Environment: dev 64 | Name: ${CLUSTER_NAME}-private-* 65 | karpenter.sh/discovery: ${CLUSTER_NAME} 66 | securityGroupSelectorTerms: 67 | - tags: 68 | Environment: dev 69 | karpenter.sh/discovery: ${CLUSTER_NAME} 70 | amiFamily: "AL2023" 71 | amiSelectorTerms: 72 | - name: "amazon-eks-node-al2023-x86_64-nvidia-1.33-v*" 73 | role: KarpenterNode-${CLUSTER_NAME} 74 | tags: 75 | Environment: dev 76 | karpenter.sh/discovery: ${CLUSTER_NAME} 77 | model-inferencing: "gpu-inference" 78 | ray-control-plane: "false" 79 | Provisioned-By: aws-solutions-library-samples/guidance-for-automated-provisioning-of-application-ready-amazon-eks-clusters 80 | blockDeviceMappings: 81 | - deviceName: /dev/xvda 82 | ebs: 83 | volumeSize: 500Gi 84 | volumeType: gp3 85 | iops: 10000 86 | encrypted: false 87 | deleteOnTermination: true 88 | throughput: 512 89 | -------------------------------------------------------------------------------- /agentic-apps/agentic-idp/storage.py: -------------------------------------------------------------------------------- 1 | from langchain_openai import ChatOpenAI 2 | from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder 3 | from langchain_core.messages import AIMessage, HumanMessage 4 | from langgraph.graph import StateGraph, START, END 5 | from langgraph.graph.message import add_messages 6 | from langgraph.checkpoint.memory import MemorySaver 7 | from langchain_core.messages import SystemMessage, HumanMessage 8 | 9 | from typing import Annotated, List 10 | from langchain.prompts.chat import HumanMessagePromptTemplate 11 | 12 | 13 | from typing_extensions import TypedDict 14 | 15 | import requests 16 | import json 17 | import base64 18 | 19 | import logging 20 | 21 | from langfuse import Langfuse 22 | from datetime import datetime, timedelta 23 | import os 24 | import math 25 | import openai 26 | 
27 | from PyPDF2 import PdfReader 28 | 29 | from pathlib import Path 30 | 31 | from langgraph.pregel import RetryPolicy 32 | 33 | from decision import State 34 | 35 | # External function to store the data in S3 36 | async def call_store_service(text: str) -> dict: 37 | """ 38 | External function to send text to a storage service; returns a status dict 39 | """ 40 | try: 41 | # Example API call - replace with your actual service endpoint 42 | # response = requests.post( 43 | # "http://your-service-endpoint/process", 44 | # json={"text": text} 45 | # ) 46 | # return response.json() 47 | print(f"Making storage call with data {text}") 48 | return {"result": "success"} 49 | except Exception as e: 50 | logging.error(f"External service error: {str(e)}") 51 | return {"error": str(e)} 52 | 53 | # Add new node for external processing 54 | async def external_storage_node(state: State) -> State: 55 | """ 56 | Node that sends all AI-generated messages to the external storage service 57 | """ 58 | # Collect all AI messages from the conversation state 59 | # last_message = state["messages"] 60 | # translated = [state["messages"][0]] + [AIMessage(content=msg.content) for msg in state["messages"][1:]] 61 | ai_messages = [msg for msg in state["messages"] if isinstance(msg, AIMessage)] 62 | print(f"AI Messages to Store: {json.dumps([msg.content for msg in ai_messages], indent=2)}") 63 | 64 | 65 | 66 | print(f"Data to Store {ai_messages}") 67 | print(json.dumps([msg.content for msg in ai_messages], indent=2)) 68 | 69 | # Call external service 70 | result = await call_store_service(json.dumps([msg.content for msg in ai_messages], indent=2)) 71 | 72 | # Create new message with processed result 73 | processed_message = HumanMessage( 74 | content=f"External Processing Results: {json.dumps(result, indent=2)}" 75 | ) 76 | 77 | return {"messages": [processed_message]} -------------------------------------------------------------------------------- /model-hosting/standalone-llamacpp-embedding.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: PersistentVolumeClaim 4 | metadata: 5 | name: llamacpp-embedding-server 6 | annotations: 7 | kubernetes.io/pvc-protection: "false" 8 | spec: 9 | accessModes: 10 | - ReadWriteOnce 11 | resources: 12 | requests: 13 | storage: 100Gi 14 | storageClassName: gp3 15 | volumeMode: Filesystem 16 | --- 17 | apiVersion: apps/v1 18 | kind: Deployment 19 | metadata: 20 | name: llamacpp-embedding-server 21 | labels: 22 | app: llamacpp-embedding-server 23 | spec: 24 | replicas: 1 25 | selector: 26 | matchLabels: 27 | app: llamacpp-embedding-server 28 | template: 29 | metadata: 30 | labels: 31 | app: llamacpp-embedding-server 32 | spec: 33 | nodeSelector: 34 | kubernetes.io/arch: arm64 35 | affinity: 36 | nodeAffinity: 37 | requiredDuringSchedulingIgnoredDuringExecution: 38 | nodeSelectorTerms: 39 | - matchExpressions: 40 | - key: kubernetes.io/arch 41 | operator: In 42 | values: 43 | - arm64 44 | volumes: 45 | - name: cache-volume 46 | persistentVolumeClaim: 47 | claimName: llamacpp-embedding-server 48 | containers: 49 | - name: llamacpp-embedding-server 50 | image: ghcr.io/ggml-org/llama.cpp:server 51 | args: [ 52 | "--model-url", "https://huggingface.co/ChristianAzinn/snowflake-arctic-embed-s-gguf/resolve/main/snowflake-arctic-embed-s-f16.GGUF", "--port", "8000", "--host", "0.0.0.0", "--embedding", "--threads", "16" 53 | ] 54 | env: 55 | - name: OMP_NUM_THREADS 56 | value: "16" 57 | ports: 58 | - containerPort: 8000 59 | resources: 60 | limits: 61 | memory: 32Gi 62 | requests: 63 | cpu: "16" 64 | memory:
32Gi 65 | volumeMounts: 66 | - mountPath: /models 67 | name: cache-volume 68 | livenessProbe: 69 | httpGet: 70 | path: /health 71 | port: 8000 72 | initialDelaySeconds: 120 73 | periodSeconds: 10 74 | failureThreshold: 15 75 | successThreshold: 1 76 | readinessProbe: 77 | httpGet: 78 | path: /health 79 | port: 8000 80 | initialDelaySeconds: 120 81 | periodSeconds: 10 82 | 83 | --- 84 | apiVersion: v1 85 | kind: Service 86 | metadata: 87 | name: llamacpp-embedding-server 88 | spec: 89 | ports: 90 | - name: http-llamacpp-embedding-server 91 | port: 8000 92 | protocol: TCP 93 | targetPort: 8000 94 | selector: 95 | app: llamacpp-embedding-server 96 | sessionAffinity: None 97 | type: ClusterIP 98 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_opensearch/src/embedCSV.ts: -------------------------------------------------------------------------------- 1 | import fs from "fs"; 2 | import path from "path"; 3 | import { parse } from "csv-parse/sync"; 4 | import EmbeddingRetriever from "./EmbeddingRetriever"; 5 | import { logTitle } from "./utils"; 6 | 7 | // Function to process and embed CSV data 8 | async function processCSVFile(filePath: string) { 9 | logTitle('PROCESSING CSV FILE'); 10 | console.log(`Processing file: ${filePath}`); 11 | 12 | try { 13 | // Read the CSV file 14 | const fileContent = fs.readFileSync(filePath, 'utf-8'); 15 | 16 | // Parse the CSV content 17 | const records = parse(fileContent, { 18 | columns: true, 19 | skip_empty_lines: true 20 | }); 21 | 22 | console.log(`Found ${records.length} records in the CSV file`); 23 | 24 | // Initialize the embedding retriever with llamacpp-embedding model 25 | const embeddingRetriever = new EmbeddingRetriever("llamacpp-embedding"); 26 | 27 | // Process each record 28 | let processedCount = 0; 29 | for (const record of records) { 30 | // Combine question and context for better retrieval 31 | const documentText = `Question: ${record.question}\nContext: ${record.context}`; 32 | 33 | // Embed the document 34 | await embeddingRetriever.embedDocument(documentText); 35 | 36 | processedCount++; 37 | if (processedCount % 10 === 0) { 38 | console.log(`Processed ${processedCount}/${records.length} records`); 39 | } 40 | } 41 | 42 | console.log(`Successfully embedded ${processedCount} records from the CSV file`); 43 | 44 | // Close OpenSearch connection when done 45 | // @ts-ignore - Access private property for cleanup 46 | if (embeddingRetriever.vectorStore && typeof embeddingRetriever.vectorStore.close === 'function') { 47 | // @ts-ignore 48 | await embeddingRetriever.vectorStore.close(); 49 | } 50 | 51 | return true; 52 | } catch (error) { 53 | console.error("Error processing CSV file:", error); 54 | return false; 55 | } 56 | } 57 | 58 | // Main function 59 | (async () => { 60 | const csvFilePath = path.join(process.cwd(), '..', 'knowledge', 'q_c_data.csv'); 61 | 62 | if (!fs.existsSync(csvFilePath)) { 63 | console.error(`File not found: ${csvFilePath}`); 64 | process.exit(1); 65 | } 66 | 67 | const success = await processCSVFile(csvFilePath); 68 | 69 | if (success) { 70 | console.log("CSV processing completed successfully"); 71 | } else { 72 | console.error("CSV processing failed"); 73 | process.exit(1); 74 | } 75 | })(); 76 | -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/Dockerfile.main: -------------------------------------------------------------------------------- 1 | # Dockerfile for Main Application 
(FastAPI Server) 2 | FROM python:3.11-slim as base 3 | 4 | # Set environment variables 5 | ENV PYTHONUNBUFFERED=1 6 | ENV PYTHONDONTWRITEBYTECODE=1 7 | ENV DEBIAN_FRONTEND=noninteractive 8 | 9 | # Install system dependencies 10 | RUN apt-get update && apt-get install -y \ 11 | curl \ 12 | git \ 13 | build-essential \ 14 | && rm -rf /var/lib/apt/lists/* 15 | 16 | # Set working directory 17 | WORKDIR /app 18 | 19 | # Copy requirements first for better caching 20 | COPY requirements.txt . 21 | 22 | # Install Python dependencies in stages to resolve conflicts 23 | # First install core dependencies without strands-agents-tools 24 | RUN pip install --no-cache-dir \ 25 | openai>=1.0.0 \ 26 | litellm>=1.0.0 \ 27 | mcp>=1.0.0 \ 28 | fastmcp>=0.9.0 \ 29 | fastapi>=0.104.0 \ 30 | uvicorn>=0.24.0 \ 31 | boto3>=1.34.0 \ 32 | opensearch-py>=2.4.0 \ 33 | aws-requests-auth>=0.4.3 34 | 35 | # Install datasets and ragas with compatible dill version 36 | RUN pip install --no-cache-dir \ 37 | "dill>=0.3.0,<0.3.8" \ 38 | "datasets==2.14.0" \ 39 | "pyarrow>=12.0.0,<15.0.0" \ 40 | "ragas>=0.1.0,<0.2.0" 41 | 42 | # Install compatible langchain versions (use older langchain-aws that works with older langchain-core) 43 | RUN pip install --no-cache-dir \ 44 | "langchain>=0.1.0,<0.3.0" \ 45 | "langchain-core>=0.1.0,<0.3.0" \ 46 | "langchain-aws>=0.1.0,<0.2.0" 47 | 48 | # Install strands packages (which will upgrade dill but should work) 49 | RUN pip install --no-cache-dir \ 50 | strands-agents>=0.1.0 \ 51 | strands-agents-tools>=0.1.0 \ 52 | "strands-agents[litellm]>=0.1.0" 53 | 54 | # Install remaining dependencies 55 | RUN pip install --no-cache-dir \ 56 | "numpy>=1.24.0,<2.0.0" \ 57 | "scikit-learn>=1.3.0" \ 58 | "pandas>=2.0.0" \ 59 | "python-dotenv>=1.0.0" \ 60 | "requests>=2.31.0" \ 61 | "httpx>=0.25.0" \ 62 | "langfuse>=2.0.0" \ 63 | "pydantic>=2.0.0" \ 64 | "aiofiles>=23.0.0" \ 65 | "python-multipart>=0.0.6" 66 | 67 | # Copy the entire application 68 | COPY . . 
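# NOTE (suggested addition, not in the original Dockerfile): the staged pip
# installs above exist to work around the dill/datasets/strands version
# conflicts, so an optional build-time import check like the commented line
# below can fail the build early if the final dependency set is broken,
# rather than failing at container startup.
# RUN python -c "import strands, litellm, fastapi, opensearchpy, langfuse"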
69 | 70 | # Create necessary directories 71 | RUN mkdir -p knowledge output logs 72 | 73 | # Set proper permissions 74 | RUN chmod +x scripts/*.py 2>/dev/null || true 75 | 76 | # Copy startup and health check scripts 77 | COPY startup-main.sh /app/startup-main.sh 78 | COPY healthcheck-main.sh /app/healthcheck-main.sh 79 | 80 | # Make startup and health check scripts executable 81 | RUN chmod +x /app/startup-main.sh /app/healthcheck-main.sh 82 | 83 | # Expose the FastAPI server port 84 | EXPOSE 8000 85 | 86 | # Add health check 87 | HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \ 88 | CMD /app/healthcheck-main.sh 89 | 90 | # Set the default command 91 | CMD ["/app/startup-main.sh"] 92 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/test-endpoint-with-tools.js: -------------------------------------------------------------------------------- 1 | // Test script to check LLM endpoint with tools 2 | import 'dotenv/config'; 3 | import { OpenAI } from 'openai'; 4 | 5 | async function testEndpointWithTools() { 6 | console.log('Testing LLM endpoint with tools'); 7 | console.log('Base URL:', process.env.OPENAI_BASE_URL); 8 | 9 | // Create OpenAI client with simplified configuration 10 | const openai = new OpenAI({ 11 | apiKey: process.env.OPENAI_API_KEY, 12 | baseURL: process.env.OPENAI_BASE_URL, 13 | defaultHeaders: { 14 | // Using only one authentication method 15 | "Authorization": `Bearer ${process.env.OPENAI_API_KEY}` 16 | } 17 | }); 18 | 19 | // Simple message with tools 20 | const messages = [ 21 | { role: "user", content: "What's the weather like in Seattle?" } 22 | ]; 23 | 24 | // Define a simple tool 25 | const tools = [ 26 | { 27 | type: "function", 28 | function: { 29 | name: "get_weather", 30 | description: "Get the current weather in a location", 31 | parameters: { 32 | type: "object", 33 | properties: { 34 | location: { 35 | type: "string", 36 | description: "The city and state, e.g. San Francisco, CA" 37 | } 38 | }, 39 | required: ["location"] 40 | } 41 | } 42 | } 43 | ]; 44 | 45 | try { 46 | console.log('Sending request with tools...'); 47 | 48 | // Use the original model name format that worked in the simple test 49 | const modelName = 'Qwen/QwQ-32B-AWQ'; 50 | console.log('Using model:', modelName); 51 | 52 | const response = await openai.chat.completions.create({ 53 | model: modelName, 54 | messages: messages, 55 | tools: tools, 56 | tool_choice: "auto", 57 | stream: false 58 | }); 59 | 60 | console.log('Success! 
Response:'); 61 | console.log(JSON.stringify(response, null, 2)); 62 | return response; 63 | } catch (error) { 64 | console.error('Error occurred:'); 65 | console.error('Status:', error.status); 66 | console.error('Headers:', error.headers); 67 | 68 | if (error.response) { 69 | try { 70 | const responseText = await error.response.text(); 71 | console.error('Response body:', responseText); 72 | try { 73 | const responseJson = JSON.parse(responseText); 74 | console.error('Response JSON:', JSON.stringify(responseJson, null, 2)); 75 | } catch (parseError) { 76 | // If not valid JSON, the text version is already logged 77 | } 78 | } catch (e) { 79 | console.error('Could not read response data:', e); 80 | } 81 | } 82 | 83 | throw error; 84 | } 85 | } 86 | 87 | // Run the test 88 | testEndpointWithTools() 89 | .then(() => console.log('Test completed successfully')) 90 | .catch(err => console.error('Test failed:', err.message)); 91 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/src/updateRAG.ts: -------------------------------------------------------------------------------- 1 | import fs from "fs"; 2 | import path from "path"; 3 | import { parse } from "csv-parse/sync"; 4 | import EmbeddingRetriever from "./EmbeddingRetriever"; 5 | import { logTitle } from "./utils"; 6 | 7 | // Function to update the RAG process with CSV data 8 | async function updateRAGWithCSV() { 9 | logTitle('UPDATING RAG WITH CSV DATA'); 10 | 11 | const csvFilePath = path.join(process.cwd(), '..', 'knowledge', 'q_c_data.csv'); 12 | 13 | if (!fs.existsSync(csvFilePath)) { 14 | console.error(`File not found: ${csvFilePath}`); 15 | return false; 16 | } 17 | 18 | try { 19 | // Read the CSV file 20 | const fileContent = fs.readFileSync(csvFilePath, 'utf-8'); 21 | 22 | // Parse the CSV content 23 | const records = parse(fileContent, { 24 | columns: true, 25 | skip_empty_lines: true 26 | }); 27 | 28 | console.log(`Found ${records.length} records in the CSV file`); 29 | 30 | // Initialize the embedding retriever 31 | const embeddingRetriever = new EmbeddingRetriever("custom-embedding-model"); 32 | 33 | // Process each record in batches to avoid overwhelming the system 34 | const batchSize = 50; 35 | let processedCount = 0; 36 | 37 | for (let i = 0; i < records.length; i += batchSize) { 38 | const batch = records.slice(i, i + batchSize); 39 | 40 | // Process batch in parallel 41 | await Promise.all(batch.map(async (record) => { 42 | // Combine question and context for better retrieval 43 | const documentText = `Question: ${record.question}\nContext: ${record.context}`; 44 | 45 | // Embed the document 46 | await embeddingRetriever.embedDocument(documentText); 47 | })); 48 | 49 | processedCount += batch.length; 50 | console.log(`Processed ${processedCount}/${records.length} records`); 51 | } 52 | 53 | console.log(`Successfully embedded ${processedCount} records from the CSV file`); 54 | 55 | // Close Milvus connection when done 56 | // @ts-ignore - Access private property for cleanup 57 | if (embeddingRetriever.vectorStore && typeof embeddingRetriever.vectorStore.close === 'function') { 58 | // @ts-ignore 59 | await embeddingRetriever.vectorStore.close(); 60 | } 61 | 62 | return true; 63 | } catch (error) { 64 | console.error("Error updating RAG with CSV data:", error); 65 | return false; 66 | } 67 | } 68 | 69 | // Main function 70 | (async () => { 71 | const success = await updateRAGWithCSV(); 72 | 73 | if (success) { 74 | console.log("RAG update completed 
successfully"); 75 | } else { 76 | console.error("RAG update failed"); 77 | process.exit(1); 78 | } 79 | })(); 80 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_opensearch/src/test-langfuse.ts: -------------------------------------------------------------------------------- 1 | import 'dotenv/config'; 2 | import { createTrace, createSpan, createGeneration, flushLangfuse, isLangfuseEnabled } from './LangfuseConfig'; 3 | 4 | async function testLangfuseIntegration() { 5 | console.log('Testing Langfuse Integration...'); 6 | console.log('Langfuse enabled:', isLangfuseEnabled); 7 | 8 | if (!isLangfuseEnabled) { 9 | console.log('Langfuse is not enabled. Please check your environment variables.'); 10 | return; 11 | } 12 | 13 | // Create a test trace 14 | const trace = createTrace( 15 | 'test-trace', 16 | { message: 'Testing Langfuse integration' }, 17 | { testType: 'integration-test' } 18 | ); 19 | 20 | if (!trace) { 21 | console.log('Failed to create trace'); 22 | return; 23 | } 24 | 25 | console.log('Created trace successfully'); 26 | 27 | // Create a test span 28 | const span = createSpan( 29 | trace, 30 | 'test-span', 31 | { operation: 'test-operation' }, 32 | { spanType: 'test' } 33 | ); 34 | 35 | if (span) { 36 | console.log('Created span successfully'); 37 | 38 | // Simulate some work 39 | await new Promise(resolve => setTimeout(resolve, 100)); 40 | 41 | // End the span 42 | span.end({ 43 | output: { result: 'test completed successfully' } 44 | }); 45 | 46 | console.log('Ended span successfully'); 47 | } 48 | 49 | // Create a test generation 50 | const generation = createGeneration( 51 | trace, 52 | 'test-generation', 53 | { prompt: 'Test prompt' }, 54 | 'test-model', 55 | { generationType: 'test' } 56 | ); 57 | 58 | if (generation) { 59 | console.log('Created generation successfully'); 60 | 61 | // Simulate LLM response 62 | await new Promise(resolve => setTimeout(resolve, 200)); 63 | 64 | // End the generation 65 | generation.end({ 66 | output: { response: 'Test response from LLM' }, 67 | usage: { 68 | promptTokens: 10, 69 | completionTokens: 5, 70 | totalTokens: 15 71 | } 72 | }); 73 | 74 | console.log('Ended generation successfully'); 75 | } 76 | 77 | // Update the main trace 78 | trace.update({ 79 | output: { 80 | success: true, 81 | message: 'Test completed successfully' 82 | } 83 | }); 84 | 85 | console.log('Updated trace successfully'); 86 | 87 | // Flush traces 88 | console.log('Flushing traces...'); 89 | await flushLangfuse(); 90 | console.log('Traces flushed successfully'); 91 | 92 | console.log('Langfuse integration test completed!'); 93 | console.log('Check your Langfuse dashboard to see the test traces.'); 94 | } 95 | 96 | // Run the test 97 | testLangfuseIntegration().catch(console.error); 98 | -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/src/config.py: -------------------------------------------------------------------------------- 1 | """Configuration management for the multi-agent RAG system.""" 2 | 3 | import os 4 | from typing import Optional 5 | from dotenv import load_dotenv 6 | 7 | # Load environment variables 8 | load_dotenv() 9 | 10 | class Config: 11 | """Configuration class for the application.""" 12 | 13 | # LiteLLM Configuration for Reasoning Models 14 | LITELLM_API_KEY: str = os.getenv("LITELLM_API_KEY", os.getenv("OPENAI_API_KEY", "")) 15 | LITELLM_BASE_URL: str = os.getenv("LITELLM_BASE_URL", 
os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1")) 16 | REASONING_MODEL: str = os.getenv("REASONING_MODEL", "qwen-qwq-32b-preview") 17 | 18 | # Embedding Configuration (separate from reasoning) 19 | EMBEDDING_API_KEY: str = os.getenv("EMBEDDING_API_KEY", os.getenv("OPENAI_API_KEY", "")) 20 | EMBEDDING_BASE_URL: str = os.getenv("EMBEDDING_BASE_URL", os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1")) 21 | EMBEDDING_MODEL: str = os.getenv("EMBEDDING_MODEL", "llamacpp-embedding") 22 | 23 | # Legacy OpenAI Configuration (for backward compatibility) 24 | OPENAI_API_KEY: str = os.getenv("OPENAI_API_KEY", "") 25 | OPENAI_BASE_URL: str = os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1") 26 | DEFAULT_MODEL: str = os.getenv("DEFAULT_MODEL", os.getenv("REASONING_MODEL", "qwen-qwq-32b-preview")) 27 | 28 | # AWS Configuration 29 | AWS_REGION: str = os.getenv("AWS_REGION", "us-east-1") 30 | OPENSEARCH_ENDPOINT: str = os.getenv("OPENSEARCH_ENDPOINT", "") 31 | 32 | # Tavily MCP Configuration 33 | TAVILY_MCP_SERVICE_URL: str = os.getenv("TAVILY_MCP_SERVICE_URL", "http://localhost:8001/mcp") 34 | 35 | # Langfuse Configuration 36 | LANGFUSE_HOST: str = os.getenv("LANGFUSE_HOST", "") 37 | LANGFUSE_PUBLIC_KEY: str = os.getenv("LANGFUSE_PUBLIC_KEY", "") 38 | LANGFUSE_SECRET_KEY: str = os.getenv("LANGFUSE_SECRET_KEY", "") 39 | 40 | # Application Configuration 41 | KNOWLEDGE_DIR: str = os.getenv("KNOWLEDGE_DIR", "knowledge") 42 | OUTPUT_DIR: str = os.getenv("OUTPUT_DIR", "output") 43 | EMBEDDING_ENDPOINT: str = os.getenv("EMBEDDING_ENDPOINT", "") 44 | 45 | # Vector Search Configuration 46 | VECTOR_INDEX_NAME: str = os.getenv("VECTOR_INDEX_NAME", "knowledge-embeddings") 47 | TOP_K_RESULTS: int = int(os.getenv("TOP_K_RESULTS", "5")) 48 | 49 | @classmethod 50 | def is_langfuse_enabled(cls) -> bool: 51 | """Check if Langfuse is properly configured.""" 52 | return bool(cls.LANGFUSE_HOST and cls.LANGFUSE_PUBLIC_KEY and cls.LANGFUSE_SECRET_KEY) 53 | 54 | @classmethod 55 | def validate_config(cls) -> None: 56 | """Validate required configuration.""" 57 | required_vars = [ 58 | ("LITELLM_API_KEY", cls.LITELLM_API_KEY), 59 | ("OPENSEARCH_ENDPOINT", cls.OPENSEARCH_ENDPOINT), 60 | ] 61 | 62 | missing_vars = [name for name, value in required_vars if not value] 63 | 64 | if missing_vars: 65 | raise ValueError(f"Missing required environment variables: {', '.join(missing_vars)}") 66 | 67 | # Global config instance 68 | config = Config() 69 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_opensearch/MULTI_AGENT_GUIDE.md: -------------------------------------------------------------------------------- 1 | # Multi-Agent System Guide 2 | 3 | ## Overview 4 | 5 | This guide explains how to use the multi-agent RAG system that has been implemented to replace the single-agent architecture. 6 | 7 | ## Architecture Changes 8 | 9 | ### Before (Single Agent) 10 | - Single `Agent` class handling all responsibilities 11 | - Direct embedding and retrieval in main flow 12 | - Simple workflow execution 13 | 14 | ### After (Multi-Agent) 15 | - **SupervisorAgent**: Orchestrates the entire workflow 16 | - **KnowledgeAgent**: Manages knowledge embedding and change detection 17 | - **RAGAgent**: Handles context retrieval and semantic search 18 | - **MCPAgent**: Manages tool interactions and LLM communication 19 | 20 | ## Usage 21 | 22 | ### 1. Running the Multi-Agent System 23 | ```bash 24 | pnpm dev 25 | ``` 26 | 27 | ### 2. 
Embedding Knowledge (Standalone) 28 | ```bash 29 | # Embed all knowledge files (markdown, text, JSON, and CSV) with change detection 30 | pnpm embed-knowledge 31 | ``` 32 | 33 | ### 3. Testing the System 34 | ```bash 35 | pnpm test-agents 36 | ``` 37 | 38 | ## Key Features 39 | 40 | ### Automatic Change Detection 41 | - The KnowledgeAgent monitors file changes using hashes 42 | - Only processes modified files for efficiency 43 | - Maintains metadata across runs 44 | 45 | ### Intelligent Workflow 46 | 1. **Knowledge Check**: Automatically detects and embeds new/changed files 47 | 2. **Context Retrieval**: Uses RAG to find relevant information 48 | 3. **Task Execution**: Leverages MCP tools to complete tasks 49 | 50 | ### Error Handling 51 | - Each agent handles its own errors gracefully 52 | - Supervisor provides comprehensive error reporting 53 | - Resource cleanup is handled properly 54 | 55 | ## Agent Responsibilities 56 | 57 | ### SupervisorAgent 58 | - Initializes and coordinates all sub-agents 59 | - Manages the complete workflow execution 60 | - Provides task tracking and result summaries 61 | - Handles cleanup and resource management 62 | 63 | ### KnowledgeAgent 64 | - Scans knowledge directory for changes 65 | - Embeds new or modified documents 66 | - Supports multiple file formats (MD, TXT, JSON, CSV) in a unified process 67 | - Maintains change detection metadata 68 | 69 | ### RAGAgent 70 | - Performs semantic search using embeddings 71 | - Retrieves relevant context for queries 72 | - Supports advanced features like reranking 73 | - Optimizes context for LLM consumption 74 | 75 | ### MCPAgent 76 | - Manages LLM interactions with tool support 77 | - Handles multi-turn conversations 78 | - Processes tool calls through MCP protocol 79 | - Maintains conversation context 80 | 81 | ## Configuration 82 | 83 | The system uses the same environment variables as before: 84 | - `OPENAI_API_KEY`: Your OpenAI API key 85 | - `OPENAI_BASE_URL`: Your model hosting endpoint 86 | - `OPENSEARCH_ENDPOINT`: Your OpenSearch endpoint 87 | - `AWS_REGION`: Your AWS region 88 | 89 | ## Benefits 90 | 91 | 1. **Modularity**: Each agent has a specific purpose 92 | 2. **Maintainability**: Easier to modify individual components 93 | 3. **Scalability**: Agents can be scaled independently 94 | 4. **Reliability**: Better error isolation and handling 95 | 5. 
**Extensibility**: Easy to add new agent types 96 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/src/embedCSV.ts: -------------------------------------------------------------------------------- 1 | import fs from "fs"; 2 | import path from "path"; 3 | import { parse } from "csv-parse/sync"; 4 | import EmbeddingRetriever from "./EmbeddingRetriever"; 5 | import { logTitle } from "./utils"; 6 | 7 | // Function to process and embed CSV data 8 | async function embedCSVData() { 9 | logTitle('EMBEDDING CSV DATA'); 10 | 11 | const csvPath = path.join(process.cwd(), 'knowledge'); 12 | 13 | if (!fs.existsSync(csvPath)) { 14 | console.error(`CSV data directory not found: ${csvPath}`); 15 | return false; 16 | } 17 | 18 | try { 19 | // Get all CSV files in the data directory 20 | const files = fs.readdirSync(csvPath) 21 | .filter(file => file.endsWith('.csv')); 22 | 23 | console.log(`Found ${files.length} CSV files in the data directory`); 24 | 25 | // Initialize the embedding retriever 26 | const embeddingRetriever = new EmbeddingRetriever("custom-embedding-model"); 27 | 28 | // Process each file 29 | for (const file of files) { 30 | const filePath = path.join(csvPath, file); 31 | console.log(`Processing file: ${file}`); 32 | 33 | // Read the file content 34 | const content = fs.readFileSync(filePath, 'utf-8'); 35 | 36 | // Parse CSV 37 | const records = parse(content, { 38 | columns: true, 39 | skip_empty_lines: true 40 | }); 41 | 42 | console.log(`Found ${records.length} records in ${file}`); 43 | 44 | // Process each record 45 | for (const record of records) { 46 | // Convert record to a string representation 47 | const recordString = Object.entries(record) 48 | .map(([key, value]) => `${key}: ${value}`) 49 | .join('\n'); 50 | 51 | // Create a document with metadata 52 | const document = `# ${record.name || record.title || record.id || 'Record'}\n\n${recordString}`; 53 | 54 | // Embed the document 55 | await embeddingRetriever.embedDocument(document); 56 | } 57 | 58 | console.log(`Successfully embedded ${records.length} records from ${file}`); 59 | } 60 | 61 | // Close Milvus connection when done 62 | // @ts-ignore - Access private property for cleanup 63 | if (embeddingRetriever.vectorStore && typeof embeddingRetriever.vectorStore.close === 'function') { 64 | // @ts-ignore 65 | await embeddingRetriever.vectorStore.close(); 66 | } 67 | 68 | return true; 69 | } catch (error) { 70 | console.error("Error embedding CSV data:", error); 71 | return false; 72 | } 73 | } 74 | 75 | // Main function 76 | (async () => { 77 | const success = await embedCSVData(); 78 | 79 | if (success) { 80 | console.log("CSV data embedding completed successfully"); 81 | } else { 82 | console.error("CSV data embedding failed"); 83 | process.exit(1); 84 | } 85 | })(); 86 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_opensearch/src/index.ts: -------------------------------------------------------------------------------- 1 | import 'dotenv/config'; 2 | import MCPClient from "./MCPClient"; 3 | import { SupervisorAgent } from "./agents"; 4 | import path from "path"; 5 | import fs from "fs"; 6 | import { logTitle } from "./utils"; 7 | 8 | // Verify environment variables are loaded 9 | if (!process.env.AWS_REGION || !process.env.OPENSEARCH_ENDPOINT) { 10 | throw new Error('Required environment variables AWS_REGION and OPENSEARCH_ENDPOINT are not set'); 11 | } 12 | 13 | // Use the parent directory 
(where the command is run) instead of src directory 14 | const outPath = path.resolve(process.cwd(), 'output'); 15 | const TASK = ` 16 | Find information about "What is the most important aspect of initial treatment for Bell's palsy?". 17 | Summarize this information and create a comprehensive story about Bell's palsy treatment. 18 | Save the story and important information to a file named "bells_palsy_treatment.md" in the output directory as a beautiful markdown file. 19 | Include sections for: 20 | 1. Overview of Bell's palsy 21 | 2. Most important initial treatment aspects 22 | 3. Timeline for treatment 23 | 4. Expected outcomes 24 | 5. Additional recommendations 25 | ` 26 | 27 | // Make sure output directory exists 28 | if (!fs.existsSync(outPath)) { 29 | fs.mkdirSync(outPath, { recursive: true }); 30 | } 31 | 32 | // Start the multi-agent application 33 | (async () => { 34 | try { 35 | logTitle('INITIALIZING MULTI-AGENT RAG SYSTEM'); 36 | 37 | // Initialize the filesystem MCP client 38 | const fileMCP = new MCPClient("filesystem", "npx", ['-y', '@modelcontextprotocol/server-filesystem', outPath]); 39 | 40 | await main(fileMCP); 41 | } catch (error) { 42 | console.error("Error in main:", error); 43 | process.exit(1); 44 | } 45 | })(); 46 | 47 | async function main(fileMCP: MCPClient) { 48 | let supervisor: SupervisorAgent | null = null; 49 | 50 | try { 51 | // Initialize the supervisor agent with MCP clients 52 | logTitle('INITIALIZING SUPERVISOR AGENT'); 53 | supervisor = new SupervisorAgent([fileMCP], 'Qwen/QwQ-32B-AWQ'); 54 | await supervisor.init(); 55 | 56 | // Execute the complete workflow 57 | logTitle('EXECUTING MULTI-AGENT WORKFLOW'); 58 | console.log('Task:', TASK); 59 | 60 | const result = await supervisor.executeWorkflow(TASK); 61 | 62 | // Display results 63 | logTitle('WORKFLOW COMPLETED'); 64 | console.log('Final Result:', result); 65 | 66 | // Display workflow summary 67 | const summary = supervisor.getWorkflowSummary(); 68 | console.log('\n' + summary); 69 | 70 | // Display detailed task results 71 | const taskResults = supervisor.getTaskResults(); 72 | console.log('\nDetailed Task Results:'); 73 | for (const [taskId, result] of taskResults) { 74 | console.log(`- ${taskId}: ${result.success ? 'SUCCESS' : 'FAILED'}`); 75 | if (result.error) { 76 | console.log(` Error: ${result.error}`); 77 | } 78 | } 79 | 80 | } catch (error) { 81 | console.error("Error in workflow execution:", error); 82 | throw error; 83 | } finally { 84 | // Clean up resources 85 | if (supervisor) { 86 | logTitle('CLEANING UP'); 87 | await supervisor.close(); 88 | } 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. 
Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to work on. Since our projects use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project, we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution.
60 | -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/k8s/tavily-mcp-deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: tavily-mcp-server 5 | labels: 6 | app: tavily-mcp-server 7 | component: mcp-server 8 | spec: 9 | replicas: 1 10 | selector: 11 | matchLabels: 12 | app: tavily-mcp-server 13 | template: 14 | metadata: 15 | labels: 16 | app: tavily-mcp-server 17 | component: mcp-server 18 | spec: 19 | containers: 20 | - name: tavily-mcp-server 21 | image: 22 | # Fixed command to run MCP server directly with proper host binding 23 | command: 24 | - python 25 | - -c 26 | - | 27 | from dotenv import load_dotenv 28 | import os 29 | # Load environment variables from ConfigMap or local file 30 | if os.path.exists('/app/config/.env'): 31 | load_dotenv('/app/config/.env') 32 | print('Loaded environment from ConfigMap') 33 | elif os.path.exists('/app/.env'): 34 | load_dotenv('/app/.env') 35 | print('Loaded environment from local file') 36 | # Import and run MCP server with host binding to 0.0.0.0 37 | from src.mcp_servers.tavily_search_server import mcp 38 | mcp.run(transport='streamable-http', port=8001, host='0.0.0.0') 39 | ports: 40 | - containerPort: 8001 41 | name: http 42 | env: 43 | # Tavily Configuration 44 | - name: TAVILY_API_KEY 45 | valueFrom: 46 | secretKeyRef: 47 | name: app-secrets 48 | key: tavily-api-key 49 | # LiteLLM Configuration (for potential future use) 50 | - name: LITELLM_API_KEY 51 | valueFrom: 52 | secretKeyRef: 53 | name: app-secrets 54 | key: litellm-api-key 55 | - name: LITELLM_BASE_URL 56 | valueFrom: 57 | configMapKeyRef: 58 | name: app-config 59 | key: litellm-base-url 60 | # AWS Configuration 61 | - name: AWS_REGION 62 | valueFrom: 63 | configMapKeyRef: 64 | name: app-config 65 | key: aws-region 66 | # Server Configuration 67 | - name: PORT 68 | value: "8001" 69 | - name: HOST 70 | value: "0.0.0.0" 71 | resources: 72 | requests: 73 | memory: "128Mi" 74 | cpu: "50m" 75 | limits: 76 | memory: "256Mi" 77 | cpu: "200m" 78 | # Health checks removed as not needed for MCP server 79 | volumeMounts: 80 | - name: app-config 81 | mountPath: /app/config 82 | readOnly: true 83 | volumes: 84 | - name: app-config 85 | configMap: 86 | name: app-config 87 | restartPolicy: Always 88 | --- 89 | apiVersion: v1 90 | kind: Service 91 | metadata: 92 | name: tavily-mcp-service 93 | labels: 94 | app: tavily-mcp-server 95 | component: mcp-server 96 | spec: 97 | selector: 98 | app: tavily-mcp-server 99 | ports: 100 | - name: http 101 | port: 8001 102 | targetPort: 8001 103 | protocol: TCP 104 | type: ClusterIP 105 | -------------------------------------------------------------------------------- /milvus/README.md: -------------------------------------------------------------------------------- 1 | # Milvus Vector Database on AWS EKS with Graviton 2 | 3 | This directory contains configuration files for deploying Milvus, a vector database, on Amazon EKS with AWS Graviton processors. This setup is part of the larger project for cost-effective and scalable Small Language Models inference on AWS Graviton4 with EKS. 4 | 5 | ## Overview 6 | 7 | Milvus is an open-source vector database built to power embedding similarity search and AI applications. In this setup, we deploy Milvus in standalone mode on AWS Graviton-based EKS nodes to leverage the cost-effectiveness and performance of ARM64 architecture. 
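Once deployed, connectivity can be verified with a short sketch using the same Node.js SDK (`@zilliz/milvus2-sdk-node`) that the sample apps in this repository use. This is an illustrative check only; it assumes the service has been port-forwarded locally as described in the "Accessing Milvus" section below.

```typescript
// Minimal connectivity check for the standalone Milvus deployment.
// Assumes: kubectl port-forward service/my-release-milvus 19530:19530
import { MilvusClient } from '@zilliz/milvus2-sdk-node';

(async () => {
  const client = new MilvusClient({ address: 'localhost:19530' });
  console.log('Milvus version:', await client.getVersion());
  console.log('Collections:', await client.showCollections());
  await client.closeConnection();
})();
```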
8 | 9 | ## Prerequisites 10 | 11 | - An existing EKS cluster with Graviton (ARM64) nodes 12 | - Cert-manager installed on the cluster 13 | - Milvus Operator installed on the cluster 14 | - AWS EBS CSI driver configured for persistent storage 15 | 16 | ## Configuration Files 17 | 18 | This directory includes the following configuration files: 19 | 20 | 1. **ebs-storage-class.yaml**: Defines an AWS EBS storage class for Milvus persistent storage 21 | - Uses gp3 volume type 22 | - Enables encryption 23 | - Configures WaitForFirstConsumer binding mode 24 | 25 | 2. **milvus-standalone.yaml**: Deploys Milvus in standalone mode 26 | - Configures Milvus to run on ARM64 (Graviton) nodes 27 | - Sets up resource requests 28 | - Configures in-cluster dependencies (etcd, pulsar, storage) 29 | - All components are configured to run on ARM64 architecture 30 | 31 | 3. **milvus-nlb-service.yaml**: Creates a Network Load Balancer service for external access 32 | - Exposes Milvus service port (19530) 33 | - Exposes metrics port (9091) 34 | - Configures internet-facing NLB 35 | 36 | ## Deployment Steps 37 | 38 | 1. **Install cert-manager** (if not already installed): 39 | ```bash 40 | kubectl apply -f https://github.com/jetstack/cert-manager/releases/download/v1.5.3/cert-manager.yaml 41 | kubectl get pods -n cert-manager 42 | ``` 43 | 44 | 2. **Install Milvus Operator** (if not already installed): 45 | ```bash 46 | kubectl apply -f https://raw.githubusercontent.com/zilliztech/milvus-operator/main/deploy/manifests/deployment.yaml 47 | kubectl get pods -n milvus-operator 48 | ``` 49 | 50 | 3. **Create EBS Storage Class**: 51 | ```bash 52 | kubectl apply -f ebs-storage-class.yaml 53 | ``` 54 | 55 | 4. **Deploy Milvus in standalone mode**: 56 | ```bash 57 | kubectl apply -f milvus-standalone.yaml 58 | ``` 59 | 60 | 5. **Create NLB Service** (optional, if you need external access): 61 | ```bash 62 | kubectl apply -f milvus-nlb-service.yaml 63 | ``` 64 | 65 | ## Accessing Milvus 66 | 67 | You can access Milvus using port-forwarding: 68 | ```bash 69 | kubectl port-forward service/my-release-milvus 19530:19530 70 | ``` 71 | 72 | Or through the Network Load Balancer if you deployed the NLB service. 73 | 74 | ## Integration with LLM Services 75 | 76 | This Milvus deployment can be integrated with the LLM services in the parent project for vector search capabilities, enabling: 77 | - Semantic search 78 | - Retrieval-augmented generation (RAG) 79 | - Document similarity matching 80 | - And other vector-based operations 81 | 82 | ## Uninstalling 83 | 84 | To uninstall Milvus: 85 | ```bash 86 | kubectl delete milvus my-release 87 | ``` 88 | 89 | ## Additional Resources 90 | 91 | - [Milvus Documentation](https://milvus.io/docs) 92 | - [Milvus Operator GitHub](https://github.com/zilliztech/milvus-operator) 93 | - [Main Project README](../README.md) 94 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/AmazonQ.md: -------------------------------------------------------------------------------- 1 | # Augmented LLM with MCP and RAG 2 | 3 | This project demonstrates a framework-independent implementation of an augmented Large Language Model (LLM) system that combines Model Context Protocol (MCP) for tool usage and Retrieval Augmented Generation (RAG) for enhanced context awareness. 4 | 5 | ## Project Overview 6 | 7 | This application creates an AI agent that can: 8 | 1. Retrieve relevant information from a knowledge base using vector embeddings 9 | 2. 
Interact with external tools through the Model Context Protocol (MCP) 10 | 3. Generate responses based on both the retrieved context and tool interactions 11 | 4. Complete tasks like summarizing content and saving results to files 12 | 13 | ## Architecture 14 | 15 | The system is built with a modular architecture consisting of these key components: 16 | 17 | ``` 18 | Agent → Manages the overall workflow and coordinates components 19 | ├── ChatOpenAI → Handles LLM interactions and tool calling 20 | ├── MCPClient(s) → Connects to MCP servers for tool access 21 | └── EmbeddingRetriever → Performs vector search for relevant context 22 | └── VectorStore → Stores and searches document embeddings 23 | ``` 24 | 25 | ## Workflow Explanation 26 | 27 | 1. **Initialization**: 28 | - The system loads knowledge documents and creates embeddings using AWS Bedrock 29 | - Embeddings are stored in an in-memory vector store 30 | - MCP clients are initialized to connect to tool servers 31 | 32 | 2. **RAG Process**: 33 | - When a query is received, it's converted to an embedding 34 | - The system searches for the most relevant documents using cosine similarity 35 | - Retrieved documents are combined to form context for the LLM 36 | 37 | 3. **Agent Execution**: 38 | - The agent initializes with the LLM, MCP clients, and retrieved context 39 | - The user query is sent to the LLM along with the context 40 | - The LLM generates responses and may request tool calls 41 | 42 | 4. **Tool Usage**: 43 | - When the LLM requests a tool, the agent routes the call to the appropriate MCP client 44 | - The MCP client executes the tool and returns results 45 | - Results are fed back to the LLM to continue the conversation 46 | 47 | 5. **Output Generation**: 48 | - The LLM generates a final response incorporating tool results and context 49 | - In the example task, it creates a markdown file with information about "Antonette" 50 | 51 | ## Key Technologies 52 | 53 | - **LLM Integration**: Uses OpenAI API for language model capabilities 54 | - **MCP Implementation**: Connects to MCP servers for filesystem operations 55 | - **Vector Embeddings**: Uses AWS Bedrock for generating embeddings 56 | - **Vector Search**: Implements cosine similarity for finding relevant documents 57 | 58 | ## Implementation Details 59 | 60 | - **Framework Independence**: Built without relying on frameworks like LangChain or LlamaIndex 61 | - **Modular Design**: Components are separated for easy maintenance and extension 62 | - **AWS Integration**: Uses AWS Bedrock for embedding generation 63 | - **Tool Orchestration**: Manages tool calls and responses through MCP protocol 64 | 65 | ## Example Use Case 66 | 67 | The current implementation demonstrates a task where the agent: 68 | 1. Retrieves information about a user named "Antonette" from the knowledge base 69 | 2. Summarizes the information and creates a story about her 70 | 3. Saves the output to a markdown file using the filesystem MCP tool 71 | 72 | This architecture can be extended to support various tasks requiring context-aware responses and tool usage. 
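As an illustrative sketch (not the repository's actual implementation, which lives in `VectorStore.ts` and `EmbeddingRetriever.ts`), the cosine-similarity retrieval step described above amounts to ranking stored documents by the angle between their embeddings and the query embedding:

```typescript
// Simplified sketch of the RAG retrieval step: rank stored documents by
// cosine similarity between their embeddings and the query embedding.
interface EmbeddedDocument { text: string; embedding: number[]; }

function cosineSimilarity(a: number[], b: number[]): number {
  let dot = 0, normA = 0, normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}

function topK(query: number[], docs: EmbeddedDocument[], k: number): EmbeddedDocument[] {
  return [...docs]
    .sort((x, y) => cosineSimilarity(query, y.embedding) - cosineSimilarity(query, x.embedding))
    .slice(0, k);
}
```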
73 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_opensearch/AmazonQ.md: -------------------------------------------------------------------------------- 1 | # Augmented LLM with MCP and RAG 2 | 3 | This project demonstrates a framework-independent implementation of an augmented Large Language Model (LLM) system that combines Model Context Protocol (MCP) for tool usage and Retrieval Augmented Generation (RAG) for enhanced context awareness. 4 | 5 | ## Project Overview 6 | 7 | This application creates an AI agent that can: 8 | 1. Retrieve relevant information from a knowledge base using vector embeddings 9 | 2. Interact with external tools through the Model Context Protocol (MCP) 10 | 3. Generate responses based on both the retrieved context and tool interactions 11 | 4. Complete tasks like summarizing content and saving results to files 12 | 13 | ## Architecture 14 | 15 | The system is built with a modular architecture consisting of these key components: 16 | 17 | ``` 18 | Agent → Manages the overall workflow and coordinates components 19 | ├── ChatOpenAI → Handles LLM interactions and tool calling 20 | ├── MCPClient(s) → Connects to MCP servers for tool access 21 | └── EmbeddingRetriever → Performs vector search for relevant context 22 | └── VectorStore → Stores and searches document embeddings 23 | ``` 24 | 25 | ## Workflow Explanation 26 | 27 | 1. **Initialization**: 28 | - The system loads knowledge documents and creates embeddings using AWS Bedrock 29 | - Embeddings are stored in an in-memory vector store 30 | - MCP clients are initialized to connect to tool servers 31 | 32 | 2. **RAG Process**: 33 | - When a query is received, it's converted to an embedding 34 | - The system searches for the most relevant documents using cosine similarity 35 | - Retrieved documents are combined to form context for the LLM 36 | 37 | 3. **Agent Execution**: 38 | - The agent initializes with the LLM, MCP clients, and retrieved context 39 | - The user query is sent to the LLM along with the context 40 | - The LLM generates responses and may request tool calls 41 | 42 | 4. **Tool Usage**: 43 | - When the LLM requests a tool, the agent routes the call to the appropriate MCP client 44 | - The MCP client executes the tool and returns results 45 | - Results are fed back to the LLM to continue the conversation 46 | 47 | 5. **Output Generation**: 48 | - The LLM generates a final response incorporating tool results and context 49 | - In the example task, it creates a markdown file with information about "Antonette" 50 | 51 | ## Key Technologies 52 | 53 | - **LLM Integration**: Uses OpenAI API for language model capabilities 54 | - **MCP Implementation**: Connects to MCP servers for filesystem operations 55 | - **Vector Embeddings**: Uses AWS Bedrock for generating embeddings 56 | - **Vector Search**: Implements cosine similarity for finding relevant documents 57 | 58 | ## Implementation Details 59 | 60 | - **Framework Independence**: Built without relying on frameworks like LangChain or LlamaIndex 61 | - **Modular Design**: Components are separated for easy maintenance and extension 62 | - **AWS Integration**: Uses AWS Bedrock for embedding generation 63 | - **Tool Orchestration**: Manages tool calls and responses through MCP protocol 64 | 65 | ## Example Use Case 66 | 67 | The current implementation demonstrates a task where the agent: 68 | 1. Retrieves information about a user named "Antonette" from the knowledge base 69 | 2. 
Summarizes the information and creates a story about her 70 | 3. Saves the output to a markdown file using the filesystem MCP tool 71 | 72 | This architecture can be extended to support various tasks requiring context-aware responses and tool usage. 73 | -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/src/utils/opensearch_client.py: -------------------------------------------------------------------------------- 1 | """OpenSearch client wrapper for the multi-agent RAG system.""" 2 | 3 | import logging 4 | from typing import Optional, Dict, Any 5 | from ..tools.opensearch_vector_store import OpenSearchVectorStore 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | class OpenSearchClient: 10 | """OpenSearch client wrapper that provides compatibility with server.py expectations.""" 11 | 12 | def __init__(self, config): 13 | """Initialize OpenSearch client with configuration.""" 14 | self.config = config 15 | self._vector_store = None 16 | self.client = None 17 | self._initialize_client() 18 | 19 | def _initialize_client(self) -> None: 20 | """Initialize OpenSearch client using the existing vector store.""" 21 | try: 22 | # Use the existing OpenSearchVectorStore implementation 23 | self._vector_store = OpenSearchVectorStore() 24 | 25 | # Expose the underlying client for compatibility 26 | if self._vector_store and self._vector_store.client: 27 | self.client = self._vector_store.client 28 | logger.info("OpenSearch client initialized successfully via vector store") 29 | else: 30 | logger.warning("OpenSearch vector store client not available") 31 | self.client = None 32 | 33 | except Exception as e: 34 | logger.error(f"Failed to initialize OpenSearch client: {e}") 35 | self.client = None 36 | 37 | def is_connected(self) -> bool: 38 | """Check if the client is connected and can reach OpenSearch.""" 39 | if not self.client: 40 | return False 41 | 42 | try: 43 | info = self.client.info() 44 | return True 45 | except Exception as e: 46 | logger.debug(f"OpenSearch connection check failed: {e}") 47 | return False 48 | 49 | def get_info(self) -> Optional[Dict[str, Any]]: 50 | """Get OpenSearch cluster information.""" 51 | if not self.client: 52 | return None 53 | 54 | try: 55 | return self.client.info() 56 | except Exception as e: 57 | logger.error(f"Failed to get OpenSearch info: {e}") 58 | return None 59 | 60 | def index_exists(self, index_name: str) -> bool: 61 | """Check if an index exists.""" 62 | if not self.client: 63 | return False 64 | 65 | try: 66 | return self.client.indices.exists(index=index_name) 67 | except Exception as e: 68 | logger.error(f"Failed to check if index exists: {e}") 69 | return False 70 | 71 | def get_document_count(self, index_name: str) -> int: 72 | """Get the number of documents in an index.""" 73 | if not self.client: 74 | return 0 75 | 76 | try: 77 | response = self.client.count(index=index_name) 78 | return response.get("count", 0) 79 | except Exception as e: 80 | logger.error(f"Failed to get document count: {e}") 81 | return 0 82 | 83 | def close(self) -> None: 84 | """Close the OpenSearch connection.""" 85 | if self._vector_store: 86 | try: 87 | self._vector_store.close() 88 | logger.info("OpenSearch connection closed") 89 | except Exception as e: 90 | logger.error(f"Error closing OpenSearch connection: {e}") 91 | -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/src/scripts/embed_knowledge.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Knowledge Embedding Script 4 | 5 | This script processes all documents in the knowledge directory and creates 6 | embeddings for them in the OpenSearch vector store. 7 | """ 8 | 9 | import sys 10 | import logging 11 | from pathlib import Path 12 | from ..config import config 13 | from ..utils.logging import setup_logging, log_title 14 | from ..agents.knowledge_agent import knowledge_agent 15 | 16 | def main(): 17 | """Main function for embedding knowledge.""" 18 | # Setup logging 19 | setup_logging() 20 | logger = logging.getLogger(__name__) 21 | 22 | try: 23 | # Validate configuration 24 | config.validate_config() 25 | 26 | log_title("KNOWLEDGE EMBEDDING SCRIPT") 27 | logger.info("Starting knowledge embedding process") 28 | logger.info(f"Knowledge Directory: {config.KNOWLEDGE_DIR}") 29 | logger.info(f"OpenSearch Endpoint: {config.OPENSEARCH_ENDPOINT}") 30 | logger.info(f"Vector Index: {config.VECTOR_INDEX_NAME}") 31 | 32 | # Check if knowledge directory exists 33 | knowledge_path = Path(config.KNOWLEDGE_DIR) 34 | if not knowledge_path.exists(): 35 | logger.error(f"Knowledge directory does not exist: {config.KNOWLEDGE_DIR}") 36 | sys.exit(1) 37 | 38 | # Get knowledge statistics before embedding 39 | print("\n📊 Getting knowledge statistics...") 40 | stats_result = knowledge_agent("get_stats") 41 | if stats_result.get("success"): 42 | stats = stats_result.get("stats", {}) 43 | print(f"Total files: {stats.get('total_files', 0)}") 44 | print(f"File types: {stats.get('file_types', {})}") 45 | print(f"Current vector store count: {stats.get('vector_store_count', 0)}") 46 | 47 | # Check for changes 48 | print("\n🔍 Checking for knowledge changes...") 49 | check_result = knowledge_agent("check_changes") 50 | 51 | if not check_result.get("success"): 52 | logger.error(f"Failed to check for changes: {check_result.get('message')}") 53 | sys.exit(1) 54 | 55 | has_changes = check_result.get("has_changes", False) 56 | print(f"Changes detected: {has_changes}") 57 | 58 | # Embed knowledge (force embedding regardless of changes) 59 | print("\n🚀 Starting knowledge embedding...") 60 | embed_result = knowledge_agent("embed_knowledge") 61 | 62 | if embed_result.get("success"): 63 | print("✅ Knowledge embedding completed successfully!") 64 | 65 | # Get updated statistics 66 | print("\n📊 Updated knowledge statistics...") 67 | updated_stats_result = knowledge_agent("get_stats") 68 | if updated_stats_result.get("success"): 69 | updated_stats = updated_stats_result.get("stats", {}) 70 | print(f"Vector store count after embedding: {updated_stats.get('vector_store_count', 0)}") 71 | else: 72 | print(f"❌ Knowledge embedding failed: {embed_result.get('message')}") 73 | sys.exit(1) 74 | 75 | print("\n🎉 Knowledge embedding process completed!") 76 | 77 | except KeyboardInterrupt: 78 | print("\n\nProcess interrupted by user.") 79 | sys.exit(0) 80 | except Exception as e: 81 | logger.error(f"Knowledge embedding failed: {e}") 82 | print(f"❌ Error: {e}") 83 | sys.exit(1) 84 | 85 | if __name__ == "__main__": 86 | main() 87 | -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/src/utils/async_cleanup.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility functions for handling async cleanup and suppressing warnings. 
3 | """ 4 | 5 | import warnings 6 | import sys 7 | import logging 8 | from contextlib import contextmanager 9 | 10 | # Configure logging to suppress specific async warnings 11 | logging.getLogger("httpcore").setLevel(logging.ERROR) 12 | logging.getLogger("httpx").setLevel(logging.ERROR) 13 | logging.getLogger("anyio").setLevel(logging.ERROR) 14 | 15 | @contextmanager 16 | def suppress_async_warnings(): 17 | """Context manager to suppress async-related warnings during RAGAs evaluation.""" 18 | 19 | # Store original warning filters 20 | original_filters = warnings.filters[:] 21 | 22 | # Store original stderr 23 | original_stderr = sys.stderr 24 | 25 | try: 26 | # Suppress specific async warnings 27 | warnings.filterwarnings("ignore", category=RuntimeWarning, message=".*async generator ignored GeneratorExit.*") 28 | warnings.filterwarnings("ignore", category=RuntimeWarning, message=".*coroutine.*was never awaited.*") 29 | warnings.filterwarnings("ignore", category=RuntimeWarning, message=".*Attempted to exit cancel scope.*") 30 | warnings.filterwarnings("ignore", category=RuntimeWarning, message=".*no running event loop.*") 31 | 32 | # Suppress HTTP connection warnings 33 | warnings.filterwarnings("ignore", message=".*HTTP11ConnectionByteStream.*") 34 | warnings.filterwarnings("ignore", message=".*HTTP11Connection.*") 35 | 36 | # Create a custom stderr that filters out specific error messages 37 | class FilteredStderr: 38 | def __init__(self, original_stderr): 39 | self.original_stderr = original_stderr 40 | 41 | def write(self, text): 42 | # Filter out specific async error messages 43 | if any(phrase in text for phrase in [ 44 | "async generator ignored GeneratorExit", 45 | "Attempted to exit cancel scope", 46 | "no running event loop", 47 | "HTTP11ConnectionByteStream", 48 | "coroutine object HTTP11ConnectionByteStream.aclose" 49 | ]): 50 | return # Don't write these messages 51 | 52 | self.original_stderr.write(text) 53 | 54 | def flush(self): 55 | self.original_stderr.flush() 56 | 57 | def __getattr__(self, name): 58 | return getattr(self.original_stderr, name) 59 | 60 | # Replace stderr temporarily 61 | sys.stderr = FilteredStderr(original_stderr) 62 | 63 | yield 64 | 65 | finally: 66 | # Restore original settings 67 | warnings.filters[:] = original_filters 68 | sys.stderr = original_stderr 69 | 70 | def setup_async_environment(): 71 | """Set up the environment to minimize async warnings.""" 72 | 73 | # Configure logging levels 74 | loggers_to_quiet = [ 75 | "httpcore", 76 | "httpx", 77 | "anyio", 78 | "asyncio", 79 | "urllib3.connectionpool" 80 | ] 81 | 82 | for logger_name in loggers_to_quiet: 83 | logger = logging.getLogger(logger_name) 84 | logger.setLevel(logging.ERROR) 85 | 86 | # Set global warning filters 87 | warnings.filterwarnings("ignore", category=RuntimeWarning, message=".*async generator ignored GeneratorExit.*") 88 | warnings.filterwarnings("ignore", category=RuntimeWarning, message=".*coroutine.*was never awaited.*") 89 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/src/rebuildCollection.ts: -------------------------------------------------------------------------------- 1 | import { MilvusClient, DataType } from '@zilliz/milvus2-sdk-node'; 2 | import { logTitle } from "./utils"; 3 | import 'dotenv/config'; 4 | 5 | // Function to rebuild the Milvus collection with 384 dimensions 6 | async function rebuildCollection() { 7 | logTitle('REBUILDING MILVUS COLLECTION'); 8 | 9 | const collectionName = 
'rag_documents_384d'; 10 | const dimension = 384; // New dimension to match the custom embedding endpoint 11 | 12 | try { 13 | // Connect to Milvus 14 | const client = new MilvusClient({ 15 | address: process.env.MILVUS_ADDRESS || '', 16 | username: process.env.MILVUS_USERNAME || '', 17 | password: process.env.MILVUS_PASSWORD || '', 18 | }); 19 | 20 | // Check if collection exists and drop it if it does 21 | const hasCollection = await client.hasCollection({ 22 | collection_name: collectionName, 23 | }); 24 | 25 | if (hasCollection.value) { 26 | console.log(`Collection ${collectionName} already exists. Dropping it...`); 27 | await client.dropCollection({ 28 | collection_name: collectionName, 29 | }); 30 | console.log(`Collection ${collectionName} dropped successfully.`); 31 | } 32 | 33 | // Create new collection with 384 dimensions 34 | console.log(`Creating new collection ${collectionName} with ${dimension} dimensions...`); 35 | await client.createCollection({ 36 | collection_name: collectionName, 37 | fields: [ 38 | { 39 | name: 'id', 40 | data_type: DataType.Int64, 41 | is_primary_key: true, 42 | autoID: true, 43 | }, 44 | { 45 | name: 'embedding', 46 | data_type: DataType.FloatVector, 47 | dim: dimension, 48 | }, 49 | { 50 | name: 'document', 51 | data_type: DataType.VarChar, 52 | max_length: 65535, 53 | }, 54 | ], 55 | }); 56 | 57 | // Create index for vector search 58 | console.log(`Creating index for collection ${collectionName}...`); 59 | await client.createIndex({ 60 | collection_name: collectionName, 61 | field_name: 'embedding', 62 | index_type: 'HNSW', 63 | metric_type: 'COSINE', 64 | params: { M: 8, efConstruction: 64 }, 65 | }); 66 | 67 | // Load collection into memory 68 | console.log(`Loading collection ${collectionName} into memory...`); 69 | await client.loadCollection({ 70 | collection_name: collectionName, 71 | }); 72 | 73 | console.log(`Collection ${collectionName} created and indexed successfully.`); 74 | 75 | // Close connection 76 | await client.closeConnection(); 77 | 78 | return true; 79 | } catch (error) { 80 | console.error("Error rebuilding Milvus collection:", error); 81 | return false; 82 | } 83 | } 84 | 85 | // Main function 86 | (async () => { 87 | const success = await rebuildCollection(); 88 | 89 | if (success) { 90 | console.log("Collection rebuild completed successfully. Now you need to re-embed your documents."); 91 | console.log("Run 'pnpm embed-knowledge' and 'pnpm embed-csv' to populate the new collection."); 92 | } else { 93 | console.error("Collection rebuild failed"); 94 | process.exit(1); 95 | } 96 | })(); 97 | -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/run_main_clean.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Clean main application runner with complete async error suppression. 
4 | """ 5 | 6 | import sys 7 | import os 8 | import warnings 9 | 10 | # Add current directory to path 11 | sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) 12 | 13 | class CompleteAsyncErrorFilter: 14 | """Complete async error filter that suppresses all async-related output.""" 15 | 16 | def __init__(self): 17 | self.original_stderr = sys.__stderr__ 18 | 19 | def write(self, text): 20 | """Filter out all async RuntimeErrors and related output.""" 21 | if not text.strip(): 22 | return 23 | 24 | # Comprehensive list of patterns to suppress 25 | suppress_patterns = [ 26 | "RuntimeError", 27 | "httpcore", 28 | "_synchronization", 29 | "asyncio", 30 | "anyio", 31 | "sniffio", 32 | "await", 33 | "async", 34 | "CancelScope", 35 | "shield", 36 | "current_task", 37 | "get_running_loop", 38 | "cancel_shielded_checkpoint", 39 | "_anyio_lock", 40 | "acquire", 41 | "File \"/home/ubuntu/Cost_Effective_and_Scalable_Models_Inference_on_AWS_Graviton/agentic-apps/strandsdk_agentic_rag_opensearch/venv/lib/python3.10/site-packages/httpcore", 42 | "File \"/usr/lib/python3.10/asyncio", 43 | "raise RuntimeError", 44 | ] 45 | 46 | # Check if this line should be suppressed 47 | should_suppress = any(pattern in text for pattern in suppress_patterns) 48 | 49 | # Also suppress lines that are just punctuation or whitespace 50 | if text.strip() in [":", "RuntimeError:", "RuntimeError: ", "RuntimeError", ""]: 51 | should_suppress = True 52 | 53 | # Only write if not suppressed and contains meaningful content 54 | if not should_suppress and len(text.strip()) > 1: 55 | self.original_stderr.write(text) 56 | self.original_stderr.flush() 57 | 58 | def flush(self): 59 | """Flush the original stderr.""" 60 | self.original_stderr.flush() 61 | 62 | def setup_complete_clean_environment(): 63 | """Set up completely clean environment.""" 64 | 65 | # Suppress all warnings 66 | warnings.filterwarnings("ignore") 67 | 68 | # Install complete error filter 69 | sys.stderr = CompleteAsyncErrorFilter() 70 | 71 | # Try to import and use existing cleanup if available 72 | try: 73 | from src.utils.global_async_cleanup import setup_global_async_cleanup 74 | setup_global_async_cleanup() 75 | except ImportError: 76 | pass 77 | 78 | if __name__ == "__main__": 79 | print("🚀 Starting Enhanced RAG System (Ultra Clean Mode)") 80 | print("=" * 60) 81 | print("Note: All async errors and warnings are completely suppressed") 82 | print("=" * 60) 83 | 84 | # Set up complete clean environment FIRST 85 | setup_complete_clean_environment() 86 | 87 | try: 88 | # Import and run the main application 89 | from src.main import main 90 | main() 91 | except KeyboardInterrupt: 92 | print("\n\n👋 Application stopped by user") 93 | except Exception as e: 94 | # Only show truly important errors 95 | error_msg = str(e) 96 | if not any(keyword in error_msg.lower() for keyword in [ 97 | "runtimeerror", "httpcore", "asyncio", "anyio", "await", "async" 98 | ]): 99 | print(f"\n❌ Application error: {e}") 100 | sys.exit(1) 101 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_opensearch/src/Agent.ts: -------------------------------------------------------------------------------- 1 | import MCPClient from "./MCPClient"; 2 | import ChatOpenAI from "./ChatOpenAI"; 3 | import { logTitle } from "./utils"; 4 | import { ToolCall } from "./ChatOpenAI"; 5 | 6 | export default class Agent { 7 | private mcpClients: MCPClient[]; 8 | private llm: ChatOpenAI | null = null; 9 | private model: string; 10 | private 
systemPrompt: string; 11 | private context: string; 12 | 13 | constructor(model: string, mcpClients: MCPClient[], systemPrompt: string = '', context: string = '') { 14 | this.mcpClients = mcpClients; 15 | this.model = model; 16 | this.systemPrompt = systemPrompt; 17 | this.context = context; 18 | } 19 | 20 | async init() { 21 | logTitle('TOOLS'); 22 | for await (const client of this.mcpClients) { 23 | await client.init(); 24 | } 25 | const tools = this.mcpClients.flatMap(client => client.getTools()); 26 | this.llm = new ChatOpenAI(this.model, this.systemPrompt, tools, this.context); 27 | } 28 | 29 | async close() { 30 | for await (const client of this.mcpClients) { 31 | await client.close(); 32 | } 33 | } 34 | 35 | async invoke(prompt: string) { 36 | if (!this.llm) throw new Error('Agent not initialized'); 37 | 38 | try { 39 | logTitle('AGENT EXECUTION'); 40 | console.log("Invoking LLM with tools..."); 41 | 42 | // Start the conversation with the user prompt 43 | let response = await this.llm.chat(prompt); 44 | 45 | // Continue the conversation until no more tool calls are needed 46 | while (response.toolCalls && response.toolCalls.length > 0) { 47 | logTitle('TOOL CALLS'); 48 | console.log(`Processing ${response.toolCalls.length} tool calls`); 49 | 50 | // Process each tool call 51 | for (const toolCall of response.toolCalls) { 52 | await this.processToolCall(toolCall); 53 | } 54 | 55 | // Continue the conversation with the tool results 56 | response = await this.llm.chat(); 57 | } 58 | 59 | logTitle('FINAL RESPONSE'); 60 | console.log("Successfully completed request"); 61 | return response.content; 62 | } catch (error) { 63 | console.error("Error in agent execution:", error); 64 | throw error; 65 | } 66 | } 67 | 68 | private async processToolCall(toolCall: ToolCall) { 69 | try { 70 | const { id, function: { name, arguments: argsString } } = toolCall; 71 | console.log(`Executing tool call: ${name}`); 72 | 73 | // Parse the arguments 74 | const args = JSON.parse(argsString); 75 | 76 | // Find the MCP client that can handle this tool 77 | const toolName = name; 78 | 79 | // Find the appropriate client 80 | const client = this.mcpClients[0]; // Since we only have one client 81 | 82 | if (!client) { 83 | throw new Error(`No MCP client found for tool: ${name}`); 84 | } 85 | 86 | // Call the tool and get the result 87 | const result = await client.callTool(toolName, args); 88 | console.log(`Tool result: ${JSON.stringify(result).substring(0, 100)}...`); 89 | 90 | // Append the tool result to the conversation 91 | this.llm?.appendToolResult(id, JSON.stringify(result)); 92 | } catch (error) { 93 | console.error(`Error processing tool call: ${error}`); 94 | // Append the error as the tool result 95 | this.llm?.appendToolResult(toolCall.id, JSON.stringify({ error: error.message })); 96 | } 97 | } 98 | } -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/src/utils/langfuse_config.py: -------------------------------------------------------------------------------- 1 | """Langfuse configuration and utilities.""" 2 | 3 | from typing import Optional, Dict, Any 4 | from ..config import config 5 | 6 | try: 7 | from langfuse import Langfuse 8 | LANGFUSE_AVAILABLE = True 9 | except ImportError: 10 | LANGFUSE_AVAILABLE = False 11 | Langfuse = None 12 | 13 | class LangfuseSpanWrapper: 14 | """Wrapper for Langfuse spans to handle API differences.""" 15 | def __init__(self, span): 16 | self.span = span 17 | 18 | def end(self, **kwargs): 
19 | """End the span, handling different API versions.""" 20 | try: 21 | # For Langfuse 3.x 22 | if hasattr(self.span, 'end'): 23 | # Just call end without parameters 24 | self.span.end() 25 | except Exception as e: 26 | print(f"Warning: Failed to end span: {e}") 27 | 28 | class LangfuseConfig: 29 | """Langfuse configuration and trace management.""" 30 | 31 | def __init__(self): 32 | self.client: Optional[Langfuse] = None 33 | self._initialize_client() 34 | 35 | def _initialize_client(self) -> None: 36 | """Initialize Langfuse client if available and configured.""" 37 | if not LANGFUSE_AVAILABLE: 38 | print("Langfuse not available. Install with: pip install langfuse") 39 | return 40 | 41 | if not config.is_langfuse_enabled(): 42 | print("Langfuse not configured. Skipping initialization.") 43 | return 44 | 45 | try: 46 | self.client = Langfuse( 47 | host=config.LANGFUSE_HOST, 48 | public_key=config.LANGFUSE_PUBLIC_KEY, 49 | secret_key=config.LANGFUSE_SECRET_KEY 50 | ) 51 | print("Langfuse initialized successfully") 52 | except Exception as e: 53 | print(f"Failed to initialize Langfuse: {e}") 54 | self.client = None 55 | 56 | def create_trace(self, name: str, input_data: Dict[str, Any], metadata: Optional[Dict[str, Any]] = None): 57 | """Create a new trace.""" 58 | if not self.client: 59 | return None 60 | 61 | try: 62 | # For Langfuse 3.x 63 | trace_id = self.client.create_trace_id() 64 | # Use start_span without trace_id parameter 65 | trace = self.client.start_span( 66 | name=name, 67 | input=input_data, 68 | metadata=metadata or {} 69 | ) 70 | return LangfuseSpanWrapper(trace) 71 | except Exception as e: 72 | print(f"Failed to create trace: {e}") 73 | return None 74 | 75 | def create_span(self, trace, name: str, input_data: Dict[str, Any], metadata: Optional[Dict[str, Any]] = None): 76 | """Create a new span within a trace.""" 77 | if not self.client: 78 | return None 79 | 80 | try: 81 | # For Langfuse 3.x 82 | span = self.client.start_span( 83 | name=name, 84 | input=input_data, 85 | metadata=metadata or {} 86 | ) 87 | return LangfuseSpanWrapper(span) 88 | except Exception as e: 89 | print(f"Failed to create span: {e}") 90 | return None 91 | 92 | def flush(self) -> None: 93 | """Flush pending traces.""" 94 | if self.client: 95 | try: 96 | self.client.flush() 97 | except Exception as e: 98 | print(f"Failed to flush Langfuse: {e}") 99 | 100 | @property 101 | def is_enabled(self) -> bool: 102 | """Check if Langfuse is enabled and available.""" 103 | return self.client is not None 104 | 105 | # Global Langfuse instance 106 | langfuse_config = LangfuseConfig() 107 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/src/MCPClient.ts: -------------------------------------------------------------------------------- 1 | import { Client } from "@modelcontextprotocol/sdk/client/index.js"; 2 | import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js"; 3 | import { Tool } from "@modelcontextprotocol/sdk/types.js"; 4 | 5 | export default class MCPClient { 6 | public mcp: Client; 7 | private command: string; 8 | private args: string[] 9 | private transport: StdioClientTransport | null = null; 10 | private tools: Tool[] = []; 11 | 12 | constructor(name: string, command: string, args: string[], version?: string) { 13 | this.mcp = new Client({ name, version: version || "0.0.1" }); 14 | this.command = command; 15 | this.args = args; 16 | } 17 | 18 | public async init() { 19 | await this.connectToServer(); 20 | } 21 | 22 
| public async close() { 23 | await this.mcp.close(); 24 | } 25 | 26 | public getTools() { 27 | return this.tools; 28 | } 29 | 30 | public callTool(name: string, params: Record<string, any>) { 31 | return this.mcp.callTool({ 32 | name, 33 | arguments: params, 34 | }); 35 | } 36 | 37 | private async connectToServer() { 38 | try { 39 | this.transport = new StdioClientTransport({ 40 | command: this.command, 41 | args: this.args, 42 | }); 43 | await this.mcp.connect(this.transport); 44 | 45 | try { 46 | const toolsResult = await this.mcp.listTools(); 47 | this.tools = toolsResult.tools.map((tool) => { 48 | // Prefix tool names with the client name to ensure proper routing 49 | const prefixedName = `${this.mcp.name}___${tool.name}`; 50 | return { 51 | name: prefixedName, 52 | description: tool.description, 53 | inputSchema: tool.inputSchema, 54 | }; 55 | }); 56 | console.log( 57 | "Connected to server with tools:", 58 | this.tools.map(({ name }) => name) 59 | ); 60 | } catch (toolError) { 61 | console.error("Failed to list tools from MCP server: ", toolError); 62 | console.log("Adding fallback write_file tool manually"); 63 | 64 | // Add a fallback write_file tool manually 65 | this.tools = [{ 66 | name: `${this.mcp.name}___write_file`, 67 | description: "Write content to a file", 68 | inputSchema: { 69 | type: "object", 70 | properties: { 71 | path: { type: "string", description: "Path to the file" }, 72 | content: { type: "string", description: "Content to write" } 73 | }, 74 | required: ["path", "content"] 75 | } 76 | }]; 77 | } 78 | } catch (e) { 79 | console.error("Failed to connect to MCP server: ", e); 80 | console.error("Error details:", e); 81 | console.log("Will continue without MCP tools and rely on fallback methods"); 82 | 83 | // Add a dummy tool so the agent can still make tool calls 84 | this.tools = [{ 85 | name: `${this.mcp.name}___write_file`, 86 | description: "Write content to a file (fallback)", 87 | inputSchema: { 88 | type: "object", 89 | properties: { 90 | path: { type: "string", description: "Path to the file" }, 91 | content: { type: "string", description: "Content to write" } 92 | }, 93 | required: ["path", "content"] 94 | } 95 | }]; 96 | } 97 | } 98 | } -------------------------------------------------------------------------------- /model-hosting/standalone-vllm-reasoning.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: PersistentVolumeClaim 4 | metadata: 5 | name: vllm-qwen-server 6 | annotations: 7 | kubernetes.io/pvc-protection: "false" 8 | spec: 9 | accessModes: 10 | - ReadWriteOnce 11 | resources: 12 | requests: 13 | storage: 900Gi 14 | storageClassName: gp3 15 | volumeMode: Filesystem 16 | --- 17 | apiVersion: apps/v1 18 | kind: Deployment 19 | metadata: 20 | name: vllm-qwen-server 21 | labels: 22 | app: vllm-qwen-server 23 | spec: 24 | replicas: 1 25 | selector: 26 | matchLabels: 27 | app: vllm-qwen-server 28 | template: 29 | metadata: 30 | labels: 31 | app: vllm-qwen-server 32 | spec: 33 | # Updated nodeSelector to target GPU instances specifically 34 | nodeSelector: 35 | kubernetes.io/arch: amd64 36 | nvidia.com/gpu: present 37 | karpenter.sh/nodepool: gpu-inference 38 | # Updated tolerations to match the taint on the GPU nodepool 39 | tolerations: 40 | - key: "model-inferencing" 41 | operator: "Equal" 42 | value: "gpu-inference" 43 | effect: "NoSchedule" 44 | volumes: 45 | - name: cache-volume 46 | persistentVolumeClaim: 47 | claimName: vllm-qwen-server 48 | # vLLM needs to access the host's
shared memory for tensor parallel inference. 49 | - name: shm 50 | emptyDir: 51 | medium: Memory 52 | sizeLimit: "32Gi" 53 | containers: 54 | - name: vllm-qwen-server 55 | image: vllm/vllm-openai:latest 56 | # image: vllm/vllm-openai:v0.7.3 57 | command: ["/bin/sh", "-c"] 58 | args: [ 59 | "vllm serve Qwen/Qwen3-14B --enable-auto-tool-choice --tool-call-parser hermes --trust-remote-code --max-num-batched-tokens 32768 --max-num-seqs 8 --max-model-len 32768 --dtype bfloat16 --tensor-parallel-size 4 --gpu-memory-utilization 0.90" 60 | 61 | ] 62 | env: 63 | - name: HUGGING_FACE_HUB_TOKEN 64 | valueFrom: 65 | secretKeyRef: 66 | name: hf-token 67 | key: token 68 | - name: OMP_NUM_THREADS 69 | value: "8" 70 | - name: VLLM_LOGGING_LEVEL 71 | value: "DEBUG" 72 | - name: VLLM_DISABLE_COMPILE_CACHE 73 | value: "0" 74 | # - name: PYTORCH_CUDA_ALLOC_CONF 75 | # value: "max_split_size_mb:512,expandable_segments:True" 76 | - name: CUDA_VISIBLE_DEVICES 77 | value: "0,1,2,3" 78 | ports: 79 | - containerPort: 8000 80 | resources: 81 | limits: 82 | memory: 64Gi 83 | nvidia.com/gpu: "4" 84 | requests: 85 | cpu: "22" 86 | memory: 64Gi 87 | nvidia.com/gpu: "4" 88 | volumeMounts: 89 | - mountPath: /root/.cache/huggingface 90 | name: cache-volume 91 | - name: shm 92 | mountPath: /dev/shm 93 | livenessProbe: 94 | httpGet: 95 | path: /health 96 | port: 8000 97 | initialDelaySeconds: 240 98 | periodSeconds: 10 99 | failureThreshold: 30 100 | successThreshold: 1 101 | 102 | 103 | readinessProbe: 104 | httpGet: 105 | path: /health 106 | port: 8000 107 | initialDelaySeconds: 240 108 | periodSeconds: 10 109 | 110 | --- 111 | apiVersion: v1 112 | kind: Service 113 | metadata: 114 | name: vllm-qwen-server 115 | spec: 116 | ports: 117 | - name: http-vllm-qwen-server 118 | port: 8000 119 | protocol: TCP 120 | targetPort: 8000 121 | # The label selector should match the deployment labels & it is useful for prefix caching feature 122 | selector: 123 | app: vllm-qwen-server 124 | sessionAffinity: None 125 | type: ClusterIP 126 | 127 | 128 | -------------------------------------------------------------------------------- /model-hosting/ray-server/llamacpp.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | import multiprocessing 5 | import os 6 | import logging 7 | import time 8 | from fastapi import FastAPI 9 | from starlette.requests import Request 10 | from starlette.responses import StreamingResponse, JSONResponse 11 | 12 | from ray import serve 13 | 14 | 15 | 16 | 17 | 18 | from llama_cpp import Llama 19 | 20 | 21 | 22 | logger = logging.getLogger("ray.serve") 23 | 24 | app = FastAPI() 25 | 26 | # Define the deployment 27 | # ray_actor_options={"num_cpus": 14}, 28 | @serve.deployment(name="LLamaCPPDeployment", autoscaling_config={"min_replicas" : 10, "max_replicas": 10, "initial_replicas": 10, "upscale_delay_s": 5}, max_ongoing_requests=100, graceful_shutdown_timeout_s=600) 29 | @serve.ingress(app) 30 | class LLamaCPPDeployment: 31 | def __init__(self, parallelism: str): 32 | os.environ["OMP_NUM_THREADS"] = parallelism 33 | # Initialize the LLamaCPP model 34 | self.model_id = os.getenv("MODEL_ID", default="SanctumAI/Llama-3.2-1B-Instruct-GGUF") 35 | # Get filename from environment variable with default fallback to "*Q4_0.gguf" 36 | self.filename = os.getenv("MODEL_FILENAME", default="*Q4_0.gguf") 37 | self.n_ctx = int(os.getenv("N_CTX")) 38 | self.n_threads =
int(os.getenv("N_THREADS")) 39 | # self.n_batch = int(os.getenv("N_BATCH")) 40 | # self.llama_cpp = Llama(model_path=MODEL_ID, n_ctx=self.n_ctx, n_batch=self.n_batch) 41 | self.llm = Llama.from_pretrained(repo_id=self.model_id,filename=self.filename,n_ctx=self.n_ctx,n_threads=self.n_threads) 42 | #"hugging-quants/Llama-3.2-3B-Instruct-Q8_0-GGUF", 43 | print("__init__ Complete") 44 | 45 | @app.post("/v1/chat/completions") 46 | async def call_llama(self, request: Request): 47 | try: 48 | body = await request.json() 49 | 50 | # Get the messages array from the body 51 | messages = body.get("messages", []) 52 | 53 | # Get the content from the first user message 54 | prompt = "" 55 | if messages: 56 | for message in messages: 57 | if message.get("role") == "user": 58 | prompt = message.get("content", "") 59 | break 60 | 61 | 62 | if not prompt: 63 | return JSONResponse( 64 | status_code=400, 65 | content={"error": "prompt is required"} 66 | ) 67 | 68 | output = self.llm( 69 | "Q: " + prompt + " A: ", 70 | max_tokens=body.get("max_tokens", 32) 71 | ) 72 | 73 | return JSONResponse(content={ 74 | "id": "cmpl-" + os.urandom(12).hex(), 75 | "object": "text_completion", 76 | "created": int(time.time()), 77 | "model": self.model_id, 78 | "choices": [{ 79 | "text": output["choices"][0]["text"], 80 | "index": 0, 81 | "finish_reason": "stop" 82 | }], 83 | "usage": { 84 | "prompt_tokens": len(prompt.split()), 85 | "completion_tokens": len(output["choices"][0]["text"].split()), 86 | "total_tokens": len(prompt.split()) + len(output["choices"][0]["text"].split()) 87 | } 88 | }) 89 | 90 | 91 | except Exception as e: 92 | logger.error(f"Error: {str(e)}") 93 | return JSONResponse( 94 | status_code=500, 95 | content={"error": str(e)} 96 | ) 97 | 98 | 99 | # Get host CPU count and pass it to the deployment as a string (it is written to OMP_NUM_THREADS) 100 | host_cpu_count = multiprocessing.cpu_count() 101 | 102 | model = LLamaCPPDeployment.bind(str(host_cpu_count)) -------------------------------------------------------------------------------- /model-hosting/standalone-vllm-vision.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: PersistentVolumeClaim 4 | metadata: 5 | name: vllm-qwen-server-vision 6 | annotations: 7 | kubernetes.io/pvc-protection: "false" 8 | spec: 9 | accessModes: 10 | - ReadWriteOnce 11 | resources: 12 | requests: 13 | storage: 900Gi 14 | storageClassName: gp3 15 | volumeMode: Filesystem 16 | --- 17 | apiVersion: apps/v1 18 | kind: Deployment 19 | metadata: 20 | name: vllm-qwen-server-vision 21 | labels: 22 | app: vllm-qwen-server-vision 23 | spec: 24 | replicas: 1 25 | selector: 26 | matchLabels: 27 | app: vllm-qwen-server-vision 28 | template: 29 | metadata: 30 | labels: 31 | app: vllm-qwen-server-vision 32 | spec: 33 | # Updated nodeSelector to target GPU instances specifically 34 | nodeSelector: 35 | kubernetes.io/arch: amd64 36 | nvidia.com/gpu: present 37 | karpenter.sh/nodepool: gpu-inference 38 | # Updated tolerations to match the taint on the GPU nodepool 39 | tolerations: 40 | - key: "model-inferencing" 41 | operator: "Equal" 42 | value: "gpu-inference" 43 | effect: "NoSchedule" 44 | volumes: 45 | - name: cache-volume 46 | persistentVolumeClaim: 47 | claimName: vllm-qwen-server-vision 48 | # vLLM needs to access the host's shared memory for tensor parallel inference.
49 | - name: shm 50 | emptyDir: 51 | medium: Memory 52 | sizeLimit: "32Gi" 53 | containers: 54 | - name: vllm-qwen-server-vision 55 | image: vllm/vllm-openai:latest 56 | # image: vllm/vllm-openai:v0.7.3 57 | command: ["/bin/sh", "-c"] 58 | args: [ 59 | "vllm serve Qwen/Qwen2.5-VL-7B-Instruct --enable-auto-tool-choice --tool-call-parser hermes --trust-remote-code --max-num-batched-tokens 8192 --max-num-seqs 8 --max-model-len 8192 --dtype bfloat16 --tensor-parallel-size 4 --gpu-memory-utilization 0.90" 60 | 61 | ] 62 | env: 63 | - name: HUGGING_FACE_HUB_TOKEN 64 | valueFrom: 65 | secretKeyRef: 66 | name: hf-token 67 | key: token 68 | - name: OMP_NUM_THREADS 69 | value: "8" 70 | - name: VLLM_LOGGING_LEVEL 71 | value: "DEBUG" 72 | - name: VLLM_DISABLE_COMPILE_CACHE 73 | value: "0" 74 | # - name: PYTORCH_CUDA_ALLOC_CONF 75 | # value: "max_split_size_mb:512,expandable_segments:True" 76 | - name: CUDA_VISIBLE_DEVICES 77 | value: "0,1,2,3" 78 | ports: 79 | - containerPort: 8000 80 | resources: 81 | limits: 82 | memory: 64Gi 83 | nvidia.com/gpu: "4" 84 | requests: 85 | cpu: "22" 86 | memory: 64Gi 87 | nvidia.com/gpu: "4" 88 | volumeMounts: 89 | - mountPath: /root/.cache/huggingface 90 | name: cache-volume 91 | - name: shm 92 | mountPath: /dev/shm 93 | livenessProbe: 94 | httpGet: 95 | path: /health 96 | port: 8000 97 | initialDelaySeconds: 240 98 | periodSeconds: 10 99 | failureThreshold: 30 100 | successThreshold: 1 101 | 102 | 103 | readinessProbe: 104 | httpGet: 105 | path: /health 106 | port: 8000 107 | initialDelaySeconds: 240 108 | periodSeconds: 10 109 | 110 | --- 111 | apiVersion: v1 112 | kind: Service 113 | metadata: 114 | name: vllm-qwen-server-vision 115 | spec: 116 | ports: 117 | - name: http-vllm-qwen-server-vision 118 | port: 8000 119 | protocol: TCP 120 | targetPort: 8000 121 | # The label selector should match the deployment labels & it is useful for prefix caching feature 122 | selector: 123 | app: vllm-qwen-server-vision 124 | sessionAffinity: None 125 | type: ClusterIP 126 | 127 | 128 | 129 | -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/src/test_agents.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Agent Testing Script 4 | 5 | This script tests the multi-agent system with various queries to ensure 6 | all components are working correctly, including Langfuse integration. 
7 | """ 8 | 9 | import sys 10 | import logging 11 | from typing import List, Dict, Any 12 | from .config import config 13 | from .utils.logging import setup_logging, log_title 14 | from .agents.supervisor_agent import supervisor_agent, supervisor_agent_with_langfuse 15 | from .agents.knowledge_agent import knowledge_agent, knowledge_agent_with_langfuse 16 | from .agents.mcp_agent import mcp_agent, mcp_agent_with_langfuse 17 | 18 | def main(): 19 | """Main function for testing agents.""" 20 | # Setup logging 21 | setup_logging() 22 | logger = logging.getLogger(__name__) 23 | 24 | try: 25 | # Validate configuration 26 | config.validate_config() 27 | 28 | log_title("AGENT TESTING SCRIPT") 29 | logger.info("Starting agent testing process") 30 | 31 | # Test individual agents first 32 | test_individual_agents() 33 | 34 | # Test supervisor agent 35 | test_supervisor_agent() 36 | 37 | print("\n🎉 All tests completed!") 38 | 39 | except KeyboardInterrupt: 40 | print("\n\nTesting interrupted by user.") 41 | sys.exit(0) 42 | except Exception as e: 43 | logger.error(f"Agent testing failed: {e}") 44 | print(f"❌ Error: {e}") 45 | sys.exit(1) 46 | 47 | def test_individual_agents(): 48 | """Test individual agents.""" 49 | log_title("INDIVIDUAL AGENT TESTS") 50 | 51 | # Test Knowledge Agent 52 | print("🧠 Testing Knowledge Agent...") 53 | try: 54 | if config.is_langfuse_enabled(): 55 | knowledge_response = knowledge_agent_with_langfuse("Please scan the knowledge directory and report what files are available.") 56 | print(f"✅ Knowledge Agent Response (with Langfuse): {str(knowledge_response)[:200]}...") 57 | else: 58 | knowledge_response = knowledge_agent("Please scan the knowledge directory and report what files are available.") 59 | print(f"✅ Knowledge Agent Response: {str(knowledge_response)[:200]}...") 60 | except Exception as e: 61 | print(f"❌ Knowledge Agent failed: {e}") 62 | 63 | # Test MCP Agent 64 | print("\n🔧 Testing MCP Agent...") 65 | try: 66 | if config.is_langfuse_enabled(): 67 | mcp_response = mcp_agent_with_langfuse("Please help me understand what tools are available for file operations.") 68 | print(f"✅ MCP Agent Response (with Langfuse): {str(mcp_response)[:200]}...") 69 | else: 70 | mcp_response = mcp_agent("Please help me understand what tools are available for file operations.") 71 | print(f"✅ MCP Agent Response: {str(mcp_response)[:200]}...") 72 | except Exception as e: 73 | print(f"❌ MCP Agent failed: {e}") 74 | 75 | def test_supervisor_agent(): 76 | """Test the supervisor agent with various queries.""" 77 | log_title("SUPERVISOR AGENT TESTS") 78 | 79 | test_queries = [ 80 | "What is the status of the knowledge base?", 81 | "Can you help me understand what files are in the knowledge directory?", 82 | "Please search for information about Bell's palsy if available." 
83 | ] 84 | 85 | for i, query in enumerate(test_queries, 1): 86 | print(f"\n🧪 Test {i}: {query}") 87 | 88 | try: 89 | if config.is_langfuse_enabled(): 90 | response = supervisor_agent_with_langfuse(query) 91 | print(f"✅ Success (with Langfuse): {str(response)[:300]}...") 92 | else: 93 | response = supervisor_agent(query) 94 | print(f"✅ Success: {str(response)[:300]}...") 95 | 96 | except Exception as e: 97 | print(f"❌ Error: {e}") 98 | 99 | if __name__ == "__main__": 100 | main() 101 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/src/EmbeddingRetriever.ts: -------------------------------------------------------------------------------- 1 | import { logTitle } from "./utils"; 2 | import MilvusVectorStore from "./MilvusVectorStore"; 3 | import 'dotenv/config'; 4 | import fetch from 'node-fetch'; 5 | 6 | export default class EmbeddingRetriever { 7 | private embeddingModel: string; 8 | private vectorStore: MilvusVectorStore; 9 | private embeddingEndpoint: string; 10 | 11 | constructor(embeddingModel: string) { 12 | this.embeddingModel = embeddingModel; 13 | this.vectorStore = new MilvusVectorStore(); 14 | this.embeddingEndpoint = 'http://18.232.167.163:8080/v1/embeddings'; 15 | } 16 | 17 | async embedDocument(document: string) { 18 | logTitle('EMBEDDING DOCUMENT'); 19 | const embedding = await this.embed(document); 20 | this.vectorStore.addEmbedding(embedding, document); 21 | return embedding; 22 | } 23 | 24 | async embedQuery(query: string) { 25 | logTitle('EMBEDDING QUERY'); 26 | const embedding = await this.embed(query); 27 | return embedding; 28 | } 29 | 30 | private async embed(document: string): Promise<number[]> { 31 | try { 32 | console.log(`Sending embedding request to custom endpoint: ${this.embeddingEndpoint}`); 33 | console.log(`Document length: ${document.length} characters`); 34 | 35 | const response = await fetch(this.embeddingEndpoint, { 36 | method: 'POST', 37 | headers: { 38 | 'Content-Type': 'application/json', 39 | }, 40 | body: JSON.stringify({ 41 | content: document 42 | }), 43 | }); 44 | 45 | if (!response.ok) { 46 | throw new Error(`HTTP error! Status: ${response.status}`); 47 | } 48 | 49 | const responseBody = await response.json(); 50 | 51 | // Check if we got a valid embedding 52 | // The response format is an array with objects containing the embedding 53 | if (Array.isArray(responseBody) && responseBody.length > 0 && responseBody[0].embedding) { 54 | // Extract the embedding from the first item in the array 55 | const embedding = responseBody[0].embedding; 56 | 57 | // Check if the embedding is a nested array and flatten it if needed 58 | const flatEmbedding = Array.isArray(embedding[0]) ?
embedding[0] : embedding; 59 | 60 | console.log(`Successfully received embedding with ${flatEmbedding.length} dimensions`); 61 | return flatEmbedding; 62 | } else { 63 | console.log("Warning: Embedding API didn't return a valid embedding"); 64 | console.log("Response:", JSON.stringify(responseBody, null, 2)); 65 | // Return a small random embedding vector for testing purposes (384 dims to match the collection) 66 | return Array(384).fill(0).map(() => Math.random()); 67 | } 68 | } catch (error) { 69 | console.error("Error fetching embedding from custom endpoint:", error); 70 | // Return a mock embedding in case of error (384 dims to match the collection) 71 | return Array(384).fill(0).map(() => Math.random()); 72 | } 73 | } 74 | 75 | async retrieve(query: string, topK: number = 3): Promise<string[]> { 76 | console.log(`Starting retrieval for query: "${query.substring(0, 50)}..."`); 77 | 78 | const queryEmbedding = await this.embedQuery(query); 79 | console.log(`Generated query embedding with ${queryEmbedding.length} dimensions`); 80 | 81 | // Log a few values from the embedding to check consistency 82 | console.log(`Embedding sample values: [${queryEmbedding.slice(0, 5).join(', ')}]`); 83 | 84 | const results = await this.vectorStore.search(queryEmbedding, topK); 85 | console.log(`Search returned ${results.length} results`); 86 | 87 | if (results.length === 0) { 88 | console.log(`WARNING: No results found for query: "${query.substring(0, 50)}..."`); 89 | } else { 90 | console.log(`First result preview: "${results[0].substring(0, 100)}..."`); 91 | } 92 | 93 | return results; 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/scripts/start_tavily_server.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Startup script for Tavily MCP Server 4 | """ 5 | 6 | import os 7 | import sys 8 | import subprocess 9 | import time 10 | import requests 11 | from pathlib import Path 12 | from dotenv import load_dotenv 13 | 14 | # Add the project root to Python path 15 | project_root = Path(__file__).parent.parent 16 | sys.path.insert(0, str(project_root)) 17 | 18 | # Load environment variables from .env file 19 | env_file = project_root / ".env" 20 | if env_file.exists(): 21 | load_dotenv(env_file) 22 | print(f"✅ Loaded environment variables from {env_file}") 23 | else: 24 | print(f"⚠️ No .env file found at {env_file}") 25 | print("Please create a .env file with your configuration") 26 | 27 | def check_tavily_api_key(): 28 | """Check if Tavily API key is configured""" 29 | api_key = os.getenv("TAVILY_API_KEY") 30 | if not api_key: 31 | print("❌ TAVILY_API_KEY environment variable is not set!") 32 | print("Please get your API key from https://tavily.com and set it in your .env file:") 33 | print("TAVILY_API_KEY=your-api-key-here") 34 | return False 35 | print(f"✅ Tavily API key configured: {api_key[:8]}...") 36 | return True 37 | 38 | def check_server_health(max_retries=10, delay=2): 39 | """Check if the Tavily MCP server is running and healthy""" 40 | for attempt in range(max_retries): 41 | try: 42 | # Try to connect to the MCP server endpoint 43 | response = requests.get("http://localhost:8001/", timeout=5) 44 | if response.status_code in [200, 404]: # 404 is OK for MCP server root 45 | print("✅ Tavily MCP server is healthy and ready!") 46 | return True 47 | except requests.exceptions.RequestException: 48 | pass 49 | 50 | if attempt < max_retries - 1: 51 | print(f"⏳ Waiting for server to start...
(attempt {attempt + 1}/{max_retries})") 52 | time.sleep(delay) 53 | 54 | print("❌ Server health check failed after maximum retries") 55 | return False 56 | 57 | def start_tavily_server(): 58 | """Start the Tavily MCP server""" 59 | if not check_tavily_api_key(): 60 | return False 61 | 62 | print("🚀 Starting Tavily MCP Server...") 63 | 64 | # Path to the server script 65 | server_script = project_root / "src" / "mcp_servers" / "tavily_search_server.py" 66 | 67 | if not server_script.exists(): 68 | print(f"❌ Server script not found: {server_script}") 69 | return False 70 | 71 | try: 72 | # Start the server as a subprocess 73 | process = subprocess.Popen([ 74 | sys.executable, str(server_script) 75 | ], cwd=str(project_root)) 76 | 77 | print(f"📡 Server started with PID: {process.pid}") 78 | print("🔗 MCP server available at: http://localhost:8001/mcp") 79 | 80 | # Wait a moment for server to start 81 | time.sleep(3) 82 | 83 | # Check if server is healthy 84 | if check_server_health(): 85 | print("\n🎉 Tavily MCP Server is ready!") 86 | print("\nAvailable tools:") 87 | print(" - web_search: General web search with AI-generated answers") 88 | print(" - news_search: Recent news and current events search") 89 | print(" - health_check: Service health status") 90 | print("\n💡 The supervisor agent will automatically use web search when RAG relevance is low (<0.3)") 91 | return True 92 | else: 93 | print("❌ Server failed to start properly") 94 | process.terminate() 95 | return False 96 | 97 | except Exception as e: 98 | print(f"❌ Failed to start server: {e}") 99 | return False 100 | 101 | if __name__ == "__main__": 102 | success = start_tavily_server() 103 | if success: 104 | print("\n✨ Server is running! Press Ctrl+C to stop.") 105 | try: 106 | # Keep the script running 107 | while True: 108 | time.sleep(1) 109 | except KeyboardInterrupt: 110 | print("\n🛑 Shutting down server...") 111 | else: 112 | print("\n❌ Failed to start Tavily MCP Server") 113 | sys.exit(1) 114 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/README.md: -------------------------------------------------------------------------------- 1 | # Agentic RAG with MCP and Custom Embedding 2 | 3 | This project implements an augmented Large Language Model (LLM) system that combines Model Context Protocol (MCP) for tool usage and Retrieval Augmented Generation (RAG) for enhanced context awareness, all without relying on frameworks like LangChain or LlamaIndex. 4 | 5 | ## Project Overview 6 | 7 | This application creates an AI agent that can: 8 | 1. Retrieve relevant information from a knowledge base using vector embeddings 9 | 2. Interact with external tools through the Model Context Protocol (MCP) 10 | 3. Generate responses based on both the retrieved context and tool interactions 11 | 4. Complete tasks like summarizing content and saving results to files 12 | 13 | ## Architecture 14 | 15 | The system is built with a modular architecture consisting of these key components: 16 | 17 | ``` 18 | Agent → Manages the overall workflow and coordinates components 19 | ├── ChatOpenAI → Handles LLM interactions and tool calling 20 | ├── MCPClient(s) → Connects to MCP servers for tool access 21 | └── EmbeddingRetriever → Performs vector search for relevant context 22 | └── VectorStore → Stores and searches document embeddings 23 | ``` 24 | 25 | ## Workflow Explanation 26 | 27 | 1. 
**Initialization**: 28 | - The system loads knowledge documents and creates embeddings using a custom embedding endpoint 29 | - Embeddings are stored in a Milvus vector database 30 | - MCP clients are initialized to connect to tool servers 31 | 32 | 2. **RAG Process**: 33 | - When a query is received, it's converted to an embedding 34 | - The system searches for the most relevant documents using cosine similarity 35 | - Retrieved documents are combined to form context for the LLM 36 | 37 | 3. **Agent Execution**: 38 | - The agent initializes with the LLM, MCP clients, and retrieved context 39 | - The user query is sent to the LLM along with the context 40 | - The LLM generates responses and may request tool calls 41 | 42 | 4. **Tool Usage**: 43 | - When the LLM requests a tool, the agent routes the call to the appropriate MCP client 44 | - The MCP client executes the tool and returns results 45 | - Results are fed back to the LLM to continue the conversation 46 | 47 | 5. **Output Generation**: 48 | - The LLM generates a final response incorporating tool results and context 49 | - In the example task, it creates a markdown file with information about "Antonette" 50 | 51 | ## Key Components 52 | 53 | - **Agent**: Coordinates the overall workflow and manages tool usage 54 | - **ChatOpenAI**: Handles interactions with the language model and tool calling 55 | - **MCPClient**: Connects to MCP servers and manages tool calls 56 | - **EmbeddingRetriever**: Creates and searches vector embeddings for relevant context 57 | - **MilvusVectorStore**: Interfaces with Milvus for storing and retrieving embeddings 58 | 59 | ## Getting Started 60 | 61 | ### Prerequisites 62 | 63 | - Node.js 18+ 64 | - pnpm or npm 65 | - OpenAI API key 66 | - Milvus database instance 67 | 68 | ### Installation 69 | 70 | ```bash 71 | # Clone the repository 72 | git clone <repository-url> 73 | 74 | # Install dependencies 75 | pnpm install 76 | 77 | # Set up environment variables 78 | # Create a .env file with: 79 | # - OPENAI_API_KEY 80 | # - OPENAI_BASE_URL (optional) 81 | # - MILVUS_ADDRESS 82 | ``` 83 | 84 | ### Usage 85 | 86 | ```bash 87 | # Embed knowledge documents 88 | pnpm embed-knowledge 89 | 90 | # Embed CSV data (optional) 91 | pnpm embed-csv 92 | 93 | # Run the application 94 | pnpm dev 95 | ``` 96 | 97 | ## Example Use Case 98 | 99 | The current implementation demonstrates a task where the agent: 100 | 1. Retrieves information about a user named "Antonette" from the knowledge base 101 | 2. Summarizes the information and creates a story about her 102 | 3. Saves the output to a markdown file using the filesystem MCP tool 103 | 104 | ## Extending the System 105 | 106 | This modular architecture can be easily extended: 107 | - Add more MCP servers for additional tool capabilities 108 | - Implement advanced Milvus features like filtering and hybrid search 109 | - Add more sophisticated RAG techniques like reranking or chunking 110 | - Implement conversation history for multi-turn interactions 111 | - Deploy as a service with API endpoints 112 | - Integrate with different LLM providers 113 | - Scale the vector database for production workloads 114 | -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/run_single_query_clean.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Clean single query runner with complete async error suppression.
4 | """ 5 | 6 | import sys 7 | import os 8 | import warnings 9 | 10 | # Add current directory to path 11 | sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) 12 | 13 | class CompleteAsyncErrorFilter: 14 | """Complete async error filter that suppresses all async-related output.""" 15 | 16 | def __init__(self): 17 | self.original_stderr = sys.__stderr__ 18 | 19 | def write(self, text): 20 | """Filter out all async RuntimeErrors and related output.""" 21 | if not text.strip(): 22 | return 23 | 24 | # Comprehensive list of patterns to suppress 25 | suppress_patterns = [ 26 | "RuntimeError", 27 | "httpcore", 28 | "_synchronization", 29 | "asyncio", 30 | "anyio", 31 | "sniffio", 32 | "await", 33 | "async", 34 | "CancelScope", 35 | "shield", 36 | "current_task", 37 | "get_running_loop", 38 | "cancel_shielded_checkpoint", 39 | "_anyio_lock", 40 | "acquire", 41 | "File \"/home/ubuntu/Cost_Effective_and_Scalable_Models_Inference_on_AWS_Graviton/agentic-apps/strandsdk_agentic_rag_opensearch/venv/lib/python3.10/site-packages/httpcore", 42 | "File \"/usr/lib/python3.10/asyncio", 43 | "raise RuntimeError", 44 | ] 45 | 46 | # Check if this line should be suppressed 47 | should_suppress = any(pattern in text for pattern in suppress_patterns) 48 | 49 | # Also suppress lines that are just punctuation or whitespace 50 | if text.strip() in [":", "RuntimeError:", "RuntimeError: ", "RuntimeError", ""]: 51 | should_suppress = True 52 | 53 | # Only write if not suppressed and contains meaningful content 54 | if not should_suppress and len(text.strip()) > 1: 55 | self.original_stderr.write(text) 56 | self.original_stderr.flush() 57 | 58 | def flush(self): 59 | """Flush the original stderr.""" 60 | self.original_stderr.flush() 61 | 62 | def setup_complete_clean_environment(): 63 | """Set up completely clean environment.""" 64 | 65 | # Suppress all warnings 66 | warnings.filterwarnings("ignore") 67 | 68 | # Install complete error filter 69 | sys.stderr = CompleteAsyncErrorFilter() 70 | 71 | # Try to import and use existing cleanup if available 72 | try: 73 | from src.utils.global_async_cleanup import setup_global_async_cleanup 74 | setup_global_async_cleanup() 75 | except ImportError: 76 | pass 77 | 78 | def run_clean_query(query: str): 79 | """Run a single query with completely clean output.""" 80 | print("🚀 Enhanced RAG System - Single Query (Ultra Clean Mode)") 81 | print("=" * 60) 82 | print(f"Query: {query}") 83 | print("=" * 60) 84 | print("Note: All async errors and warnings are completely suppressed") 85 | print("=" * 60) 86 | 87 | # Set up complete clean environment FIRST 88 | setup_complete_clean_environment() 89 | 90 | try: 91 | from src.agents.supervisor_agent import supervisor_agent 92 | 93 | print("\n🔍 Processing query...") 94 | response = supervisor_agent(query) 95 | 96 | print("\n📝 Response:") 97 | print("-" * 40) 98 | print(response) 99 | print("-" * 40) 100 | print("\n✅ Query completed successfully!") 101 | 102 | return True 103 | 104 | except Exception as e: 105 | # Only show truly important errors 106 | error_msg = str(e) 107 | if not any(keyword in error_msg.lower() for keyword in [ 108 | "runtimeerror", "httpcore", "asyncio", "anyio", "await", "async" 109 | ]): 110 | print(f"\n❌ Error processing query: {e}") 111 | return False 112 | 113 | if __name__ == "__main__": 114 | # Test with a sample query 115 | test_query = "What is Bell's palsy and how is it treated?" 
116 | 117 | if len(sys.argv) > 1: 118 | # Use command line argument if provided 119 | test_query = " ".join(sys.argv[1:]) 120 | 121 | success = run_clean_query(test_query) 122 | sys.exit(0 if success else 1) 123 | --------------------------------------------------------------------------------
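Two usage sketches follow; neither file exists in the repository, and the endpoints, ports, and payload shapes are assumptions drawn from the manifests and source code above.

First, a minimal smoke test for the OpenAI-compatible vLLM service defined in `standalone-vllm-reasoning.yaml`, assuming the ClusterIP service has been forwarded locally:

```python
# Minimal sketch: query the vLLM chat completions endpoint.
# Assumes: kubectl port-forward svc/vllm-qwen-server 8000:8000
import requests

resp = requests.post(
    "http://localhost:8000/v1/chat/completions",
    json={
        "model": "Qwen/Qwen3-14B",  # the model served in standalone-vllm-reasoning.yaml
        "messages": [{"role": "user", "content": "What is Bell's palsy?"}],
        "max_tokens": 256,
    },
    timeout=300,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```

Second, a sketch of the embedding API contract that `EmbeddingRetriever.ts` assumes: a POST with a `content` field, answered by an array whose first element carries a 384-dimension `embedding` (possibly nested). The URL below is the address hardcoded in the source; replace it with your own embedding service.

```python
# Illustrates the request/response shape expected by EmbeddingRetriever.embed().
import requests

resp = requests.post(
    "http://18.232.167.163:8080/v1/embeddings",  # replace with your embedding endpoint
    json={"content": "Who is Antonette?"},
    timeout=60,
)
resp.raise_for_status()
payload = resp.json()  # expected shape: [{"embedding": [...]}]
embedding = payload[0]["embedding"]
if embedding and isinstance(embedding[0], list):
    embedding = embedding[0]  # flatten nested arrays, as the TypeScript client does
print(f"Received embedding with {len(embedding)} dimensions")
```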