├── agentic-apps
│   ├── strandsdk_agentic_rag_opensearch
│   │   ├── src
│   │   │   ├── __init__.py
│   │   │   ├── tools
│   │   │   │   └── __init__.py
│   │   │   ├── scripts
│   │   │   │   ├── __init__.py
│   │   │   │   └── embed_knowledge.py
│   │   │   ├── agents
│   │   │   │   ├── rag_agent.py
│   │   │   │   └── __init__.py
│   │   │   ├── utils
│   │   │   │   ├── __init__.py
│   │   │   │   ├── logging.py
│   │   │   │   ├── model_providers.py
│   │   │   │   ├── opensearch_client.py
│   │   │   │   ├── async_cleanup.py
│   │   │   │   └── langfuse_config.py
│   │   │   ├── config.py
│   │   │   └── test_agents.py
│   │   ├── k8s
│   │   │   ├── service-account.yaml
│   │   │   └── tavily-mcp-deployment.yaml
│   │   ├── images
│   │   │   └── arch.png
│   │   ├── healthcheck-mcp.sh
│   │   ├── healthcheck-main.sh
│   │   ├── update-k8s-config.sh
│   │   ├── requirements.txt
│   │   ├── Dockerfile.mcp
│   │   ├── startup-mcp.sh
│   │   ├── startup-main.sh
│   │   ├── pyproject.toml
│   │   ├── .env.example
│   │   ├── Dockerfile.main
│   │   ├── run_main_clean.py
│   │   ├── scripts
│   │   │   └── start_tavily_server.py
│   │   └── run_single_query_clean.py
│   ├── agentic_rag_milvus
│   │   ├── output
│   │   │   └── test.md
│   │   ├── images
│   │   │   ├── image.png
│   │   │   ├── image1.png
│   │   │   ├── image2.png
│   │   │   ├── image3.png
│   │   │   └── image4.png
│   │   ├── src
│   │   │   ├── VectorStore.ts
│   │   │   ├── utils.ts
│   │   │   ├── package.json
│   │   │   ├── README.md
│   │   │   ├── embedKnowledge.ts
│   │   │   ├── updateRAG.ts
│   │   │   ├── embedCSV.ts
│   │   │   ├── rebuildCollection.ts
│   │   │   ├── MCPClient.ts
│   │   │   └── EmbeddingRetriever.ts
│   │   ├── tsconfig.json
│   │   ├── knowledge
│   │   │   ├── user_9.md
│   │   │   ├── user_7.md
│   │   │   ├── user_5.md
│   │   │   ├── user_1.md
│   │   │   ├── user_10.md
│   │   │   ├── user_2.md
│   │   │   ├── user_3.md
│   │   │   ├── user_8.md
│   │   │   ├── user_4.md
│   │   │   └── user_6.md
│   │   ├── test-file-write.js
│   │   ├── list_collections.py
│   │   ├── package.json
│   │   ├── debug-auth.js
│   │   ├── debug.js
│   │   ├── debug-no-tools.js
│   │   ├── test-mcp-server.js
│   │   ├── test-endpoint.js
│   │   ├── explore_collection.py
│   │   ├── debug-tools.js
│   │   ├── test-endpoint-with-tools.js
│   │   ├── AmazonQ.md
│   │   └── README.md
│   ├── agentic-idp
│   │   ├── requirements.txt
│   │   ├── birth_cert.png
│   │   ├── doc_reader.py
│   │   ├── mcp.py
│   │   ├── .env.example
│   │   ├── decision.py
│   │   └── storage.py
│   └── agentic_rag_opensearch
│       ├── src
│       │   ├── scripts
│       │   │   ├── index.ts
│       │   │   └── embedKnowledge.ts
│       │   ├── agents
│       │   │   └── index.ts
│       │   ├── VectorStore.ts
│       │   ├── utils.ts
│       │   ├── MCPClient.ts
│       │   ├── embedKnowledge.ts
│       │   ├── LangfuseConfig.ts
│       │   ├── test-agents.ts
│       │   ├── embedCSV.ts
│       │   ├── test-langfuse.ts
│       │   ├── index.ts
│       │   └── Agent.ts
│       ├── images
│       │   ├── arch.png
│       │   ├── image.png
│       │   ├── image1.png
│       │   ├── image2.png
│       │   ├── image3.png
│       │   └── image4.png
│       ├── tsconfig.json
│       ├── update-policy.json
│       ├── package.json
│       ├── .env.example
│       ├── output
│       │   └── antonette.md
│       ├── MULTI_AGENT_GUIDE.md
│       └── AmazonQ.md
├── model-hosting
│   ├── ray-server
│   │   ├── local-requirements.txt
│   │   └── llamacpp.py
│   ├── ray-services
│   │   └── ingress
│   │       ├── add-sg-lb-eks.sh
│   │       ├── ingress-embedding.yaml
│   │       └── ingress-cpu.yaml
│   ├── standalone-llamacpp-embedding.yaml
│   ├── standalone-vllm-reasoning.yaml
│   └── standalone-vllm-vision.yaml
├── milvus
│   ├── milvus-update.yaml
│   ├── ebs-storage-class.yaml
│   ├── milvus-nlb-service.yaml
│   ├── milvus-standalone.yaml
│   └── README.md
├── image
│   ├── Inference_GenAI_architecture_EKS.jpg
│   └── Inference_GenAI_app_architecture_EKS.jpg
├── CODE_OF_CONDUCT.md
├── base_eks_setup
│   ├── gp3.yaml
│   ├── tracking_stack.yaml
│   ├── prometheus-monitoring.yaml
│   └── karpenter_nodepool
│       ├── graviton-nodepool.yaml
│       ├── x86-nodepool.yaml
│       ├── inf2-nodepool.yaml
│       └── gpu-nodepool.yaml
├── model-observability
│   ├── langfuse-secret.yaml
│   ├── langfuse-redis-port-patch.yaml
│   ├── langfuse-web-ingress.yaml
│   ├── langfuse-value.yaml
│   └── values.yaml.DEPRECATED
├── benchmark
│   ├── Dockerfile
│   └── prompts.txt
├── model-gateway
│   └── litellm-ingress.yaml
├── LICENSE
├── .gitignore
└── CONTRIBUTING.md

/agentic-apps/strandsdk_agentic_rag_opensearch/src/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/agentic-apps/strandsdk_agentic_rag_opensearch/src/tools/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/agentic-apps/strandsdk_agentic_rag_opensearch/src/scripts/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/agentic-apps/agentic_rag_milvus/output/test.md:
--------------------------------------------------------------------------------
1 | # Test File
2 | 
3 | This is a test.
--------------------------------------------------------------------------------
/agentic-apps/agentic-idp/requirements.txt:
--------------------------------------------------------------------------------
1 | langfuse
2 | openai
3 | langgraph
4 | 
5 | 
--------------------------------------------------------------------------------
/model-hosting/ray-server/local-requirements.txt:
--------------------------------------------------------------------------------
1 | llama_cpp_python==0.3.2
2 | transformers==4.46.0
3 | fastapi[all]
4 | ray==2.39.0
5 | starlette==0.41.3
--------------------------------------------------------------------------------
/agentic-apps/agentic_rag_opensearch/src/scripts/index.ts:
--------------------------------------------------------------------------------
1 | // Export scripts for programmatic usage if needed
2 | export * from './embedKnowledge';
3 | 
4 | 
--------------------------------------------------------------------------------
/agentic-apps/strandsdk_agentic_rag_opensearch/k8s/service-account.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: ServiceAccount
3 | metadata:
4 |   name: strandsdk-rag-service-account
5 |   namespace: default
6 | 
7 | 
--------------------------------------------------------------------------------
/milvus/milvus-update.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: milvus.io/v1beta1
2 | kind: Milvus
3 | metadata:
4 |   name: my-release
5 |   namespace: default
6 | spec:
7 |   components:
8 |     image: milvusdb/milvus:v2.5.12
9 | 
--------------------------------------------------------------------------------
/agentic-apps/agentic-idp/birth_cert.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-scalable-model-inference-and-agentic-ai-on-amazon-eks/HEAD/agentic-apps/agentic-idp/birth_cert.png
--------------------------------------------------------------------------------
/image/Inference_GenAI_architecture_EKS.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-scalable-model-inference-and-agentic-ai-on-amazon-eks/HEAD/image/Inference_GenAI_architecture_EKS.jpg
--------------------------------------------------------------------------------
/agentic-apps/agentic_rag_milvus/images/image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-scalable-model-inference-and-agentic-ai-on-amazon-eks/HEAD/agentic-apps/agentic_rag_milvus/images/image.png -------------------------------------------------------------------------------- /image/Inference_GenAI_app_architecture_EKS.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-scalable-model-inference-and-agentic-ai-on-amazon-eks/HEAD/image/Inference_GenAI_app_architecture_EKS.jpg -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/images/image1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-scalable-model-inference-and-agentic-ai-on-amazon-eks/HEAD/agentic-apps/agentic_rag_milvus/images/image1.png -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/images/image2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-scalable-model-inference-and-agentic-ai-on-amazon-eks/HEAD/agentic-apps/agentic_rag_milvus/images/image2.png -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/images/image3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-scalable-model-inference-and-agentic-ai-on-amazon-eks/HEAD/agentic-apps/agentic_rag_milvus/images/image3.png -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/images/image4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-scalable-model-inference-and-agentic-ai-on-amazon-eks/HEAD/agentic-apps/agentic_rag_milvus/images/image4.png -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_opensearch/images/arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-scalable-model-inference-and-agentic-ai-on-amazon-eks/HEAD/agentic-apps/agentic_rag_opensearch/images/arch.png -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_opensearch/images/image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-scalable-model-inference-and-agentic-ai-on-amazon-eks/HEAD/agentic-apps/agentic_rag_opensearch/images/image.png -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_opensearch/images/image1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-scalable-model-inference-and-agentic-ai-on-amazon-eks/HEAD/agentic-apps/agentic_rag_opensearch/images/image1.png -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_opensearch/images/image2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-scalable-model-inference-and-agentic-ai-on-amazon-eks/HEAD/agentic-apps/agentic_rag_opensearch/images/image2.png -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_opensearch/images/image3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-scalable-model-inference-and-agentic-ai-on-amazon-eks/HEAD/agentic-apps/agentic_rag_opensearch/images/image3.png -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_opensearch/images/image4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-scalable-model-inference-and-agentic-ai-on-amazon-eks/HEAD/agentic-apps/agentic_rag_opensearch/images/image4.png -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/images/arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-scalable-model-inference-and-agentic-ai-on-amazon-eks/HEAD/agentic-apps/strandsdk_agentic_rag_opensearch/images/arch.png -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/src/agents/rag_agent.py: -------------------------------------------------------------------------------- 1 | # This file has been removed - RAG functionality is now integrated into the supervisor_agent.py 2 | # The supervisor agent now directly uses the search_knowledge_base tool for RAG operations 3 | -------------------------------------------------------------------------------- /milvus/ebs-storage-class.yaml: -------------------------------------------------------------------------------- 1 | # ebs-storage-class.yaml 2 | apiVersion: storage.k8s.io/v1 3 | kind: StorageClass 4 | metadata: 5 | name: ebs-sc 6 | provisioner: ebs.csi.aws.com 7 | volumeBindingMode: WaitForFirstConsumer 8 | parameters: 9 | type: gp3 10 | encrypted: "true" 11 | -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/src/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """Utility functions and helpers.""" 2 | 3 | from .logging import log_title, setup_logging 4 | from .langfuse_config import LangfuseConfig, langfuse_config 5 | 6 | __all__ = ["log_title", "setup_logging", "LangfuseConfig", "langfuse_config"] 7 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 
5 | 
--------------------------------------------------------------------------------
/base_eks_setup/gp3.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: storage.k8s.io/v1
2 | kind: StorageClass
3 | metadata:
4 |   name: gp3
5 |   annotations:
6 |     storageclass.kubernetes.io/is-default-class: "true"
7 | provisioner: ebs.csi.aws.com
8 | parameters:
9 |   type: gp3
10 |   fsType: ext4
11 | reclaimPolicy: Delete
12 | volumeBindingMode: Immediate
--------------------------------------------------------------------------------
/agentic-apps/agentic_rag_opensearch/src/agents/index.ts:
--------------------------------------------------------------------------------
1 | export { default as SupervisorAgent } from './SupervisorAgent';
2 | export { default as KnowledgeAgent } from './KnowledgeAgent';
3 | export { default as RAGAgent } from './RAGAgent';
4 | export { default as MCPAgent } from './MCPAgent';
5 | 
6 | export type { AgentTask, AgentResult } from './SupervisorAgent';
7 | 
--------------------------------------------------------------------------------
/agentic-apps/agentic_rag_milvus/src/VectorStore.ts:
--------------------------------------------------------------------------------
1 | export interface VectorStoreItem {
2 |   embedding: number[];
3 |   document: string;
4 | }
5 | 
6 | export interface VectorStore {
7 |   addEmbedding(embedding: number[], document: string): Promise<void>;
8 |   search(queryEmbedding: number[], topK: number): Promise<VectorStoreItem[]>;
9 |   close?(): Promise<void>;
10 | }
11 | 
--------------------------------------------------------------------------------
/agentic-apps/agentic_rag_opensearch/src/VectorStore.ts:
--------------------------------------------------------------------------------
1 | export interface VectorStoreItem {
2 |   embedding: number[];
3 |   document: string;
4 | }
5 | 
6 | export interface VectorStore {
7 |   addEmbedding(embedding: number[], document: string): Promise<void>;
8 |   search(queryEmbedding: number[], topK: number): Promise<VectorStoreItem[]>;
9 |   close?(): Promise<void>;
10 | }
11 | 
--------------------------------------------------------------------------------
/agentic-apps/strandsdk_agentic_rag_opensearch/src/agents/__init__.py:
--------------------------------------------------------------------------------
1 | """Multi-agent system using Strands SDK with built-in tracing."""
2 | 
3 | from .supervisor_agent import supervisor_agent
4 | from .knowledge_agent import knowledge_agent
5 | from .mcp_agent import mcp_agent
6 | 
7 | __all__ = [
8 |     "supervisor_agent",
9 |     "knowledge_agent",
10 |     "mcp_agent"
11 | ]
12 | 
--------------------------------------------------------------------------------
/agentic-apps/agentic_rag_milvus/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 |   "compilerOptions": {
3 |     "target": "ES2020",
4 |     "module": "ESNext",
5 |     "moduleResolution": "Bundler",
6 |     "esModuleInterop": true,
7 |     "outDir": "./dist",
8 |     "rootDir": "./src",
9 |     "strict": true
10 |   },
11 |   "include": [
12 |     "src/**/*"
13 |   ],
14 |   "exclude": [
15 |     "node_modules"
16 |   ]
17 | }
--------------------------------------------------------------------------------
/agentic-apps/agentic_rag_opensearch/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 |   "compilerOptions": {
3 |     "target": "ES2020",
4 |     "module": "ESNext",
5 |     "moduleResolution": "Bundler",
6 |     "esModuleInterop": true,
7 |     "outDir": "./dist",
8 |     "rootDir": "./src",
9 |     "strict": true
10 |   },
11 |   "include": [
12 |     "src/**/*"
13 |   ],
14 |   "exclude": [
15 |     "node_modules"
16 |   ]
17 | }
--------------------------------------------------------------------------------
/base_eks_setup/tracking_stack.yaml:
--------------------------------------------------------------------------------
1 | AWSTemplateFormatVersion: '2010-09-09'
2 | Description: '(SO9150) - Guidance for Scalable Model Inference and Agentic AI on Amazon EKS'
3 | Resources:
4 |   EmptyResource:
5 |     Type: 'AWS::CloudFormation::WaitConditionHandle'
6 | 
7 | Outputs:
8 |   ProjectStatus:
9 |     Description: 'Project initialization status'
10 |     Value: 'Starting the Agentic AI project'
11 | 
--------------------------------------------------------------------------------
/agentic-apps/agentic_rag_milvus/src/utils.ts:
--------------------------------------------------------------------------------
1 | import chalk from "chalk";
2 | 
3 | export function logTitle(message: string) {
4 |     const totalLength = 80;
5 |     const messageLength = message.length;
6 |     const padding = Math.max(0, totalLength - messageLength - 2); // 2 for the spaces around the message
7 |     const paddedMessage = `${'='.repeat(Math.floor(padding / 2))} ${message} ${'='.repeat(Math.ceil(padding / 2))}`;
8 |     console.log(chalk.bold.cyanBright(paddedMessage));
9 | }
--------------------------------------------------------------------------------
/agentic-apps/agentic_rag_opensearch/src/utils.ts:
--------------------------------------------------------------------------------
1 | import chalk from "chalk";
2 | 
3 | export function logTitle(message: string) {
4 |     const totalLength = 80;
5 |     const messageLength = message.length;
6 |     const padding = Math.max(0, totalLength - messageLength - 2); // 2 for the spaces around the message
7 |     const paddedMessage = `${'='.repeat(Math.floor(padding / 2))} ${message} ${'='.repeat(Math.ceil(padding / 2))}`;
8 |     console.log(chalk.bold.cyanBright(paddedMessage));
9 | }
--------------------------------------------------------------------------------
/model-observability/langfuse-secret.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Secret
3 | type: Opaque
4 | metadata:
5 |   name: langfuse
6 | stringData:
7 |   salt: changeme
8 |   encryption-key: 0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef # Generate with `openssl rand -hex 32`
9 |   nextauth-secret: changeme
10 |   postgresql-password: postgres
11 |   clickhouse-password: changeme
12 |   redis-password: changeme
13 |   s3-user: minio
14 |   s3-password: miniosecret
--------------------------------------------------------------------------------
/benchmark/Dockerfile:
--------------------------------------------------------------------------------
1 | # Build stage
2 | FROM golang:1.21-alpine AS builder
3 | 
4 | WORKDIR /app
5 | COPY . .
6 | RUN CGO_ENABLED=0 GOOS=linux go build -o benchmark perf_benchmark.go
7 | 
8 | # Run stage
9 | FROM alpine:latest
10 | 
11 | WORKDIR /app
12 | COPY --from=builder /app/benchmark .
13 | COPY prompts.txt /app 14 | 15 | ENV URL="http://localhost:8000/v1/chat/completions" 16 | ENV REQUESTS_PER_PROMPT=10 17 | ENV NUM_WARMUP_REQUESTS=3 18 | 19 | CMD ["./benchmark"] 20 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/knowledge/user_9.md: -------------------------------------------------------------------------------- 1 | # Glenna Reichert 2 | 3 | - **Username**: Delphine 4 | - **Email**: Chaim_McDermott@dana.io 5 | - **Address**: 6 | - Street: Dayna Park 7 | - Suite: Suite 449 8 | - City: Bartholomebury 9 | - Zipcode: 76495-3109 10 | - Geo: 11 | - Latitude: 24.6463 12 | - Longitude: -168.8889 13 | - **Phone**: (775)976-6794 x41206 14 | - **Website**: conrad.com 15 | - **Company**: 16 | - Name: Yost and Sons 17 | - Catchphrase: Switchable contextual benchmark -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/knowledge/user_7.md: -------------------------------------------------------------------------------- 1 | # Kurtis Weissnat 2 | 3 | - **Username**: Elwyn.Skiles 4 | - **Email**: Telly.Hoeger@billy.biz 5 | - **Address**: 6 | - Street: Rex Trail 7 | - Suite: Suite 280 8 | - City: Howemouth 9 | - Zipcode: 58804-1099 10 | - Geo: 11 | - Latitude: 24.8918 12 | - Longitude: 21.8984 13 | - **Phone**: 210.067.6132 14 | - **Website**: elvis.io 15 | - **Company**: 16 | - Name: Johns Group 17 | - Catchphrase: Configurable multimedia task-force 18 | - BS: generate enterprise e-tailers -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/knowledge/user_5.md: -------------------------------------------------------------------------------- 1 | # Chelsey Dietrich 2 | 3 | - **Username**: Kamren 4 | - **Email**: Lucio_Hettinger@annie.ca 5 | - **Address**: 6 | - Street: Skiles Walks 7 | - Suite: Suite 351 8 | - City: Roscoeview 9 | - Zipcode: 33263 10 | - Geo: 11 | - Latitude: -31.8129 12 | - Longitude: 62.5342 13 | - **Phone**: (254)954-1289 14 | - **Website**: demarco.info 15 | - **Company**: 16 | - Name: Keebler LLC 17 | - Catchphrase: User-centric fault-tolerant solution 18 | - BS: revolutionize end-to-end systems -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/knowledge/user_1.md: -------------------------------------------------------------------------------- 1 | # Leanne Graham 2 | 3 | - **Username**: Bret 4 | - **Email**: Sincere@april.biz 5 | - **Address**: 6 | - Street: Kulas Light 7 | - Suite: Apt. 
556 8 | - City: Gwenborough 9 | - Zipcode: 92998-3874 10 | - Geo: 11 | - Latitude: -37.3159 12 | - Longitude: 81.1496 13 | - **Phone**: 1-770-736-8031 x56442 14 | - **Website**: hildegard.org 15 | - **Company**: 16 | - Name: Romaguera-Crona 17 | - Catchphrase: Multi-layered client-server neural-net 18 | - BS: harness real-time e-markets -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/knowledge/user_10.md: -------------------------------------------------------------------------------- 1 | # Clementina DuBuque 2 | 3 | - **Username**: Moriah.Stanton 4 | - **Email**: Rey.Padberg@karina.biz 5 | - **Address**: 6 | - Street: Kattie Turnpike 7 | - Suite: Suite 198 8 | - City: Lebsackbury 9 | - Zipcode: 31428-2261 10 | - Geo: 11 | - Latitude: -38.2386 12 | - Longitude: 57.2232 13 | - **Phone**: 024-648-3804 14 | - **Website**: ambrose.net 15 | - **Company**: 16 | - Name: Hoeger LLC 17 | - Catchphrase: Centralized empowering task-force 18 | - BS: target end-to-end models -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/knowledge/user_2.md: -------------------------------------------------------------------------------- 1 | # Ervin Howell 2 | 3 | - **Username**: Antonette 4 | - **Email**: Shanna@melissa.tv 5 | - **Address**: 6 | - Street: Victor Plains 7 | - Suite: Suite 879 8 | - City: Wisokyburgh 9 | - Zipcode: 90566-7771 10 | - Geo: 11 | - Latitude: -43.9509 12 | - Longitude: -34.4618 13 | - **Phone**: 010-692-6593 x09125 14 | - **Website**: anastasia.net 15 | - **Company**: 16 | - Name: Deckow-Crist 17 | - Catchphrase: Proactive didactic contingency 18 | - BS: synergize scalable supply-chains -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/knowledge/user_3.md: -------------------------------------------------------------------------------- 1 | # Clementine Bauch 2 | 3 | - **Username**: Samantha 4 | - **Email**: Nathan@yesenia.net 5 | - **Address**: 6 | - Street: Douglas Extension 7 | - Suite: Suite 847 8 | - City: McKenziehaven 9 | - Zipcode: 59590-4157 10 | - Geo: 11 | - Latitude: -68.6102 12 | - Longitude: -47.0653 13 | - **Phone**: 1-463-123-4447 14 | - **Website**: ramiro.info 15 | - **Company**: 16 | - Name: Romaguera-Jacobson 17 | - Catchphrase: Face to face bifurcated interface 18 | - BS: e-enable strategic applications -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/knowledge/user_8.md: -------------------------------------------------------------------------------- 1 | # Nicholas Runolfsdottir V 2 | 3 | - **Username**: Maxime_Nienow 4 | - **Email**: Sherwood@rosamond.me 5 | - **Address**: 6 | - Street: Ellsworth Summit 7 | - Suite: Suite 729 8 | - City: Aliyaview 9 | - Zipcode: 45169 10 | - Geo: 11 | - Latitude: -14.3990 12 | - Longitude: -120.7677 13 | - **Phone**: 586.493.6943 x140 14 | - **Website**: jacynthe.com 15 | - **Company**: 16 | - Name: Abernathy Group 17 | - Catchphrase: Implemented secondary concept 18 | - BS: e-enable extensible e-tailers -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/knowledge/user_4.md: -------------------------------------------------------------------------------- 1 | # Patricia Lebsack 2 | 3 | - **Username**: Karianne 4 | - **Email**: Julianne.OConner@kory.org 5 | - **Address**: 6 | - Street: Hoeger Mall 7 | - Suite: Apt. 
692 8 | - City: South Elvis 9 | - Zipcode: 53919-4257 10 | - Geo: 11 | - Latitude: 29.4572 12 | - Longitude: -164.2990 13 | - **Phone**: 493-170-9623 x156 14 | - **Website**: kale.biz 15 | - **Company**: 16 | - Name: Robel-Corkery 17 | - Catchphrase: Multi-tiered zero tolerance productivity 18 | - BS: transition cutting-edge web services -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/knowledge/user_6.md: -------------------------------------------------------------------------------- 1 | # Mrs. Dennis Schulist 2 | 3 | - **Username**: Leopoldo_Corkery 4 | - **Email**: Karley_Dach@jasper.info 5 | - **Address**: 6 | - Street: Norberto Crossing 7 | - Suite: Apt. 950 8 | - City: South Christy 9 | - Zipcode: 23505-1337 10 | - Geo: 11 | - Latitude: -71.4197 12 | - Longitude: 71.7478 13 | - **Phone**: 1-477-935-8478 x6430 14 | - **Website**: ola.org 15 | - **Company**: 16 | - Name: Considine-Lockman 17 | - Catchphrase: Synchronised bottom-line interface 18 | - BS: e-enable innovative applications -------------------------------------------------------------------------------- /model-observability/langfuse-redis-port-patch.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: langfuse-web 5 | namespace: default 6 | spec: 7 | template: 8 | spec: 9 | containers: 10 | - name: langfuse-web 11 | env: 12 | - name: REDIS_PORT 13 | value: "6379" 14 | --- 15 | apiVersion: apps/v1 16 | kind: Deployment 17 | metadata: 18 | name: langfuse-worker 19 | namespace: default 20 | spec: 21 | template: 22 | spec: 23 | containers: 24 | - name: langfuse-worker 25 | env: 26 | - name: REDIS_PORT 27 | value: "6379" 28 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/test-file-write.js: -------------------------------------------------------------------------------- 1 | // Simple test script to verify file writing works 2 | import fs from 'fs'; 3 | import path from 'path'; 4 | 5 | const outPath = path.resolve(process.cwd(), 'output'); 6 | const filePath = path.join(outPath, 'test.md'); 7 | 8 | console.log(`Attempting to write to: ${filePath}`); 9 | console.log(`Output directory exists: ${fs.existsSync(outPath)}`); 10 | 11 | try { 12 | fs.writeFileSync(filePath, '# Test File\n\nThis is a test.'); 13 | console.log(`Successfully wrote file to ${filePath}`); 14 | } catch (error) { 15 | console.error(`Failed to write file: ${error}`); 16 | } 17 | -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/healthcheck-mcp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Health check script for MCP Server 4 | 5 | # Load environment variables from ConfigMap or local file 6 | if [ -f "/app/config/.env" ]; then 7 | export $(grep -v '^#' /app/config/.env | xargs) 2>/dev/null || true 8 | elif [ -f "/app/.env" ]; then 9 | export $(grep -v '^#' /app/.env | xargs) 2>/dev/null || true 10 | fi 11 | 12 | # Check if the MCP server is responding 13 | if curl -f -s http://localhost:8001/mcp/ > /dev/null 2>&1; then 14 | echo "MCP Server: Running" 15 | exit 0 16 | else 17 | echo "MCP Server: Not responding" 18 | exit 1 19 | fi 20 | -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/healthcheck-main.sh: 
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Health check script for Main Application (FastAPI Server)
4 | 
5 | # Load environment variables from ConfigMap or local file
6 | if [ -f "/app/config/.env" ]; then
7 |     export $(grep -v '^#' /app/config/.env | xargs) 2>/dev/null || true
8 | elif [ -f "/app/.env" ]; then
9 |     export $(grep -v '^#' /app/.env | xargs) 2>/dev/null || true
10 | fi
11 | 
12 | # Check if the FastAPI server is responding
13 | if curl -f -s http://localhost:8000/health > /dev/null 2>&1; then
14 |     echo "FastAPI Server: Running"
15 |     exit 0
16 | else
17 |     echo "FastAPI Server: Not responding"
18 |     exit 1
19 | fi
20 | 
--------------------------------------------------------------------------------
/milvus/milvus-nlb-service.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Service
3 | metadata:
4 |   name: milvus-nlb
5 |   namespace: default
6 |   annotations:
7 |     service.beta.kubernetes.io/aws-load-balancer-type: nlb
8 |     service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
9 | spec:
10 |   type: LoadBalancer
11 |   ports:
12 |     - name: milvus
13 |       port: 19530
14 |       targetPort: 19530
15 |       protocol: TCP
16 |     - name: metrics
17 |       port: 9091
18 |       targetPort: 9091
19 |       protocol: TCP
20 |   selector:
21 |     app.kubernetes.io/instance: my-release
22 |     app.kubernetes.io/managed-by: milvus-operator
23 |     app.kubernetes.io/name: milvus
24 |     "milvus.io/service": "true"
25 | 
--------------------------------------------------------------------------------
/model-hosting/ray-services/ingress/add-sg-lb-eks.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | 
4 | REGION=ap-southeast-2
5 | CLUSTER_NAME=llm-eks-cluster
6 | ALB_NAME=llama-cpp-cpu-lb
7 | export AWS_DEFAULT_REGION=$REGION
8 | 
9 | CLUSTER_SG=$(aws eks describe-cluster \
10 |   --name $CLUSTER_NAME \
11 |   --query 'cluster.resourcesVpcConfig.clusterSecurityGroupId' \
12 |   --output text)
13 | 
14 | ALB_SG=$(aws elbv2 describe-load-balancers \
15 |   --names $ALB_NAME \
16 |   --query 'LoadBalancers[0].SecurityGroups[0]' \
17 |   --output text)
18 | 
19 | aws ec2 authorize-security-group-ingress \
20 |   --group-id $CLUSTER_SG \
21 |   --source-group $ALB_SG \
22 |   --protocol tcp \
23 |   --port 0-65535
--------------------------------------------------------------------------------
/model-gateway/litellm-ingress.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: networking.k8s.io/v1
2 | kind: Ingress
3 | metadata:
4 |   name: litellm-ingress-alb
5 |   annotations:
6 |     kubernetes.io/ingress.class: alb
7 |     alb.ingress.kubernetes.io/scheme: internet-facing
8 |     alb.ingress.kubernetes.io/target-type: ip
9 |     alb.ingress.kubernetes.io/healthcheck-path: /
10 |     alb.ingress.kubernetes.io/healthcheck-port: "4000"
11 |     alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}]'
12 | spec:
13 |   rules:
14 |     - http:
15 |         paths:
16 |           - path: /
17 |             pathType: Prefix
18 |             backend:
19 |               service:
20 |                 name: litellm
21 |                 port:
22 |                   number: 4000
23 | 
--------------------------------------------------------------------------------
/model-observability/langfuse-web-ingress.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: networking.k8s.io/v1
2 | kind: Ingress
3 | metadata:
4 |   name: langfuse-web-ingress-alb
5 |   annotations:
6 |     kubernetes.io/ingress.class: alb
7 | 
alb.ingress.kubernetes.io/scheme: internet-facing 8 | alb.ingress.kubernetes.io/target-type: ip 9 | alb.ingress.kubernetes.io/healthcheck-path: / 10 | alb.ingress.kubernetes.io/healthcheck-port: '3000' 11 | alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}]' 12 | spec: 13 | rules: 14 | - http: 15 | paths: 16 | - path: / 17 | pathType: Prefix 18 | backend: 19 | service: 20 | name: langfuse-web 21 | port: 22 | number: 3000 23 | -------------------------------------------------------------------------------- /agentic-apps/agentic-idp/doc_reader.py: -------------------------------------------------------------------------------- 1 | from PyPDF2 import PdfReader 2 | 3 | from pathlib import Path 4 | import logging, base64 5 | 6 | 7 | def encode_image(image_path): 8 | """Encode image to base64 string""" 9 | with open(image_path, "rb") as image_file: 10 | return base64.b64encode(image_file.read()).decode("utf-8") 11 | 12 | # Add this function to handle PDF processing 13 | def process_pdf(pdf_path: str) -> str: 14 | """Process PDF and return its content""" 15 | try: 16 | reader = PdfReader(pdf_path) 17 | text = "" 18 | for page in reader.pages: 19 | text += page.extract_text() 20 | return text 21 | except Exception as e: 22 | logging.error(f"PDF processing error: {str(e)}") 23 | return "" -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/src/utils/logging.py: -------------------------------------------------------------------------------- 1 | """Logging utilities for the application.""" 2 | 3 | import logging 4 | import sys 5 | from typing import Optional 6 | 7 | def setup_logging(level: str = "INFO") -> None: 8 | """Setup logging configuration.""" 9 | logging.basicConfig( 10 | level=getattr(logging, level.upper()), 11 | format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", 12 | handlers=[ 13 | logging.StreamHandler(sys.stdout), 14 | ] 15 | ) 16 | 17 | def log_title(title: str, width: int = 60) -> None: 18 | """Print a formatted title for logging.""" 19 | border = "=" * width 20 | padding = (width - len(title) - 2) // 2 21 | formatted_title = f"{border}\n{' ' * padding} {title} {' ' * padding}\n{border}" 22 | print(formatted_title) 23 | -------------------------------------------------------------------------------- /model-hosting/ray-services/ingress/ingress-embedding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: networking.k8s.io/v1 2 | kind: Ingress 3 | metadata: 4 | name: embedding-ray-service-ingress 5 | annotations: 6 | alb.ingress.kubernetes.io/scheme: internet-facing 7 | alb.ingress.kubernetes.io/target-type: ip 8 | alb.ingress.kubernetes.io/load-balancer-name: "embedding-ray-lb" 9 | alb.ingress.kubernetes.io/healthcheck-protocol: HTTP 10 | alb.ingress.kubernetes.io/healthcheck-port: "8265" 11 | alb.ingress.kubernetes.io/healthcheck-path: "/" 12 | alb.ingress.kubernetes.io/success-codes: "200" 13 | spec: 14 | ingressClassName: alb 15 | rules: 16 | - http: 17 | paths: 18 | - path: / 19 | pathType: Prefix 20 | backend: 21 | service: 22 | name: ray-service-llamacpp-serve-svc 23 | port: 24 | number: 8000 25 | -------------------------------------------------------------------------------- /model-hosting/ray-services/ingress/ingress-cpu.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: networking.k8s.io/v1 2 | kind: Ingress 3 | metadata: 4 | name: llama-3-8b-cpu-llama-serve-ingress 5 | 
annotations: 6 | alb.ingress.kubernetes.io/scheme: internet-facing 7 | alb.ingress.kubernetes.io/target-type: ip 8 | alb.ingress.kubernetes.io/load-balancer-name: "llama-cpp-cpu-lb" 9 | alb.ingress.kubernetes.io/healthcheck-protocol: HTTP 10 | alb.ingress.kubernetes.io/healthcheck-port: "8265" 11 | alb.ingress.kubernetes.io/healthcheck-path: "/" 12 | alb.ingress.kubernetes.io/success-codes: "200" 13 | spec: 14 | ingressClassName: alb 15 | rules: 16 | - http: 17 | paths: 18 | - path: / 19 | pathType: Prefix 20 | backend: 21 | service: 22 | name: ray-service-llamacpp-serve-svc 23 | port: 24 | number: 8000 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /agentic-apps/agentic-idp/mcp.py: -------------------------------------------------------------------------------- 1 | # # https://modelcontextprotocol.io/quickstart/server 2 | # from typing import Any 3 | # import httpx 4 | # from mcp.server.fastmcp import FastMCP 5 | 6 | # # Initialize FastMCP server 7 | # mcp = FastMCP("weather") 8 | 9 | 10 | # @mcp.tool() 11 | # async def get_personal_info(state: str) -> str: 12 | # """Get weather alerts for a US state. 13 | 14 | # Args: 15 | # state: Two-letter US state code (e.g. CA, NY) 16 | # """ 17 | # url = f"{NWS_API_BASE}/alerts/active/area/{state}" 18 | # data = await make_nws_request(url) 19 | 20 | # if not data or "features" not in data: 21 | # return "Unable to fetch alerts or no alerts found." 22 | 23 | # if not data["features"]: 24 | # return "No active alerts for this state." 25 | 26 | # alerts = [format_alert(feature) for feature in data["features"]] 27 | # return "\n---\n".join(alerts) -------------------------------------------------------------------------------- /agentic-apps/agentic-idp/.env.example: -------------------------------------------------------------------------------- 1 | # Model Configuration 2 | # API key for accessing the LLM vision model through the gateway 3 | LLAMA_VISION_MODEL_KEY=your-api-key-here 4 | 5 | # URL of the LiteLLM API gateway deployed in your EKS cluster 6 | # This should be the load balancer URL from your model-gateway deployment 7 | API_GATEWAY_URL=http://your-litellm-gateway-url 8 | 9 | # Langfuse Configuration for LLM Observability 10 | # URL of the Langfuse service deployed in your EKS cluster 11 | # This should be the load balancer URL from your model-observability deployment 12 | LANGFUSE_HOST=http://your-langfuse-host-url 13 | 14 | # Langfuse public key for authentication 15 | # Get this from your Langfuse dashboard after creating a project 16 | LANGFUSE_PUBLIC_KEY=pk-lf-your-public-key 17 | 18 | # Langfuse secret key for authentication 19 | # Get this from your Langfuse dashboard after creating a project 20 | LANGFUSE_SECRET_KEY=sk-lf-your-secret-key 21 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/list_collections.py: -------------------------------------------------------------------------------- 1 | import os 2 | from dotenv import load_dotenv 3 | from pymilvus import connections, utility 4 | 5 | # Load environment variables 6 | load_dotenv() 7 | 8 | # Get Milvus address from environment 9 | milvus_address = os.getenv("MILVUS_ADDRESS", "localhost:19530") 10 | milvus_host, milvus_port = milvus_address.split(":") 11 | 12 | # Connect to Milvus 13 | print(f"Connecting to Milvus at {milvus_host}:{milvus_port}...") 14 | try: 15 | connections.connect( 16 | alias="default", 17 | host=milvus_host, 18 | port=milvus_port 19 | ) 
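# NOTE: the split above assumes MILVUS_ADDRESS is a bare "host:port" pair;
# a URI such as "http://host:19530" has extra ":" segments and would need
# urllib.parse instead.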
20 | 21 | # List all collections 22 | collections = utility.list_collections() 23 | print("Available collections:") 24 | for collection in collections: 25 | print(f"- {collection}") 26 | 27 | # Close connection 28 | connections.disconnect("default") 29 | 30 | except Exception as e: 31 | print(f"Error connecting to Milvus: {e}") 32 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT No Attribution 2 | 3 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so. 10 | 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 13 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 14 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 15 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 16 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 17 | 18 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_opensearch/update-policy.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "Rules": [ 4 | { 5 | "Resource": [ 6 | "collection/vectordb" 7 | ], 8 | "Permission": [ 9 | "aoss:CreateCollectionItems", 10 | "aoss:DeleteCollectionItems", 11 | "aoss:UpdateCollectionItems", 12 | "aoss:DescribeCollectionItems" 13 | ], 14 | "ResourceType": "collection" 15 | }, 16 | { 17 | "Resource": [ 18 | "index/vectordb/*" 19 | ], 20 | "Permission": [ 21 | "aoss:CreateIndex", 22 | "aoss:DeleteIndex", 23 | "aoss:UpdateIndex", 24 | "aoss:DescribeIndex", 25 | "aoss:ReadDocument", 26 | "aoss:WriteDocument" 27 | ], 28 | "ResourceType": "index" 29 | } 30 | ], 31 | "Principal": [ 32 | "arn:aws:iam::412381761882:user/admin", 33 | "arn:aws:iam::412381761882:root" 34 | ], 35 | "Description": "Rule 1" 36 | } 37 | ] 38 | -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/update-k8s-config.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Interactive Kubernetes ConfigMap Update Script 4 | # Updates k8s/configmap.yaml with your actual service endpoints and API keys 5 | 6 | echo "🚀 Starting Kubernetes ConfigMap Update Tool..." 7 | echo "" 8 | 9 | # Check if Python 3 is available 10 | if ! command -v python3 &> /dev/null; then 11 | echo "❌ Python 3 is required but not found" 12 | echo "Please install Python 3 and try again" 13 | exit 1 14 | fi 15 | 16 | # Check if k8s directory exists 17 | if [ ! -d "k8s" ]; then 18 | echo "❌ k8s directory not found" 19 | echo "Please run this script from the project root directory" 20 | exit 1 21 | fi 22 | 23 | # Check if configmap.yaml exists 24 | if [ ! 
-f "k8s/configmap.yaml" ]; then 25 | echo "❌ k8s/configmap.yaml not found" 26 | echo "Please ensure the ConfigMap file exists" 27 | exit 1 28 | fi 29 | 30 | # Run the Python script 31 | python3 ./update_k8s_config.py 32 | 33 | echo "" 34 | echo "✅ ConfigMap update completed!" 35 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/src/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "agentic-rag-opensearch", 3 | "version": "1.0.0", 4 | "description": "Agentic RAG system with MCP and custom embedding", 5 | "main": "index.js", 6 | "type": "module", 7 | "scripts": { 8 | "dev": "tsx index.ts", 9 | "embed-knowledge": "tsx embedKnowledge.ts", 10 | "embed-csv": "tsx embedCSV.ts", 11 | "update-rag": "tsx updateRAG.ts", 12 | "test": "echo \"Error: no test specified\" && exit 1" 13 | }, 14 | "keywords": [ 15 | "rag", 16 | "mcp", 17 | "llm", 18 | "agent" 19 | ], 20 | "author": "", 21 | "license": "ISC", 22 | "dependencies": { 23 | "@aws-sdk/client-bedrock-runtime": "^3.525.0", 24 | "@modelcontextprotocol/sdk": "^1.10.1", 25 | "@modelcontextprotocol/server-filesystem": "^2025.3.28", 26 | "@zilliz/milvus2-sdk-node": "^2.5.8", 27 | "chalk": "^5.4.1", 28 | "csv-parse": "^5.5.5", 29 | "dotenv": "^16.4.5", 30 | "node-fetch": "^3.3.2", 31 | "openai": "^4.28.4", 32 | "tsx": "^4.7.1", 33 | "typescript": "^5.3.3" 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_opensearch/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "agentic-rag-opensearch", 3 | "version": "1.0.0", 4 | "description": "Multi-Agent RAG system with MCP and OpenSearch", 5 | "main": "index.js", 6 | "type": "module", 7 | "scripts": { 8 | "dev": "tsx src/index.ts", 9 | "embed-knowledge": "tsx src/scripts/embedKnowledge.ts", 10 | "test-agents": "tsx src/test-agents.ts", 11 | "test": "echo \"Error: no test specified\" && exit 1" 12 | }, 13 | "keywords": [ 14 | "rag", 15 | "mcp", 16 | "llm", 17 | "agent", 18 | "opensearch", 19 | "multi-agent" 20 | ], 21 | "author": "", 22 | "license": "ISC", 23 | "dependencies": { 24 | "@modelcontextprotocol/sdk": "^1.10.1", 25 | "@modelcontextprotocol/server-filesystem": "^2025.3.28", 26 | "@opensearch-project/opensearch": "^3.5.1", 27 | "aws-sdk": "^2.1574.0", 28 | "chalk": "^5.4.1", 29 | "csv-parse": "^5.5.5", 30 | "dotenv": "^16.4.5", 31 | "langfuse": "^3.30.0", 32 | "node-fetch": "^3.3.2", 33 | "openai": "^4.28.4", 34 | "tsx": "^4.7.1", 35 | "typescript": "^5.3.3" 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/src/utils/model_providers.py: -------------------------------------------------------------------------------- 1 | """Model provider configurations for Strands agents.""" 2 | 3 | from strands.models.openai import OpenAIModel 4 | from ..config import config 5 | 6 | def create_openai_reasoning_model(): 7 | """Create an OpenAI model instance for reasoning tasks.""" 8 | return OpenAIModel( 9 | client_args={ 10 | "api_key": config.LITELLM_API_KEY, 11 | "base_url": config.LITELLM_BASE_URL, 12 | }, 13 | model_id=config.REASONING_MODEL, 14 | params={ 15 | "temperature": 0.7, 16 | "max_tokens": 4096, 17 | } 18 | ) 19 | 20 | def get_reasoning_model(): 21 | """Get the configured reasoning model for agents.""" 22 | try: 23 | # Try to use OpenAI client 24 | 
return create_openai_reasoning_model() 25 | except ImportError: 26 | # Fallback to string model ID 27 | return config.REASONING_MODEL 28 | except Exception as e: 29 | print(f"Warning: Failed to create OpenAI model, falling back to string ID: {e}") 30 | return config.REASONING_MODEL 31 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "agentic-rag-opensearch", 3 | "version": "1.0.0", 4 | "description": "Agentic RAG system with MCP and custom embedding", 5 | "main": "index.js", 6 | "type": "module", 7 | "scripts": { 8 | "dev": "tsx src/index.ts", 9 | "embed-knowledge": "tsx src/embedKnowledge.ts", 10 | "embed-csv": "tsx src/embedCSV.ts", 11 | "update-rag": "tsx src/updateRAG.ts", 12 | "rebuild-collection": "tsx src/rebuildCollection.ts", 13 | "test": "echo \"Error: no test specified\" && exit 1" 14 | }, 15 | "keywords": [ 16 | "rag", 17 | "mcp", 18 | "llm", 19 | "agent" 20 | ], 21 | "author": "", 22 | "license": "ISC", 23 | "dependencies": { 24 | "@aws-sdk/client-bedrock-runtime": "^3.525.0", 25 | "@modelcontextprotocol/sdk": "^1.10.1", 26 | "@modelcontextprotocol/server-filesystem": "^2025.3.28", 27 | "@zilliz/milvus2-sdk-node": "^2.5.8", 28 | "chalk": "^5.4.1", 29 | "csv-parse": "^5.5.5", 30 | "dotenv": "^16.4.5", 31 | "node-fetch": "^3.3.2", 32 | "openai": "^4.28.4", 33 | "tsx": "^4.7.1", 34 | "typescript": "^5.3.3" 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/requirements.txt: -------------------------------------------------------------------------------- 1 | # Strands SDK and core dependencies 2 | strands-agents>=0.1.0 3 | strands-agents-tools>=0.1.0 4 | strands-agents[litellm]>=0.1.0 5 | 6 | # OpenAI and LLM dependencies 7 | openai>=1.0.0 8 | litellm>=1.0.0 9 | 10 | # MCP dependencies 11 | mcp>=1.0.0 12 | fastmcp>=0.9.0 13 | fastapi>=0.104.0 14 | uvicorn>=0.24.0 15 | 16 | # AWS and OpenSearch dependencies 17 | boto3>=1.34.0 18 | opensearch-py>=2.4.0 19 | aws-requests-auth>=0.4.3 20 | langchain-aws>=0.1.0 21 | 22 | # Fix dependency conflicts by using older compatible versions 23 | dill==0.3.7 24 | datasets==2.14.0 25 | pyarrow>=12.0.0,<15.0.0 26 | 27 | # RAGAs evaluation dependencies - use compatible versions 28 | ragas>=0.1.0,<0.2.0 29 | langchain>=0.1.0,<0.3.0 30 | langchain-core>=0.1.0,<0.3.0 31 | 32 | # Vector embeddings and ML 33 | numpy>=1.24.0,<2.0.0 34 | scikit-learn>=1.3.0 35 | 36 | # Data processing 37 | pandas>=2.0.0 38 | python-dotenv>=1.0.0 39 | requests>=2.31.0 40 | httpx>=0.25.0 41 | 42 | # Observability 43 | langfuse>=2.0.0 44 | 45 | # Utilities 46 | pydantic>=2.0.0 47 | aiofiles>=23.0.0 48 | 49 | # Additional FastAPI server dependencies 50 | python-multipart>=0.0.6 51 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Node.js dependencies 2 | node_modules/ 3 | npm-debug.log 4 | yarn-debug.log 5 | yarn-error.log 6 | .pnpm-debug.log 7 | 8 | # Environment variables 9 | .env 10 | .env.local 11 | .env.development.local 12 | .env.test.local 13 | .env.production.local 14 | 15 | # Build outputs 16 | dist/ 17 | build/ 18 | out/ 19 | .next/ 20 | 21 | # Cache directories 22 | .npm/ 23 | .pnpm-store/ 24 | .yarn/cache 25 | .yarn/unplugged 26 | .yarn/build-state.yml 27 | 
.yarn/install-state.gz 28 | 29 | # Python virtual environments 30 | venv/ 31 | env/ 32 | .venv/ 33 | .env/ 34 | ENV/ 35 | 36 | # Python cache files 37 | __pycache__/ 38 | *.py[cod] 39 | *$py.class 40 | *.so 41 | 42 | # Editor directories and files 43 | .idea/ 44 | .vscode/ 45 | *.swp 46 | *.swo 47 | *~ 48 | 49 | # OS generated files 50 | .DS_Store 51 | .DS_Store? 52 | ._* 53 | .Spotlight-V100 54 | .Trashes 55 | ehthumbs.db 56 | Thumbs.db 57 | 58 | # Project specific files 59 | agentic-apps/agentic_rag_opensearch/pnpm-lock.yaml 60 | agentic-apps/agentic_rag_opensearch/.knowledge-metadata.json 61 | 62 | # Agentic apps output directories 63 | agentic-apps/strandsdk_agentic_rag_opensearch/output/ 64 | # Local backup files with real secrets 65 | **/*.bak 66 | **/configmap.yaml.bak 67 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_opensearch/.env.example: -------------------------------------------------------------------------------- 1 | # OpenAI API Configuration 2 | # API key for accessing the LLM model 3 | # This should match the key configured in your LiteLLM gateway 4 | OPENAI_API_KEY=your-openai-api-key 5 | 6 | # Base URL for the OpenAI-compatible API 7 | # This should be the URL of your model hosting service (Ray service or LiteLLM gateway) 8 | OPENAI_BASE_URL=http://your-model-endpoint/v1 9 | 10 | # OpenSearch Configuration 11 | # Endpoint URL of your OpenSearch cluster 12 | # This will be automatically set by the setup-opensearch.sh script 13 | OPENSEARCH_ENDPOINT=https://your-opensearch-domain-endpoint 14 | 15 | # AWS region where your OpenSearch cluster is deployed 16 | AWS_REGION=us-east-1 17 | 18 | # Langfuse Configuration for LLM Observability 19 | # URL of the Langfuse service deployed in your EKS cluster 20 | # This should be the load balancer URL from your model-observability deployment 21 | LANGFUSE_HOST=http://your-langfuse-host-url 22 | 23 | # Langfuse public key for authentication 24 | # Get this from your Langfuse dashboard after creating a project 25 | LANGFUSE_PUBLIC_KEY=pk-lf-your-public-key 26 | 27 | # Langfuse secret key for authentication 28 | # Get this from your Langfuse dashboard after creating a project 29 | LANGFUSE_SECRET_KEY=sk-lf-your-secret-key 30 | -------------------------------------------------------------------------------- /model-observability/langfuse-value.yaml: -------------------------------------------------------------------------------- 1 | # This values.yaml file demonstrates how to use the basic chart with a single, pre-created secret. 2 | # Secrets must be set manually or via External Secrets Operator like https://external-secrets.io/latest or any other secret management tool. 
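# A typical install flow with this file (Helm repo URL and chart name are
# assumed from the public langfuse-k8s distribution):
#   kubectl apply -f model-observability/langfuse-secret.yaml
#   helm repo add langfuse https://langfuse.github.io/langfuse-k8s
#   helm upgrade --install langfuse langfuse/langfuse -f model-observability/langfuse-value.yaml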
3 | langfuse:
4 |   encryptionKey:
5 |     secretKeyRef:
6 |       name: langfuse
7 |       key: encryption-key
8 | 
9 |   salt:
10 |     secretKeyRef:
11 |       name: langfuse
12 |       key: salt
13 | 
14 |   nextauth:
15 |     secret:
16 |       secretKeyRef:
17 |         name: langfuse
18 |         key: nextauth-secret
19 | 
20 |   # Add environment variables for web and worker deployments
21 |   web:
22 |     extraEnvVars:
23 |       - name: REDIS_PORT
24 |         value: "6379"
25 | 
26 |   worker:
27 |     extraEnvVars:
28 |       - name: REDIS_PORT
29 |         value: "6379"
30 | 
31 | postgresql:
32 |   auth:
33 |     existingSecret: langfuse
34 |     secretKeys:
35 |       adminPasswordKey: postgresql-password
36 |       userPasswordKey: postgresql-password
37 | 
38 | clickhouse:
39 |   auth:
40 |     existingSecret: langfuse
41 |     existingSecretKey: clickhouse-password
42 | 
43 | redis:
44 |   auth:
45 |     existingSecret: langfuse
46 |     existingSecretPasswordKey: redis-password
47 | 
48 | s3:
49 |   auth:
50 |     existingSecret: langfuse
51 |     rootUserSecretKey: s3-user
52 |     rootPasswordSecretKey: s3-password
53 | 
--------------------------------------------------------------------------------
/agentic-apps/strandsdk_agentic_rag_opensearch/Dockerfile.mcp:
--------------------------------------------------------------------------------
1 | # Dockerfile for MCP Server (Tavily Search Server)
2 | FROM python:3.11-slim AS base
3 | 
4 | # Set environment variables
5 | ENV PYTHONUNBUFFERED=1
6 | ENV PYTHONDONTWRITEBYTECODE=1
7 | ENV DEBIAN_FRONTEND=noninteractive
8 | 
9 | # Install system dependencies
10 | RUN apt-get update && apt-get install -y \
11 |     curl \
12 |     git \
13 |     build-essential \
14 |     && rm -rf /var/lib/apt/lists/*
15 | 
16 | # Set working directory
17 | WORKDIR /app
18 | 
19 | # Install only minimal dependencies needed for MCP server
20 | RUN pip install --no-cache-dir \
21 |     "fastmcp>=0.9.0" \
22 |     "python-dotenv>=1.0.0" \
23 |     "httpx>=0.25.0" \
24 |     "requests>=2.31.0" \
25 |     "pydantic>=2.0.0"
26 | 
27 | # Copy the entire application
28 | COPY . .
29 | 
30 | # Create necessary directories
31 | RUN mkdir -p logs
32 | 
33 | # Set proper permissions
34 | RUN chmod +x scripts/*.py 2>/dev/null || true
35 | 
36 | # Copy startup and health check scripts
37 | COPY startup-mcp.sh /app/startup-mcp.sh
38 | COPY healthcheck-mcp.sh /app/healthcheck-mcp.sh
39 | 
40 | # Make startup and health check scripts executable
41 | RUN chmod +x /app/startup-mcp.sh /app/healthcheck-mcp.sh
42 | 
43 | # Expose the MCP server port
44 | EXPOSE 8001
45 | 
46 | # Add health check
47 | HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
48 |     CMD /app/healthcheck-mcp.sh
49 | 
50 | # Set the default command
51 | CMD ["/app/startup-mcp.sh"]
52 | 
--------------------------------------------------------------------------------
/agentic-apps/agentic_rag_opensearch/output/antonette.md:
--------------------------------------------------------------------------------
1 | # Antonette's Journey with Bell's Palsy
2 | 
3 | ## Basic Information
4 | - **Name:** Antonette M. Rivera
5 | - **Age:** 34
6 | - **Occupation:** Graphic Designer
7 | - **Condition:** Bell's Palsy (idiopathic facial paralysis)
8 | - **Key Treatment:** Eye protection to prevent corneal damage
9 | 
10 | ## The Story
11 | Antonette, a creative and detail-oriented graphic designer, woke up one morning with sudden facial numbness and drooping on her right side. She noticed she couldn’t blink her right eye fully and struggled to smile. Panicked, she rushed to the clinic, where her doctor confirmed Bell’s palsy—a diagnosis of exclusion requiring careful evaluation to rule out other causes. 
12 | 13 | The physician emphasized the **most critical aspect of initial treatment**: **eye protection**. Antonette learned that her inability to close her right eyelid left her vulnerable to corneal drying and injury. To safeguard her vision, she was prescribed: 14 | - Artificial tears every 2 hours during the day 15 | - An eye patch at night 16 | - Lubricating ointment before sleep 17 | 18 | Though frustrated by the limitations, Antonette religiously followed the regimen. Over weeks, her facial muscles slowly regained strength. Her diligence paid off—no corneal damage occurred, and by 6 months, her smile was nearly back to normal. 19 | 20 | Antonette’s story underscores how **early, proactive eye care** can prevent irreversible harm in Bell’s palsy. Today, she advocates for awareness, sharing how prioritizing small daily steps saved her vision and quality of life. -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/debug-auth.js: -------------------------------------------------------------------------------- 1 | import OpenAI from 'openai'; 2 | import 'dotenv/config'; 3 | 4 | // Create a simple test script to debug the API connection with authentication 5 | async function testConnection() { 6 | console.log('Testing OpenAI API connection with authentication...'); 7 | console.log('Base URL:', process.env.OPENAI_BASE_URL); 8 | console.log('API Key:', process.env.OPENAI_API_KEY ? 'Set (masked)' : 'Not set'); 9 | 10 | const openai = new OpenAI({ 11 | apiKey: process.env.OPENAI_API_KEY, 12 | baseURL: process.env.OPENAI_BASE_URL, 13 | defaultHeaders: { 14 | "api-key": process.env.OPENAI_API_KEY, 15 | "Authorization": `Bearer ${process.env.OPENAI_API_KEY}` 16 | } 17 | }); 18 | 19 | try { 20 | // Try a simple chat completion without streaming 21 | console.log('\nAttempting a simple chat completion...'); 22 | const completion = await openai.chat.completions.create({ 23 | model: 'Qwen/QwQ-32B-AWQ', 24 | messages: [{ role: 'user', content: 'Hello, how are you?' }], 25 | stream: false, 26 | }); 27 | console.log('Chat completion successful:', completion); 28 | } catch (error) { 29 | console.error('Error with chat completion:', error); 30 | 31 | // Print more detailed error information 32 | if (error.response) { 33 | console.log('Response status:', error.response.status); 34 | console.log('Response headers:', error.response.headers); 35 | console.log('Response data:', error.response.data); 36 | } 37 | } 38 | } 39 | 40 | testConnection().catch(console.error); 41 | -------------------------------------------------------------------------------- /benchmark/prompts.txt: -------------------------------------------------------------------------------- 1 | In about 150 tokens, explain how generative AI models create new content from training data. 2 | Using approximately 150 tokens, describe the key differences between GPT-3 and GPT-4 architectures. 3 | In around 150 tokens, explain how temperature affects AI model output diversity. 4 | Describe, in about 150 tokens, the role attention mechanisms play in transformer models. 5 | In approximately 150 tokens, outline the process of fine-tuning a language model. 6 | Using about 150 tokens, explain how RAG improves AI model accuracy and knowledge. 7 | In roughly 150 tokens, discuss the main challenges in prompt engineering. 8 | Explain the concept of zero-shot learning in AI models, using about 150 tokens. 9 | In approximately 150 tokens, describe how embeddings represent text in vector space. 
10 | Discuss the significance of context length in LLMs, using about 150 tokens. 11 | In around 150 tokens, describe the token limitation problem in language models. 12 | Explain how few-shot learning works in generative AI, using approximately 150 tokens. 13 | In about 150 tokens, outline the benefits of model quantization. 14 | Using roughly 150 tokens, explain the concept of knowledge distillation in AI. 15 | In approximately 150 tokens, describe how RLHF improves AI model outputs. 16 | Discuss the key metrics for evaluating GenAI models, using about 150 tokens. 17 | In around 150 tokens, describe the challenges of AI model hallucination. 18 | Explain how prompt injection affects model security, using approximately 150 tokens. 19 | In about 150 tokens, discuss the role of tokenization in language models. 20 | Using approximately 150 tokens, explain the concept of model alignment in AI. -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/startup-mcp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | echo "Starting Strands SDK Tavily MCP Server (Clean Mode)..." 5 | 6 | # Load environment variables from ConfigMap or local file 7 | if [ -f "/app/config/.env" ]; then 8 | echo "Loading environment variables from ConfigMap .env file..." 9 | export $(grep -v '^#' /app/config/.env | xargs) 10 | echo "Environment variables loaded successfully from ConfigMap" 11 | elif [ -f "/app/.env" ]; then 12 | echo "Loading environment variables from local .env file..." 13 | export $(grep -v '^#' /app/.env | xargs) 14 | echo "Environment variables loaded successfully from local file" 15 | else 16 | echo "WARNING: No .env file found. Using environment variables from Kubernetes." 17 | fi 18 | 19 | # Verify critical environment variables 20 | echo "Verifying critical environment variables..." 21 | if [ -z "$TAVILY_API_KEY" ]; then 22 | echo "ERROR: TAVILY_API_KEY is not set" 23 | exit 1 24 | fi 25 | 26 | echo "Critical environment variables verified" 27 | 28 | echo "Starting Tavily MCP Server..." 29 | echo "Server will be available on port 8001" 30 | 31 | # Load .env and run the MCP server 32 | python -c " 33 | from dotenv import load_dotenv 34 | import os 35 | 36 | # Try to load from ConfigMap first, then fallback to local 37 | if os.path.exists('/app/config/.env'): 38 | load_dotenv('/app/config/.env') 39 | print('Loaded environment from ConfigMap') 40 | elif os.path.exists('/app/.env'): 41 | load_dotenv('/app/.env') 42 | print('Loaded environment from local file') 43 | 44 | # Now run the MCP server 45 | from src.mcp_server_standalone import main 46 | main() 47 | " 48 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/src/README.md: -------------------------------------------------------------------------------- 1 | # RAG System with Custom Embedding Model 2 | 3 | This project implements a Retrieval-Augmented Generation (RAG) system using a custom embedding model and Milvus vector database. 4 | 5 | ## Setup 6 | 7 | 1. Install dependencies: 8 | ```bash 9 | npm install 10 | ``` 11 | 12 | 2. 
Create a `.env` file with the following variables: 13 | ``` 14 | MILVUS_ADDRESS=your_milvus_address 15 | MILVUS_USERNAME=your_milvus_username 16 | MILVUS_PASSWORD=your_milvus_password 17 | AWS_REGION=your_aws_region 18 | ``` 19 | 20 | ## Embedding the CSV Data 21 | 22 | To embed the `q_c_data.csv` file from the knowledge folder: 23 | 24 | ```bash 25 | npm run embed-csv 26 | ``` 27 | 28 | This will process the CSV file, extract question-context pairs, and embed them using the custom embedding endpoint. 29 | 30 | ## Running the Application 31 | 32 | To run the main application: 33 | 34 | ```bash 35 | npm start 36 | ``` 37 | 38 | ## Files 39 | 40 | - `index.ts`: Main application entry point 41 | - `EmbeddingRetriever.ts`: Handles embedding generation using the custom endpoint 42 | - `MilvusVectorStore.ts`: Manages vector storage and retrieval in Milvus 43 | - `Agent.ts`: Implements the agent that uses the RAG system 44 | - `updateRAG.ts`: Script to process and embed the CSV data 45 | 46 | ## Custom Embedding Endpoint 47 | 48 | The system uses a custom embedding endpoint at http://18.232.167.163:8080/v1/embeddings instead of AWS Bedrock. 49 | 50 | Example request: 51 | ```bash 52 | curl --request POST \ 53 | --url http://18.232.167.163:8080/v1/embeddings \ 54 | --header "Content-Type: application/json" \ 55 | --data '{"content": "Your text here"}' 56 | ``` 57 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/debug.js: -------------------------------------------------------------------------------- 1 | import OpenAI from 'openai'; 2 | import 'dotenv/config'; 3 | 4 | // Create a simple test script to debug the API connection 5 | async function testConnection() { 6 | console.log('Testing OpenAI API connection...'); 7 | console.log('Base URL:', process.env.OPENAI_BASE_URL); 8 | console.log('API Key:', process.env.OPENAI_API_KEY ? 'Set (masked)' : 'Not set'); 9 | 10 | const openai = new OpenAI({ 11 | apiKey: process.env.OPENAI_API_KEY, 12 | baseURL: process.env.OPENAI_BASE_URL, 13 | }); 14 | 15 | try { 16 | // First try to list models 17 | console.log('\nAttempting to list models...'); 18 | const models = await openai.models.list(); 19 | console.log('Models available:', models.data.map(m => m.id)); 20 | } catch (error) { 21 | console.error('Error listing models:', error); 22 | } 23 | 24 | try { 25 | // Try a simple chat completion without streaming 26 | console.log('\nAttempting a simple chat completion...'); 27 | const completion = await openai.chat.completions.create({ 28 | model: 'Qwen/QwQ-32B-AWQ', 29 | messages: [{ role: 'user', content: 'Hello, how are you?' 
}], 30 | stream: false, 31 | }); 32 | console.log('Chat completion successful:', completion); 33 | } catch (error) { 34 | console.error('Error with chat completion:', error); 35 | 36 | // Print more detailed error information 37 | if (error.response) { 38 | console.log('Response status:', error.response.status); 39 | console.log('Response headers:', error.response.headers); 40 | console.log('Response data:', error.response.data); 41 | } 42 | } 43 | } 44 | 45 | testConnection().catch(console.error); 46 | -------------------------------------------------------------------------------- /base_eks_setup/prometheus-monitoring.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: monitoring.coreos.com/v1 3 | kind: ServiceMonitor 4 | metadata: 5 | name: vllm 6 | namespace: monitoring 7 | labels: 8 | release: kube-prometheus-stack 9 | spec: 10 | endpoints: 11 | - path: '/metrics/' 12 | port: metrics 13 | selector: 14 | matchLabels: 15 | app.kubernetes.io/name: kuberay 16 | --- 17 | apiVersion: monitoring.coreos.com/v1 18 | kind: PodMonitor 19 | metadata: 20 | labels: 21 | release: prometheus 22 | name: kuberay-cluster 23 | namespace: monitoring # ns where prometheus is deployed 24 | spec: 25 | podMetricsEndpoints: 26 | - port: metrics 27 | path: '/metrics/' 28 | namespaceSelector: 29 | matchNames: 30 | - kuberay-system # ns where Ray cluster is deployed 31 | selector: 32 | matchLabels: 33 | app.kubernetes.io/name: kuberay 34 | --- 35 | apiVersion: monitoring.coreos.com/v1 36 | kind: PodMonitor 37 | metadata: 38 | name: ray-workers-monitor 39 | namespace: monitoring 40 | labels: 41 | # `release: $HELM_RELEASE`: Prometheus can only detect PodMonitor with this label. 42 | release: prometheus 43 | spec: 44 | jobLabel: ray-workers 45 | # Only select Kubernetes Pods in the "default" namespace. 46 | namespaceSelector: 47 | matchNames: 48 | - kuberay-system 49 | # Only select Kubernetes Pods with "matchLabels". 50 | selector: 51 | matchLabels: 52 | ray.io/node-type: worker 53 | # A list of endpoints allowed as part of this PodMonitor. 
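# The relabeling below copies each worker pod's `ray.io/cluster` label onto its
# scraped series as `ray_io_cluster`, enabling per-cluster aggregation. An
# illustrative PromQL sanity check (an assumption -- adapt to the metrics your
# Ray workers actually expose):
#   count by (ray_io_cluster) (up{namespace="kuberay-system"})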
54 | podMetricsEndpoints: 55 | - port: metrics 56 | path: '/metrics/' 57 | relabelings: 58 | - sourceLabels: [__meta_kubernetes_pod_label_ray_io_cluster] 59 | targetLabel: ray_io_cluster 60 | -------------------------------------------------------------------------------- /model-observability/values.yaml.DEPRECATED: -------------------------------------------------------------------------------- 1 | # FOR DEMO USE ONLY 2 | langfuse: 3 | additionalEnv: 4 | - name: REDIS_PORT 5 | value: "6379" 6 | # - name: LANGFUSE_S3_EVENT_UPLOAD_SECRET_ACCESS_KEY 7 | # value: "changeme" 8 | # valueFrom: 9 | # secretKeyRef: 10 | # name: langfuse-s3 11 | # key: root-password 12 | # - name: LANGFUSE_S3_BATCH_EXPORT_SECRET_ACCESS_KEY 13 | # value: "changeme" 14 | # valueFrom: 15 | # secretKeyRef: 16 | # name: langfuse-s3 17 | # key: root-password 18 | salt: 19 | value: TOO_SALTY 20 | nextauth: 21 | secret: 22 | value: "changeme" 23 | 24 | postgresql: 25 | auth: 26 | password: "changeme" 27 | 28 | clickhouse: 29 | auth: 30 | password: "changeme" 31 | 32 | redis: 33 | auth: 34 | password: "changeme" 35 | 36 | s3: 37 | secretAccessKey: 38 | value: "changeme" 39 | 40 | # FOR NON-DEMO USE, YOU SHOULD DEFINE THE SENSITIVE DATA IN SECRETS and/or use cloud storage services such as S3 41 | # langfuse: 42 | # salt: 43 | # secretKeyRef: 44 | # name: langfuse-general 45 | # key: salt 46 | # nextauth: 47 | # secret: 48 | # secretKeyRef: 49 | # name: langfuse-nextauth-secret 50 | # key: nextauth-secret 51 | 52 | # postgresql: 53 | # auth: 54 | # existingSecret: langfuse-postgresql-auth 55 | # secretKeys: 56 | # userPasswordKey: password 57 | 58 | # clickhouse: 59 | # auth: 60 | # existingSecret: langfuse-clickhouse-auth 61 | # secretKeys: 62 | # userPasswordKey: password 63 | 64 | # redis: 65 | # auth: 66 | # existingSecret: langfuse-redis-auth 67 | # secretKeys: 68 | # userPasswordKey: password -------------------------------------------------------------------------------- /agentic-apps/agentic-idp/decision.py: -------------------------------------------------------------------------------- 1 | 2 | from langchain_openai import ChatOpenAI 3 | from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder 4 | from langchain_core.messages import AIMessage, HumanMessage 5 | from langgraph.graph import StateGraph, START, END 6 | from langgraph.graph.message import add_messages 7 | from langgraph.checkpoint.memory import MemorySaver 8 | from langchain_core.messages import SystemMessage, HumanMessage 9 | 10 | from typing import Annotated, List 11 | from langchain.prompts.chat import HumanMessagePromptTemplate 12 | 13 | 14 | from typing_extensions import TypedDict 15 | 16 | import requests 17 | import json 18 | import base64 19 | 20 | import logging 21 | 22 | from langfuse import Langfuse 23 | from datetime import datetime, timedelta 24 | import os 25 | import math 26 | import openai 27 | 28 | from PyPDF2 import PdfReader 29 | 30 | from pathlib import Path 31 | 32 | from langgraph.pregel import RetryPolicy 33 | 34 | 35 | 36 | class State(TypedDict): 37 | messages: Annotated[list, add_messages] 38 | 39 | # Add new node for external processing 40 | async def external_automation_node(state: State) -> State: 41 | """ 42 | Node that calls an external auto-approval service 43 | """ 44 | # Get the last message content 45 | last_message = state["messages"][-1].content 46 | 47 | # Call external auto approve service 48 | 49 | return {"messages": [last_message]} 50 | 51 | async def external_human_node(state: State) -> State: 52 | """
53 | Node that calls external processing service 54 | """ 55 | # Get the last message content 56 | last_message = state["messages"][-1].content 57 | 58 | # Call external auto approve service 59 | 60 | return {"messages": [last_message]} -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_opensearch/src/scripts/embedKnowledge.ts: -------------------------------------------------------------------------------- 1 | import 'dotenv/config'; 2 | import { KnowledgeAgent } from "../agents"; 3 | import { logTitle } from "../utils"; 4 | 5 | // Function to run knowledge embedding as a standalone script 6 | async function runKnowledgeEmbedding() { 7 | logTitle('STANDALONE KNOWLEDGE EMBEDDING'); 8 | 9 | let knowledgeAgent: KnowledgeAgent | null = null; 10 | 11 | try { 12 | // Initialize the knowledge agent 13 | knowledgeAgent = new KnowledgeAgent(); 14 | await knowledgeAgent.init(); 15 | 16 | // Check for changes and embed if needed 17 | const hasChanges = await knowledgeAgent.checkForChanges(); 18 | 19 | if (hasChanges) { 20 | console.log('Changes detected, embedding all knowledge files (including CSV)...'); 21 | 22 | // Embed all knowledge files (markdown, text, JSON, and CSV) 23 | const result = await knowledgeAgent.embedKnowledge(); 24 | console.log(`Knowledge embedding result: ${result ? 'SUCCESS' : 'FAILED'}`); 25 | 26 | if (result) { 27 | console.log('Knowledge embedding completed successfully'); 28 | } else { 29 | console.error('Knowledge embedding failed'); 30 | process.exit(1); 31 | } 32 | } else { 33 | console.log('No changes detected in knowledge files'); 34 | } 35 | 36 | } catch (error) { 37 | console.error('Error in knowledge embedding:', error); 38 | process.exit(1); 39 | } finally { 40 | // Clean up 41 | if (knowledgeAgent) { 42 | await knowledgeAgent.close(); 43 | } 44 | } 45 | } 46 | 47 | // Main execution 48 | (async () => { 49 | await runKnowledgeEmbedding(); 50 | })(); 51 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/debug-no-tools.js: -------------------------------------------------------------------------------- 1 | import OpenAI from 'openai'; 2 | import 'dotenv/config'; 3 | 4 | // Create a test script to debug the API connection with streaming 5 | async function testConnection() { 6 | console.log('Testing OpenAI API connection with streaming...'); 7 | console.log('Base URL:', process.env.OPENAI_BASE_URL); 8 | console.log('API Key:', process.env.OPENAI_API_KEY ? 
'Set (masked)' : 'Not set'); 9 | 10 | const openai = new OpenAI({ 11 | apiKey: process.env.OPENAI_API_KEY, 12 | baseURL: process.env.OPENAI_BASE_URL, 13 | defaultHeaders: { 14 | "api-key": process.env.OPENAI_API_KEY, 15 | "Authorization": `Bearer ${process.env.OPENAI_API_KEY}` 16 | } 17 | }); 18 | 19 | try { 20 | // Try a simple chat completion with streaming 21 | console.log('\nAttempting a chat completion with streaming...'); 22 | const stream = await openai.chat.completions.create({ 23 | model: 'Qwen/QwQ-32B-AWQ', 24 | messages: [{ role: 'user', content: 'Tell me a short joke' }], 25 | stream: true, 26 | }); 27 | 28 | console.log('Stream response started:'); 29 | for await (const chunk of stream) { 30 | process.stdout.write(chunk.choices[0]?.delta?.content || ''); 31 | } 32 | console.log('\nStream completed successfully'); 33 | } catch (error) { 34 | console.error('Error with chat completion:', error); 35 | 36 | // Print more detailed error information 37 | if (error.response) { 38 | console.log('Response status:', error.response.status); 39 | console.log('Response headers:', error.response.headers); 40 | try { 41 | console.log('Response data:', await error.response.text()); 42 | } catch (e) { 43 | console.log('Could not read response data'); 44 | } 45 | } 46 | } 47 | } 48 | 49 | testConnection().catch(console.error); 50 | -------------------------------------------------------------------------------- /milvus/milvus-standalone.yaml: -------------------------------------------------------------------------------- 1 | # kubectl apply -f https://github.com/jetstack/cert-manager/releases/download/v1.5.3/cert-manager.yaml 2 | # kubectl get pods -n cert-manager 3 | # kubectl apply -f https://raw.githubusercontent.com/zilliztech/milvus-operator/main/deploy/manifests/deployment.yaml 4 | # kubectl get pods -n milvus-operator 5 | # deploy Milvus in standalone mode 6 | # kubectl apply -f milvus-standalone.yaml 7 | # Uninstall milvus 8 | # kubectl delete milvus my-release 9 | # This is a sample to deploy a standalone milvus in milvus-operator's default configurations. 10 | # kubectl port-forward service/my-release-milvus 19530:19530 11 | # kubectl port-forward service/llama-cpp 8080:8080 12 | # curl --request POST --url http://localhost:8080/completion --header "Content-Type: application/json" --data '{"prompt": "What is llama3.2?","n_predict": 128}' 13 | apiVersion: milvus.io/v1beta1 14 | kind: Milvus 15 | metadata: 16 | name: my-release 17 | spec: 18 | mode: standalone 19 | components: 20 | nodeSelector: 21 | kubernetes.io/arch: arm64 22 | resources: 23 | requests: 24 | cpu: "1" 25 | dependencies: 26 | etcd: 27 | inCluster: 28 | deletionPolicy: Delete 29 | pvcDeletion: true 30 | values: 31 | nodeSelector: 32 | kubernetes.io/arch: arm64 33 | resources: 34 | requests: 35 | cpu: '1' 36 | pulsar: 37 | inCluster: 38 | deletionPolicy: Delete 39 | pvcDeletion: true 40 | values: 41 | nodeSelector: 42 | kubernetes.io/arch: arm64 43 | resources: 44 | requests: 45 | cpu: '1' 46 | storage: 47 | inCluster: 48 | deletionPolicy: Delete 49 | pvcDeletion: true 50 | values: 51 | nodeSelector: 52 | kubernetes.io/arch: arm64 53 | resources: 54 | requests: 55 | cpu: '1' -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/startup-main.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | echo "Starting Strands SDK Agentic RAG Main Application (Clean Mode)..." 
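# Portability note on the env loading below: `export $(grep -v '^#' FILE | xargs)`
# is a common sketch, but it word-splits on whitespace, so values containing
# spaces or quotes will be mangled. A more robust alternative (assuming the
# file holds plain KEY=VALUE lines) would be:
#   set -a; . /app/config/.env; set +a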
5 | 6 | # Load environment variables from ConfigMap or local file 7 | if [ -f "/app/config/.env" ]; then 8 | echo "Loading environment variables from ConfigMap .env file..." 9 | export $(grep -v '^#' /app/config/.env | xargs) 10 | echo "Environment variables loaded successfully from ConfigMap" 11 | elif [ -f "/app/.env" ]; then 12 | echo "Loading environment variables from local .env file..." 13 | export $(grep -v '^#' /app/.env | xargs) 14 | echo "Environment variables loaded successfully from local file" 15 | else 16 | echo "WARNING: No .env file found. Using environment variables from Kubernetes." 17 | fi 18 | 19 | # Verify critical environment variables 20 | echo "Verifying critical environment variables..." 21 | if [ -z "$LITELLM_API_KEY" ] && [ -z "$OPENAI_API_KEY" ]; then 22 | echo "ERROR: Neither LITELLM_API_KEY nor OPENAI_API_KEY is set" 23 | exit 1 24 | fi 25 | 26 | if [ -z "$OPENSEARCH_ENDPOINT" ]; then 27 | echo "ERROR: OPENSEARCH_ENDPOINT is not set" 28 | exit 1 29 | fi 30 | 31 | if [ -z "$AWS_REGION" ]; then 32 | echo "ERROR: AWS_REGION is not set" 33 | exit 1 34 | fi 35 | 36 | echo "Critical environment variables verified" 37 | 38 | echo "Starting FastAPI server with clean mode..." 39 | echo "Server will be available on port 8000" 40 | echo "API Documentation available at http://localhost:8000/docs" 41 | 42 | # Load .env and run the server 43 | python -c " 44 | from dotenv import load_dotenv 45 | import os 46 | 47 | # Try to load from ConfigMap first, then fallback to local 48 | if os.path.exists('/app/config/.env'): 49 | load_dotenv('/app/config/.env') 50 | print('Loaded environment from ConfigMap') 51 | elif os.path.exists('/app/.env'): 52 | load_dotenv('/app/.env') 53 | print('Loaded environment from local file') 54 | 55 | # Now run the server 56 | from src.server import run_server 57 | run_server() 58 | " 59 | -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=61.0", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "strandsdk-agentic-rag-opensearch" 7 | version = "1.0.0" 8 | description = "Multi-Agent RAG system with MCP and OpenSearch using Strands SDK" 9 | authors = [ 10 | {name = "Your Name", email = "your.email@example.com"} 11 | ] 12 | readme = "README.md" 13 | requires-python = ">=3.9" 14 | classifiers = [ 15 | "Development Status :: 4 - Beta", 16 | "Intended Audience :: Developers", 17 | "License :: OSI Approved :: MIT License", 18 | "Programming Language :: Python :: 3", 19 | "Programming Language :: Python :: 3.9", 20 | "Programming Language :: Python :: 3.10", 21 | "Programming Language :: Python :: 3.11", 22 | "Programming Language :: Python :: 3.12", 23 | ] 24 | dependencies = [ 25 | "strands-agents>=0.1.0", 26 | "strands-tools>=0.1.0", 27 | "openai>=1.0.0", 28 | "mcp>=1.0.0", 29 | "fastmcp>=0.9.0", 30 | "boto3>=1.34.0", 31 | "opensearch-py>=2.4.0", 32 | "aws-requests-auth>=0.4.3", 33 | "numpy>=1.24.0", 34 | "scikit-learn>=1.3.0", 35 | "pandas>=2.0.0", 36 | "python-dotenv>=1.0.0", 37 | "langfuse>=2.0.0", 38 | "pydantic>=2.0.0", 39 | "aiofiles>=23.0.0", 40 | ] 41 | 42 | [project.optional-dependencies] 43 | dev = [ 44 | "pytest>=7.0.0", 45 | "pytest-asyncio>=0.21.0", 46 | "black>=23.0.0", 47 | "isort>=5.12.0", 48 | "flake8>=6.0.0", 49 | "mypy>=1.0.0", 50 | ] 51 | 52 | [project.scripts] 53 | embed-knowledge = 
"src.scripts.embed_knowledge:main" 54 | run-agents = "src.main:main" 55 | test-agents = "src.test_agents:main" 56 | 57 | [tool.setuptools.packages.find] 58 | where = ["."] 59 | include = ["src*"] 60 | 61 | [tool.black] 62 | line-length = 88 63 | target-version = ['py39'] 64 | 65 | [tool.isort] 66 | profile = "black" 67 | line_length = 88 68 | 69 | [tool.mypy] 70 | python_version = "3.9" 71 | warn_return_any = true 72 | warn_unused_configs = true 73 | disallow_untyped_defs = true 74 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_opensearch/src/MCPClient.ts: -------------------------------------------------------------------------------- 1 | import { Client } from "@modelcontextprotocol/sdk/client/index.js"; 2 | import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js"; 3 | import { Tool } from "@modelcontextprotocol/sdk/types.js"; 4 | 5 | export default class MCPClient { 6 | public mcp: Client; 7 | private command: string; 8 | private args: string[] 9 | private transport: StdioClientTransport | null = null; 10 | private tools: Tool[] = []; 11 | 12 | constructor(name: string, command: string, args: string[], version?: string) { 13 | this.mcp = new Client({ name, version: version || "0.0.1" }); 14 | this.command = command; 15 | this.args = args; 16 | } 17 | 18 | public async init() { 19 | await this.connectToServer(); 20 | } 21 | 22 | public async close() { 23 | await this.mcp.close(); 24 | } 25 | 26 | public getTools() { 27 | return this.tools; 28 | } 29 | 30 | public callTool(name: string, params: Record) { 31 | return this.mcp.callTool({ 32 | name, 33 | arguments: params, 34 | }); 35 | } 36 | 37 | private async connectToServer() { 38 | try { 39 | this.transport = new StdioClientTransport({ 40 | command: this.command, 41 | args: this.args, 42 | }); 43 | await this.mcp.connect(this.transport); 44 | 45 | const toolsResult = await this.mcp.listTools(); 46 | this.tools = toolsResult.tools.map((tool) => { 47 | return { 48 | name: tool.name, 49 | description: tool.description, 50 | inputSchema: tool.inputSchema, 51 | }; 52 | }); 53 | console.log( 54 | "Connected to server with tools:", 55 | this.tools.map(({ name }) => name) 56 | ); 57 | } catch (e) { 58 | console.log("Failed to connect to MCP server: ", e); 59 | throw e; 60 | } 61 | } 62 | } -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/test-mcp-server.js: -------------------------------------------------------------------------------- 1 | // Test script to check MCP server functionality 2 | import { spawn } from 'child_process'; 3 | import path from 'path'; 4 | 5 | const outPath = path.resolve(process.cwd(), 'output'); 6 | console.log(`Testing MCP server with output path: ${outPath}`); 7 | 8 | // Start the MCP server process 9 | const mcpProcess = spawn('npx', ['-y', '@modelcontextprotocol/server-filesystem', outPath], { 10 | stdio: ['pipe', 'pipe', 'pipe'] 11 | }); 12 | 13 | // Log server output 14 | mcpProcess.stdout.on('data', (data) => { 15 | console.log(`MCP Server stdout: ${data}`); 16 | }); 17 | 18 | mcpProcess.stderr.on('data', (data) => { 19 | console.error(`MCP Server stderr: ${data}`); 20 | }); 21 | 22 | // Send a test message to the server after it starts 23 | setTimeout(() => { 24 | try { 25 | console.log('Sending test message to MCP server...'); 26 | const message = { 27 | jsonrpc: '2.0', 28 | id: '1', 29 | method: 'listTools', 30 | params: {} 31 | }; 32 | 33 | 
mcpProcess.stdin.write(JSON.stringify(message) + '\n'); 34 | 35 | // Wait for response and then try to call the write_file tool 36 | setTimeout(() => { 37 | console.log('Attempting to call write_file tool...'); 38 | const writeFileMessage = { 39 | jsonrpc: '2.0', 40 | id: '2', 41 | method: 'callTool', 42 | params: { 43 | name: 'write_file', 44 | arguments: { 45 | path: 'mcp-test.md', 46 | content: '# MCP Test\n\nThis file was created using the MCP server.' 47 | } 48 | } 49 | }; 50 | 51 | mcpProcess.stdin.write(JSON.stringify(writeFileMessage) + '\n'); 52 | 53 | // Close the server after testing 54 | setTimeout(() => { 55 | console.log('Closing MCP server...'); 56 | mcpProcess.kill(); 57 | process.exit(0); 58 | }, 2000); 59 | }, 1000); 60 | } catch (error) { 61 | console.error(`Error sending message to MCP server: ${error}`); 62 | mcpProcess.kill(); 63 | process.exit(1); 64 | } 65 | }, 2000); 66 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/test-endpoint.js: -------------------------------------------------------------------------------- 1 | // Simple test script to check LLM endpoint connectivity 2 | import 'dotenv/config'; 3 | import { OpenAI } from 'openai'; 4 | 5 | async function testEndpoint() { 6 | console.log('Testing LLM endpoint with a simple request'); 7 | console.log('Base URL:', process.env.OPENAI_BASE_URL); 8 | 9 | // Create OpenAI client with simplified configuration 10 | const openai = new OpenAI({ 11 | apiKey: process.env.OPENAI_API_KEY, 12 | baseURL: process.env.OPENAI_BASE_URL, 13 | defaultHeaders: { 14 | // Using only one authentication method 15 | "Authorization": `Bearer ${process.env.OPENAI_API_KEY}` 16 | } 17 | }); 18 | 19 | // Simple message without tools 20 | const messages = [ 21 | { role: "user", content: "Hello, can you respond with a simple greeting?" } 22 | ]; 23 | 24 | try { 25 | console.log('Sending request...'); 26 | 27 | // Try with the original model name format 28 | const modelName = 'Qwen/QwQ-32B-AWQ'; 29 | console.log('Using model:', modelName); 30 | 31 | const response = await openai.chat.completions.create({ 32 | model: modelName, 33 | messages: messages, 34 | stream: false 35 | }); 36 | 37 | console.log('Success! 
Response:'); 38 | console.log(JSON.stringify(response, null, 2)); 39 | return response; 40 | } catch (error) { 41 | console.error('Error occurred:'); 42 | console.error('Status:', error.status); 43 | console.error('Headers:', error.headers); 44 | 45 | if (error.response) { 46 | try { 47 | const responseText = await error.response.text(); 48 | console.error('Response body:', responseText); 49 | try { 50 | const responseJson = JSON.parse(responseText); 51 | console.error('Response JSON:', JSON.stringify(responseJson, null, 2)); 52 | } catch (parseError) { 53 | // If not valid JSON, the text version is already logged 54 | } 55 | } catch (e) { 56 | console.error('Could not read response data:', e); 57 | } 58 | } 59 | 60 | throw error; 61 | } 62 | } 63 | 64 | // Run the test 65 | testEndpoint() 66 | .then(() => console.log('Test completed successfully')) 67 | .catch(err => console.error('Test failed:', err.message)); 68 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/explore_collection.py: -------------------------------------------------------------------------------- 1 | import os 2 | from dotenv import load_dotenv 3 | from pymilvus import connections, Collection 4 | 5 | # Load environment variables 6 | load_dotenv() 7 | 8 | # Get Milvus address from environment 9 | milvus_address = os.getenv("MILVUS_ADDRESS", "localhost:19530") 10 | milvus_host, milvus_port = milvus_address.split(":") 11 | 12 | # Collection name 13 | collection_name = "rag_documents" 14 | 15 | # Connect to Milvus 16 | print(f"Connecting to Milvus at {milvus_host}:{milvus_port}...") 17 | try: 18 | connections.connect( 19 | alias="default", 20 | host=milvus_host, 21 | port=milvus_port 22 | ) 23 | 24 | # Get collection 25 | collection = Collection(collection_name) 26 | 27 | # 1. Check the schema 28 | schema = collection.schema 29 | print("\n1. Collection Schema:") 30 | print(f"Collection name: {collection_name}") 31 | print("Fields:") 32 | for field in schema.fields: 33 | print(f" - {field.name}: {field.dtype} (is_primary_key: {field.is_primary})") 34 | if hasattr(field, 'params') and field.params: 35 | print(f" Vector dimension: {field.params.get('dim')}") 36 | 37 | # 2. Get number of entities 38 | num_entities = collection.num_entities 39 | print(f"\n2. Number of entities: {num_entities}") 40 | 41 | # 3. Examine index information 42 | print("\n3. 
Index Information:") 43 | try: 44 | index_info = collection.index().params 45 | print(f"Index params: {index_info}") 46 | except Exception as e: 47 | print(f"Could not get index params: {e}") 48 | 49 | # Get more detailed information about vector field indexes 50 | try: 51 | for field_name in collection.index_info.keys(): 52 | field_index = collection.index_info.get(field_name) 53 | if field_index: 54 | print(f"\nField '{field_name}' index:") 55 | print(f" Index type: {field_index.get('index_type')}") 56 | print(f" Metric type: {field_index.get('metric_type')}") 57 | print(f" Params: {field_index.get('params')}") 58 | except Exception as e: 59 | print(f"Could not get detailed index info: {e}") 60 | 61 | # Close connection 62 | connections.disconnect("default") 63 | 64 | except Exception as e: 65 | print(f"Error exploring Milvus collection: {e}") 66 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/src/embedKnowledge.ts: -------------------------------------------------------------------------------- 1 | import fs from "fs"; 2 | import path from "path"; 3 | import EmbeddingRetriever from "./EmbeddingRetriever"; 4 | import { logTitle } from "./utils"; 5 | 6 | // Function to process and embed knowledge files 7 | async function embedKnowledgeFiles() { 8 | logTitle('EMBEDDING KNOWLEDGE FILES'); 9 | 10 | const knowledgePath = path.join(process.cwd(), 'knowledge'); 11 | 12 | if (!fs.existsSync(knowledgePath)) { 13 | console.error(`Knowledge directory not found: ${knowledgePath}`); 14 | return false; 15 | } 16 | 17 | try { 18 | // Get all markdown files in the knowledge directory 19 | const files = fs.readdirSync(knowledgePath) 20 | .filter(file => file.endsWith('.md')); 21 | 22 | console.log(`Found ${files.length} markdown files in the knowledge directory`); 23 | 24 | // Initialize the embedding retriever 25 | const embeddingRetriever = new EmbeddingRetriever("custom-embedding-model"); 26 | 27 | // Process each file 28 | for (const file of files) { 29 | const filePath = path.join(knowledgePath, file); 30 | console.log(`Processing file: ${file}`); 31 | 32 | // Read the file content 33 | const content = fs.readFileSync(filePath, 'utf-8'); 34 | 35 | // Embed the document 36 | await embeddingRetriever.embedDocument(content); 37 | } 38 | 39 | console.log(`Successfully embedded ${files.length} knowledge files`); 40 | 41 | // Close Milvus connection when done 42 | // @ts-ignore - Access private property for cleanup 43 | if (embeddingRetriever.vectorStore && typeof embeddingRetriever.vectorStore.close === 'function') { 44 | // @ts-ignore 45 | await embeddingRetriever.vectorStore.close(); 46 | } 47 | 48 | return true; 49 | } catch (error) { 50 | console.error("Error embedding knowledge files:", error); 51 | return false; 52 | } 53 | } 54 | 55 | // Main function 56 | (async () => { 57 | const success = await embedKnowledgeFiles(); 58 | 59 | if (success) { 60 | console.log("Knowledge embedding completed successfully"); 61 | } else { 62 | console.error("Knowledge embedding failed"); 63 | process.exit(1); 64 | } 65 | })(); 66 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_opensearch/src/embedKnowledge.ts: -------------------------------------------------------------------------------- 1 | import fs from "fs"; 2 | import path from "path"; 3 | import EmbeddingRetriever from "./EmbeddingRetriever"; 4 | import { logTitle } from "./utils"; 5 | 6 | // Function to process and embed 
knowledge files 7 | async function embedKnowledgeFiles() { 8 | logTitle('EMBEDDING KNOWLEDGE FILES'); 9 | 10 | const knowledgePath = path.join(process.cwd(), '..', 'knowledge'); 11 | 12 | if (!fs.existsSync(knowledgePath)) { 13 | console.error(`Knowledge directory not found: ${knowledgePath}`); 14 | return false; 15 | } 16 | 17 | try { 18 | // Get all markdown files in the knowledge directory 19 | const files = fs.readdirSync(knowledgePath) 20 | .filter(file => file.endsWith('.md')); 21 | 22 | console.log(`Found ${files.length} markdown files in the knowledge directory`); 23 | 24 | // Initialize the embedding retriever with llamacpp-embedding model 25 | const embeddingRetriever = new EmbeddingRetriever("llamacpp-embedding"); 26 | 27 | // Process each file 28 | for (const file of files) { 29 | const filePath = path.join(knowledgePath, file); 30 | console.log(`Processing file: ${file}`); 31 | 32 | // Read the file content 33 | const content = fs.readFileSync(filePath, 'utf-8'); 34 | 35 | // Embed the document 36 | await embeddingRetriever.embedDocument(content); 37 | } 38 | 39 | console.log(`Successfully embedded ${files.length} knowledge files`); 40 | 41 | // Close OpenSearch connection when done 42 | // @ts-ignore - Access private property for cleanup 43 | if (embeddingRetriever.vectorStore && typeof embeddingRetriever.vectorStore.close === 'function') { 44 | // @ts-ignore 45 | await embeddingRetriever.vectorStore.close(); 46 | } 47 | 48 | return true; 49 | } catch (error) { 50 | console.error("Error embedding knowledge files:", error); 51 | return false; 52 | } 53 | } 54 | 55 | // Main function 56 | (async () => { 57 | const success = await embedKnowledgeFiles(); 58 | 59 | if (success) { 60 | console.log("Knowledge embedding completed successfully"); 61 | } else { 62 | console.error("Knowledge embedding failed"); 63 | process.exit(1); 64 | } 65 | })(); 66 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/debug-tools.js: -------------------------------------------------------------------------------- 1 | import OpenAI from 'openai'; 2 | import 'dotenv/config'; 3 | 4 | // Create a test script to debug the API connection with tools 5 | async function testConnection() { 6 | console.log('Testing OpenAI API connection with tools...'); 7 | console.log('Base URL:', process.env.OPENAI_BASE_URL); 8 | console.log('API Key:', process.env.OPENAI_API_KEY ? 'Set (masked)' : 'Not set'); 9 | 10 | const openai = new OpenAI({ 11 | apiKey: process.env.OPENAI_API_KEY, 12 | baseURL: process.env.OPENAI_BASE_URL, 13 | defaultHeaders: { 14 | "api-key": process.env.OPENAI_API_KEY, 15 | "Authorization": `Bearer ${process.env.OPENAI_API_KEY}` 16 | } 17 | }); 18 | 19 | // Define a simple tool 20 | const tools = [ 21 | { 22 | type: "function", 23 | function: { 24 | name: "get_current_weather", 25 | description: "Get the current weather in a given location", 26 | parameters: { 27 | type: "object", 28 | properties: { 29 | location: { 30 | type: "string", 31 | description: "The city and state, e.g. 
San Francisco, CA", 32 | }, 33 | unit: { 34 | type: "string", 35 | enum: ["celsius", "fahrenheit"], 36 | description: "The temperature unit to use", 37 | }, 38 | }, 39 | required: ["location"], 40 | }, 41 | }, 42 | } 43 | ]; 44 | 45 | try { 46 | // Try a simple chat completion without streaming 47 | console.log('\nAttempting a chat completion with tools...'); 48 | const completion = await openai.chat.completions.create({ 49 | model: 'Qwen/QwQ-32B-AWQ', 50 | messages: [{ role: 'user', content: 'What\'s the weather like in Seattle?' }], 51 | stream: false, 52 | tools: tools, 53 | }); 54 | console.log('Chat completion successful:', JSON.stringify(completion, null, 2)); 55 | } catch (error) { 56 | console.error('Error with chat completion:', error); 57 | 58 | // Print more detailed error information 59 | if (error.response) { 60 | console.log('Response status:', error.response.status); 61 | console.log('Response headers:', error.response.headers); 62 | try { 63 | console.log('Response data:', await error.response.text()); 64 | } catch (e) { 65 | console.log('Could not read response data'); 66 | } 67 | } 68 | } 69 | } 70 | 71 | testConnection().catch(console.error); 72 | -------------------------------------------------------------------------------- /base_eks_setup/karpenter_nodepool/graviton-nodepool.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Graviton Node Pool for ARM-based workloads 3 | apiVersion: karpenter.sh/v1 4 | kind: NodePool 5 | metadata: 6 | name: graviton-inference 7 | spec: 8 | limits: 9 | cpu: 512 10 | memory: 4096Gi 11 | disruption: 12 | consolidationPolicy: WhenEmptyOrUnderutilized 13 | consolidateAfter: 30m 14 | template: 15 | metadata: 16 | labels: 17 | model-inferencing: "graviton-inference" 18 | ray-control-plane: "false" 19 | spec: 20 | nodeClassRef: 21 | group: karpenter.k8s.aws 22 | kind: EC2NodeClass 23 | name: graviton-inference 24 | expireAfter: 8h 25 | requirements: 26 | - key: karpenter.k8s.aws/instance-category 27 | operator: In 28 | values: 29 | - c 30 | - m 31 | - r 32 | - key: karpenter.k8s.aws/instance-generation 33 | operator: Gt 34 | values: ["6"] 35 | - key: kubernetes.io/arch 36 | operator: In 37 | values: ["arm64"] 38 | - key: kubernetes.io/os 39 | operator: In 40 | values: ["linux"] 41 | - key: karpenter.sh/capacity-type 42 | operator: In 43 | values: ["on-demand"] 44 | --- 45 | apiVersion: karpenter.k8s.aws/v1 46 | kind: EC2NodeClass 47 | metadata: 48 | name: graviton-inference 49 | spec: 50 | kubelet: 51 | podsPerCore: 2 52 | maxPods: 20 53 | systemReserved: 54 | cpu: 100m 55 | memory: 100Mi 56 | subnetSelectorTerms: 57 | - tags: 58 | Environment: dev 59 | Name: ${CLUSTER_NAME}-private-* 60 | karpenter.sh/discovery: ${CLUSTER_NAME} 61 | securityGroupSelectorTerms: 62 | - tags: 63 | Environment: dev 64 | karpenter.sh/discovery: ${CLUSTER_NAME} 65 | amiFamily: "AL2023" 66 | amiSelectorTerms: 67 | - name: "amazon-eks-node-al2023-arm64-standard-1.33-*" 68 | role: KarpenterNode-${CLUSTER_NAME} 69 | tags: 70 | Environment: dev 71 | karpenter.sh/discovery: ${CLUSTER_NAME} 72 | model-inferencing: "graviton-inference" 73 | ray-control-plane: "false" 74 | Provisioned-By: aws-solutions-library-samples/guidance-for-automated-provisioning-of-application-ready-amazon-eks-clusters 75 | blockDeviceMappings: 76 | - deviceName: /dev/xvda 77 | ebs: 78 | volumeSize: 100Gi 79 | volumeType: gp3 80 | iops: 10000 81 | encrypted: false 82 | deleteOnTermination: true 83 | throughput: 256 84 | 
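# Scheduling onto this pool: the NodePool above labels its nodes and, unlike
# the x86/inf2 pools, applies no taint, so a nodeSelector is enough. A sketch
# of a pod spec fragment (illustrative only, not part of this manifest):
#   nodeSelector:
#     model-inferencing: "graviton-inference"
#     kubernetes.io/arch: "arm64"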
-------------------------------------------------------------------------------- /base_eks_setup/karpenter_nodepool/x86-nodepool.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # x86 Node Pool for x86-based workloads 3 | apiVersion: karpenter.sh/v1 4 | kind: NodePool 5 | metadata: 6 | name: x86-inference 7 | spec: 8 | limits: 9 | cpu: 512 10 | memory: 4096Gi 11 | disruption: 12 | consolidationPolicy: WhenEmptyOrUnderutilized 13 | consolidateAfter: 30s 14 | template: 15 | metadata: 16 | labels: 17 | model-inferencing: "x86-inference" 18 | ray-control-plane: "false" 19 | spec: 20 | nodeClassRef: 21 | group: karpenter.k8s.aws 22 | kind: EC2NodeClass 23 | name: x86-inference 24 | taints: 25 | - key: "model-inferencing" 26 | value: "x86-inference" 27 | effect: NoSchedule 28 | expireAfter: 8h 29 | requirements: 30 | - key: karpenter.k8s.aws/instance-category 31 | operator: In 32 | values: 33 | - c 34 | - m 35 | - r 36 | - key: karpenter.k8s.aws/instance-generation 37 | operator: Gt 38 | values: ["6"] 39 | - key: kubernetes.io/arch 40 | operator: In 41 | values: ["amd64"] 42 | - key: kubernetes.io/os 43 | operator: In 44 | values: ["linux"] 45 | - key: karpenter.sh/capacity-type 46 | operator: In 47 | values: ["on-demand"] 48 | --- 49 | apiVersion: karpenter.k8s.aws/v1 50 | kind: EC2NodeClass 51 | metadata: 52 | name: x86-inference 53 | spec: 54 | kubelet: 55 | podsPerCore: 2 56 | maxPods: 20 57 | systemReserved: 58 | cpu: 100m 59 | memory: 100Mi 60 | subnetSelectorTerms: 61 | - tags: 62 | Environment: dev 63 | Name: ${CLUSTER_NAME}-private-* 64 | karpenter.sh/discovery: ${CLUSTER_NAME} 65 | securityGroupSelectorTerms: 66 | - tags: 67 | Environment: dev 68 | karpenter.sh/discovery: ${CLUSTER_NAME} 69 | amiFamily: "AL2023" 70 | amiSelectorTerms: 71 | - name: "amazon-eks-node-al2023-x86_64-standard-1.29-*" 72 | role: KarpenterNode-${CLUSTER_NAME} 73 | tags: 74 | Environment: dev 75 | karpenter.sh/discovery: ${CLUSTER_NAME} 76 | model-inferencing: "x86-inference" 77 | ray-control-plane: "false" 78 | Provisioned-By: aws-solutions-library-samples/guidance-for-automated-provisioning-of-application-ready-amazon-eks-clusters 79 | blockDeviceMappings: 80 | - deviceName: /dev/xvda 81 | ebs: 82 | volumeSize: 100Gi 83 | volumeType: gp3 84 | encrypted: false 85 | deleteOnTermination: true 86 | -------------------------------------------------------------------------------- /base_eks_setup/karpenter_nodepool/inf2-nodepool.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Inf2 Node Pool for ML inference workloads 3 | apiVersion: karpenter.sh/v1 4 | kind: NodePool 5 | metadata: 6 | name: inf2-inference 7 | spec: 8 | limits: 9 | cpu: 512 10 | memory: 4096Gi 11 | disruption: 12 | consolidationPolicy: WhenEmptyOrUnderutilized 13 | consolidateAfter: 30m 14 | template: 15 | metadata: 16 | labels: 17 | model-inferencing: "inf2-inference" 18 | ray-control-plane: "false" 19 | spec: 20 | nodeClassRef: 21 | group: karpenter.k8s.aws 22 | kind: EC2NodeClass 23 | name: inf2-inference 24 | taints: 25 | - key: "model-inferencing" 26 | value: "inf2-inference" 27 | effect: NoSchedule 28 | expireAfter: 8h 29 | requirements: 30 | - key: karpenter.k8s.aws/instance-family 31 | operator: In 32 | values: ["inf2"] 33 | - key: kubernetes.io/arch 34 | operator: In 35 | values: ["amd64"] 36 | - key: kubernetes.io/os 37 | operator: In 38 | values: ["linux"] 39 | - key: karpenter.sh/capacity-type 40 | operator: In 41 | values: ["on-demand"] 
42 | - key: karpenter.k8s.aws/instance-accelerator-manufacturer 43 | operator: In 44 | values: ["aws"] 45 | --- 46 | apiVersion: karpenter.k8s.aws/v1 47 | kind: EC2NodeClass 48 | metadata: 49 | name: inf2-inference 50 | spec: 51 | kubelet: 52 | podsPerCore: 2 53 | maxPods: 20 54 | systemReserved: 55 | cpu: 500m 56 | memory: 900Mi 57 | subnetSelectorTerms: 58 | - tags: 59 | Environment: dev 60 | Name: ${CLUSTER_NAME}-private-* 61 | karpenter.sh/discovery: ${CLUSTER_NAME} 62 | securityGroupSelectorTerms: 63 | - tags: 64 | Environment: dev 65 | karpenter.sh/discovery: ${CLUSTER_NAME} 66 | amiFamily: "AL2023" 67 | amiSelectorTerms: 68 | - name: "amazon-eks-node-al2023-x86_64-neuron-1.33-v*" 69 | role: KarpenterNode-${CLUSTER_NAME} 70 | tags: 71 | Environment: dev 72 | karpenter.sh/discovery: ${CLUSTER_NAME} 73 | model-inferencing: "inf2-inference" 74 | ray-control-plane: "false" 75 | Provisioned-By: aws-solutions-library-samples/guidance-for-automated-provisioning-of-application-ready-amazon-eks-clusters 76 | blockDeviceMappings: 77 | - deviceName: /dev/xvda 78 | ebs: 79 | volumeSize: 300Gi 80 | volumeType: gp3 81 | iops: 8000 82 | encrypted: false 83 | deleteOnTermination: true 84 | throughput: 256 85 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_opensearch/src/LangfuseConfig.ts: -------------------------------------------------------------------------------- 1 | import { Langfuse } from 'langfuse'; 2 | import 'dotenv/config'; 3 | 4 | // Validate environment variables 5 | const publicKey = process.env.LANGFUSE_PUBLIC_KEY; 6 | const secretKey = process.env.LANGFUSE_SECRET_KEY; 7 | const host = process.env.LANGFUSE_HOST; 8 | 9 | if (!publicKey || !secretKey || !host) { 10 | console.warn('Langfuse configuration incomplete. Some environment variables are missing:'); 11 | console.warn('- LANGFUSE_PUBLIC_KEY:', publicKey ? '✓' : '✗'); 12 | console.warn('- LANGFUSE_SECRET_KEY:', secretKey ? '✓' : '✗'); 13 | console.warn('- LANGFUSE_HOST:', host ? '✓' : '✗'); 14 | console.warn('Langfuse tracing will be disabled.'); 15 | } 16 | 17 | // Initialize Langfuse client 18 | export const langfuse = publicKey && secretKey && host ? 
new Langfuse({ 19 | secretKey, 20 | publicKey, 21 | baseUrl: host, 22 | flushAt: 1, // Send traces immediately for development 23 | }) : null; 24 | 25 | // Helper function to create a trace 26 | export function createTrace(name: string, input?: any, metadata?: any) { 27 | if (!langfuse) { 28 | console.warn('Langfuse not configured, skipping trace creation'); 29 | return null; 30 | } 31 | 32 | return langfuse.trace({ 33 | name, 34 | input, 35 | metadata: { 36 | ...metadata, 37 | timestamp: new Date().toISOString(), 38 | environment: 'development' 39 | } 40 | }); 41 | } 42 | 43 | // Helper function to create a span within a trace 44 | export function createSpan(trace: any, name: string, input?: any, metadata?: any) { 45 | if (!trace) { 46 | return null; 47 | } 48 | 49 | return trace.span({ 50 | name, 51 | input, 52 | metadata: { 53 | ...metadata, 54 | timestamp: new Date().toISOString() 55 | } 56 | }); 57 | } 58 | 59 | // Helper function to create a generation (LLM call) within a trace 60 | export function createGeneration(trace: any, name: string, input?: any, model?: string, metadata?: any) { 61 | if (!trace) { 62 | return null; 63 | } 64 | 65 | return trace.generation({ 66 | name, 67 | input, 68 | model, 69 | metadata: { 70 | ...metadata, 71 | timestamp: new Date().toISOString() 72 | } 73 | }); 74 | } 75 | 76 | // Helper function to flush traces (useful for cleanup) 77 | export async function flushLangfuse() { 78 | if (langfuse) { 79 | await langfuse.flushAsync(); 80 | } 81 | } 82 | 83 | // Export configuration status 84 | export const isLangfuseEnabled = !!langfuse; 85 | 86 | console.log(`Langfuse tracing: ${isLangfuseEnabled ? 'ENABLED' : 'DISABLED'}`); 87 | -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/.env.example: -------------------------------------------------------------------------------- 1 | # LiteLLM Configuration for Reasoning Models 2 | LITELLM_API_KEY=your-litellm-api-key 3 | LITELLM_BASE_URL=http://your-litellm-server:8080/v1 4 | REASONING_MODEL=qwen-qwq-32b-preview 5 | 6 | # Embedding Configuration (separate from reasoning) 7 | EMBEDDING_API_KEY=your-embedding-api-key 8 | EMBEDDING_BASE_URL=http://your-embedding-server:8080/v1 9 | EMBEDDING_MODEL=llamacpp-embedding 10 | 11 | 12 | # AWS Configuration 13 | AWS_REGION=us-east-1 14 | OPENSEARCH_ENDPOINT=https://your-opensearch-domain.region.es.amazonaws.com 15 | 16 | # Tavily Web Search Configuration 17 | TAVILY_API_KEY=your-tavily-api-key 18 | 19 | # Optional: Langfuse for observability 20 | LANGFUSE_HOST=https://cloud.langfuse.com 21 | LANGFUSE_PUBLIC_KEY=your-public-key 22 | LANGFUSE_SECRET_KEY=your-secret-key 23 | 24 | # Application Settings 25 | KNOWLEDGE_DIR=knowledge 26 | OUTPUT_DIR=output 27 | VECTOR_INDEX_NAME=knowledge-embeddings 28 | TOP_K_RESULTS=5 29 | BYPASS_TOOL_CONSENT=true 30 | 31 | # Configuration Notes: 32 | # 33 | # LITELLM_API_KEY: API key for your LiteLLM server hosting reasoning models 34 | # LITELLM_BASE_URL: Endpoint for LiteLLM server (e.g., http://localhost:8080/v1) 35 | # REASONING_MODEL: Model name for agent reasoning tasks (e.g., qwen-qwq-32b-preview) 36 | # 37 | # EMBEDDING_API_KEY: API key for embedding service (can be same as LITELLM_API_KEY) 38 | # EMBEDDING_BASE_URL: Endpoint for embedding generation (can be same as LITELLM_BASE_URL) 39 | # EMBEDDING_MODEL: Model name for generating embeddings (e.g., llamacpp-embedding) 40 | # 41 | # OPENAI_*: Legacy configuration for backward compatibility 42 | # 
DEFAULT_MODEL: Fallback model ID if LiteLLM fails 43 | # 44 | # AWS_REGION: AWS region for OpenSearch and other AWS services 45 | # OPENSEARCH_ENDPOINT: Your AWS OpenSearch domain endpoint 46 | # 47 | # TAVILY_API_KEY: API key for Tavily web search service (get from https://tavily.com) 48 | # 49 | # LANGFUSE_*: Optional observability tracking (leave empty to disable) 50 | # 51 | # KNOWLEDGE_DIR: Directory containing knowledge files to embed 52 | # OUTPUT_DIR: Directory for generated outputs and reports 53 | # VECTOR_INDEX_NAME: OpenSearch index name for vector storage 54 | # TOP_K_RESULTS: Default number of search results to return 55 | # 56 | # Model Usage: 57 | # - Reasoning Tasks (All Agents): Uses REASONING_MODEL via LiteLLM 58 | # - Embedding Tasks (RAG): Uses EMBEDDING_MODEL via embedding endpoint 59 | # - Web Search: Uses Tavily API for real-time information when RAG relevance is low 60 | # - Both LLM models can point to the same LiteLLM server with different model names 61 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_opensearch/src/test-agents.ts: -------------------------------------------------------------------------------- 1 | import 'dotenv/config'; 2 | import { KnowledgeAgent, RAGAgent, MCPAgent, SupervisorAgent } from "./agents"; 3 | import MCPClient from "./MCPClient"; 4 | import { logTitle } from "./utils"; 5 | import path from "path"; 6 | 7 | // Simple test to verify all agents can be initialized 8 | async function testAgents() { 9 | logTitle('TESTING MULTI-AGENT SYSTEM'); 10 | 11 | try { 12 | // Test Knowledge Agent 13 | console.log('Testing Knowledge Agent...'); 14 | const knowledgeAgent = new KnowledgeAgent(); 15 | await knowledgeAgent.init(); 16 | const hasChanges = await knowledgeAgent.checkForChanges(); 17 | console.log(`Knowledge changes detected: ${hasChanges}`); 18 | await knowledgeAgent.close(); 19 | console.log('✓ Knowledge Agent test passed'); 20 | 21 | // Test RAG Agent 22 | console.log('\nTesting RAG Agent...'); 23 | const ragAgent = new RAGAgent(); 24 | await ragAgent.init(); 25 | const stats = ragAgent.getStats(); 26 | console.log('RAG Agent stats:', stats); 27 | await ragAgent.close(); 28 | console.log('✓ RAG Agent test passed'); 29 | 30 | // Test MCP Agent 31 | console.log('\nTesting MCP Agent...'); 32 | const outPath = path.resolve(process.cwd(), 'output'); 33 | const fileMCP = new MCPClient("filesystem", "npx", ['-y', '@modelcontextprotocol/server-filesystem', outPath]); 34 | const mcpAgent = new MCPAgent([fileMCP]); 35 | await mcpAgent.init(); 36 | const mcpStats = mcpAgent.getStats(); 37 | console.log('MCP Agent stats:', mcpStats); 38 | const tools = await mcpAgent.listAvailableTools(); 39 | console.log(`Available tools: ${tools.length}`); 40 | await mcpAgent.close(); 41 | console.log('✓ MCP Agent test passed'); 42 | 43 | // Test Supervisor Agent 44 | console.log('\nTesting Supervisor Agent...'); 45 | const fileMCP2 = new MCPClient("filesystem", "npx", ['-y', '@modelcontextprotocol/server-filesystem', outPath]); 46 | const supervisor = new SupervisorAgent([fileMCP2]); 47 | await supervisor.init(); 48 | console.log('Supervisor Agent initialized successfully'); 49 | await supervisor.close(); 50 | console.log('✓ Supervisor Agent test passed'); 51 | 52 | console.log('\n🎉 All agent tests passed successfully!'); 53 | 54 | } catch (error) { 55 | console.error('❌ Agent test failed:', error); 56 | process.exit(1); 57 | } 58 | } 59 | 60 | // Run the test 61 | (async () => { 62 | await 
testAgents(); 63 | })(); 64 | -------------------------------------------------------------------------------- /base_eks_setup/karpenter_nodepool/gpu-nodepool.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # GPU Node Pool for ML inference workloads 3 | apiVersion: karpenter.sh/v1 4 | kind: NodePool 5 | metadata: 6 | name: gpu-inference 7 | spec: 8 | limits: 9 | cpu: 1024 10 | memory: 8192Gi 11 | disruption: 12 | consolidationPolicy: WhenEmptyOrUnderutilized 13 | consolidateAfter: 30m 14 | template: 15 | metadata: 16 | labels: 17 | model-inferencing: "gpu-inference" 18 | ray-control-plane: "false" 19 | nvidia.com/gpu: "present" 20 | spec: 21 | nodeClassRef: 22 | group: karpenter.k8s.aws 23 | kind: EC2NodeClass 24 | name: gpu-inference 25 | expireAfter: 8h 26 | requirements: 27 | - key: karpenter.k8s.aws/instance-category 28 | operator: In 29 | values: 30 | - g 31 | - key: karpenter.k8s.aws/instance-family 32 | operator: In 33 | values: ["g5", "g6"] 34 | - key: kubernetes.io/arch 35 | operator: In 36 | values: ["amd64"] 37 | - key: kubernetes.io/os 38 | operator: In 39 | values: ["linux"] 40 | - key: karpenter.sh/capacity-type 41 | operator: In 42 | values: ["on-demand"] 43 | - key: karpenter.k8s.aws/instance-gpu-manufacturer 44 | operator: In 45 | values: ["nvidia"] 46 | - key: karpenter.k8s.aws/instance-gpu-count 47 | operator: In 48 | values: ["4"] 49 | --- 50 | apiVersion: karpenter.k8s.aws/v1 51 | kind: EC2NodeClass 52 | metadata: 53 | name: gpu-inference 54 | spec: 55 | kubelet: 56 | podsPerCore: 2 57 | maxPods: 20 58 | systemReserved: 59 | cpu: 500m 60 | memory: 900Mi 61 | subnetSelectorTerms: 62 | - tags: 63 | Environment: dev 64 | Name: ${CLUSTER_NAME}-private-* 65 | karpenter.sh/discovery: ${CLUSTER_NAME} 66 | securityGroupSelectorTerms: 67 | - tags: 68 | Environment: dev 69 | karpenter.sh/discovery: ${CLUSTER_NAME} 70 | amiFamily: "AL2023" 71 | amiSelectorTerms: 72 | - name: "amazon-eks-node-al2023-x86_64-nvidia-1.33-v*" 73 | role: KarpenterNode-${CLUSTER_NAME} 74 | tags: 75 | Environment: dev 76 | karpenter.sh/discovery: ${CLUSTER_NAME} 77 | model-inferencing: "gpu-inference" 78 | ray-control-plane: "false" 79 | Provisioned-By: aws-solutions-library-samples/guidance-for-automated-provisioning-of-application-ready-amazon-eks-clusters 80 | blockDeviceMappings: 81 | - deviceName: /dev/xvda 82 | ebs: 83 | volumeSize: 500Gi 84 | volumeType: gp3 85 | iops: 10000 86 | encrypted: false 87 | deleteOnTermination: true 88 | throughput: 512 89 | -------------------------------------------------------------------------------- /agentic-apps/agentic-idp/storage.py: -------------------------------------------------------------------------------- 1 | from langchain_openai import ChatOpenAI 2 | from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder 3 | from langchain_core.messages import AIMessage, HumanMessage 4 | from langgraph.graph import StateGraph, START, END 5 | from langgraph.graph.message import add_messages 6 | from langgraph.checkpoint.memory import MemorySaver 7 | from langchain_core.messages import SystemMessage, HumanMessage 8 | 9 | from typing import Annotated, List 10 | from langchain.prompts.chat import HumanMessagePromptTemplate 11 | 12 | 13 | from typing_extensions import TypedDict 14 | 15 | import requests 16 | import json 17 | import base64 18 | 19 | import logging 20 | 21 | from langfuse import Langfuse 22 | from datetime import datetime, timedelta 23 | import os 24 | import math 25 | import openai 26 | 
27 | from PyPDF2 import PdfReader 28 | 29 | from pathlib import Path 30 | 31 | from langgraph.pregel import RetryPolicy 32 | 33 | from decision import State 34 | 35 | # External function to store the data in S3 36 | async def call_store_service(text: str) -> dict: 37 | """ 38 | External function to send text to a storage service; returns a status dict 39 | """ 40 | try: 41 | # Example API call - replace with your actual service endpoint 42 | # response = requests.post( 43 | # "http://your-service-endpoint/process", 44 | # json={"text": text} 45 | # ) 46 | # return response.json() 47 | print(f"Making storage call with data {text}") 48 | return {"result": "success"} 49 | except Exception as e: 50 | logging.error(f"External service error: {str(e)}") 51 | return {"error": str(e)} 52 | 53 | # Add new node for external processing 54 | async def external_storage_node(state: State) -> State: 55 | """ 56 | Node that sends all AI-generated messages to the external storage service 57 | """ 58 | # Collect all AI messages from the conversation state 59 | # last_message = state["messages"] 60 | # translated = [state["messages"][0]] + [AIMessage(content=msg.content) for msg in state["messages"][1:]] 61 | ai_messages = [msg for msg in state["messages"] if isinstance(msg, AIMessage)] 62 | print(f"AI Messages to Store: {json.dumps([msg.content for msg in ai_messages], indent=2)}") 63 | 64 | 65 | 66 | print(f"Data to Store {ai_messages}") 67 | print(json.dumps([msg.content for msg in ai_messages], indent=2)) 68 | 69 | # Call external service 70 | result = await call_store_service(json.dumps([msg.content for msg in ai_messages], indent=2)) 71 | 72 | # Create new message with processed result 73 | processed_message = HumanMessage( 74 | content=f"External Processing Results: {json.dumps(result, indent=2)}" 75 | ) 76 | 77 | return {"messages": [processed_message]} -------------------------------------------------------------------------------- /model-hosting/standalone-llamacpp-embedding.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: PersistentVolumeClaim 4 | metadata: 5 | name: llamacpp-embedding-server 6 | annotations: 7 | kubernetes.io/pvc-protection: "false" 8 | spec: 9 | accessModes: 10 | - ReadWriteOnce 11 | resources: 12 | requests: 13 | storage: 100Gi 14 | storageClassName: gp3 15 | volumeMode: Filesystem 16 | --- 17 | apiVersion: apps/v1 18 | kind: Deployment 19 | metadata: 20 | name: llamacpp-embedding-server 21 | labels: 22 | app: llamacpp-embedding-server 23 | spec: 24 | replicas: 1 25 | selector: 26 | matchLabels: 27 | app: llamacpp-embedding-server 28 | template: 29 | metadata: 30 | labels: 31 | app: llamacpp-embedding-server 32 | spec: 33 | nodeSelector: 34 | kubernetes.io/arch: arm64 35 | affinity: 36 | nodeAffinity: 37 | requiredDuringSchedulingIgnoredDuringExecution: 38 | nodeSelectorTerms: 39 | - matchExpressions: 40 | - key: kubernetes.io/arch 41 | operator: In 42 | values: 43 | - arm64 44 | volumes: 45 | - name: cache-volume 46 | persistentVolumeClaim: 47 | claimName: llamacpp-embedding-server 48 | containers: 49 | - name: llamacpp-embedding-server 50 | image: ghcr.io/ggml-org/llama.cpp:server 51 | args: [ 52 | "--model-url", "https://huggingface.co/ChristianAzinn/snowflake-arctic-embed-s-gguf/resolve/main/snowflake-arctic-embed-s-f16.GGUF", "--port", "8000", "--host", "0.0.0.0", "--embedding", "--threads", "16" 53 | ] 54 | env: 55 | - name: OMP_NUM_THREADS 56 | value: "16" 57 | ports: 58 | - containerPort: 8000 59 | resources: 60 | limits: 61 | memory: 32Gi 62 | requests: 63 | cpu: "16" 64 | memory:
32Gi 65 | volumeMounts: 66 | - mountPath: /models 67 | name: cache-volume 68 | livenessProbe: 69 | httpGet: 70 | path: /health 71 | port: 8000 72 | initialDelaySeconds: 120 73 | periodSeconds: 10 74 | failureThreshold: 15 75 | successThreshold: 1 76 | readinessProbe: 77 | httpGet: 78 | path: /health 79 | port: 8000 80 | initialDelaySeconds: 120 81 | periodSeconds: 10 82 | 83 | --- 84 | apiVersion: v1 85 | kind: Service 86 | metadata: 87 | name: llamacpp-embedding-server 88 | spec: 89 | ports: 90 | - name: http-llamacpp-embedding-server 91 | port: 8000 92 | protocol: TCP 93 | targetPort: 8000 94 | selector: 95 | app: llamacpp-embedding-server 96 | sessionAffinity: None 97 | type: ClusterIP 98 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_opensearch/src/embedCSV.ts: -------------------------------------------------------------------------------- 1 | import fs from "fs"; 2 | import path from "path"; 3 | import { parse } from "csv-parse/sync"; 4 | import EmbeddingRetriever from "./EmbeddingRetriever"; 5 | import { logTitle } from "./utils"; 6 | 7 | // Function to process and embed CSV data 8 | async function processCSVFile(filePath: string) { 9 | logTitle('PROCESSING CSV FILE'); 10 | console.log(`Processing file: ${filePath}`); 11 | 12 | try { 13 | // Read the CSV file 14 | const fileContent = fs.readFileSync(filePath, 'utf-8'); 15 | 16 | // Parse the CSV content 17 | const records = parse(fileContent, { 18 | columns: true, 19 | skip_empty_lines: true 20 | }); 21 | 22 | console.log(`Found ${records.length} records in the CSV file`); 23 | 24 | // Initialize the embedding retriever with llamacpp-embedding model 25 | const embeddingRetriever = new EmbeddingRetriever("llamacpp-embedding"); 26 | 27 | // Process each record 28 | let processedCount = 0; 29 | for (const record of records) { 30 | // Combine question and context for better retrieval 31 | const documentText = `Question: ${record.question}\nContext: ${record.context}`; 32 | 33 | // Embed the document 34 | await embeddingRetriever.embedDocument(documentText); 35 | 36 | processedCount++; 37 | if (processedCount % 10 === 0) { 38 | console.log(`Processed ${processedCount}/${records.length} records`); 39 | } 40 | } 41 | 42 | console.log(`Successfully embedded ${processedCount} records from the CSV file`); 43 | 44 | // Close OpenSearch connection when done 45 | // @ts-ignore - Access private property for cleanup 46 | if (embeddingRetriever.vectorStore && typeof embeddingRetriever.vectorStore.close === 'function') { 47 | // @ts-ignore 48 | await embeddingRetriever.vectorStore.close(); 49 | } 50 | 51 | return true; 52 | } catch (error) { 53 | console.error("Error processing CSV file:", error); 54 | return false; 55 | } 56 | } 57 | 58 | // Main function 59 | (async () => { 60 | const csvFilePath = path.join(process.cwd(), '..', 'knowledge', 'q_c_data.csv'); 61 | 62 | if (!fs.existsSync(csvFilePath)) { 63 | console.error(`File not found: ${csvFilePath}`); 64 | process.exit(1); 65 | } 66 | 67 | const success = await processCSVFile(csvFilePath); 68 | 69 | if (success) { 70 | console.log("CSV processing completed successfully"); 71 | } else { 72 | console.error("CSV processing failed"); 73 | process.exit(1); 74 | } 75 | })(); 76 | -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/Dockerfile.main: -------------------------------------------------------------------------------- 1 | # Dockerfile for Main Application 
(FastAPI Server) 2 | FROM python:3.11-slim as base 3 | 4 | # Set environment variables 5 | ENV PYTHONUNBUFFERED=1 6 | ENV PYTHONDONTWRITEBYTECODE=1 7 | ENV DEBIAN_FRONTEND=noninteractive 8 | 9 | # Install system dependencies 10 | RUN apt-get update && apt-get install -y \ 11 | curl \ 12 | git \ 13 | build-essential \ 14 | && rm -rf /var/lib/apt/lists/* 15 | 16 | # Set working directory 17 | WORKDIR /app 18 | 19 | # Copy requirements first for better caching 20 | COPY requirements.txt . 21 | 22 | # Install Python dependencies in stages to resolve conflicts 23 | # First install core dependencies without strands-agents-tools 24 | RUN pip install --no-cache-dir \ 25 | openai>=1.0.0 \ 26 | litellm>=1.0.0 \ 27 | mcp>=1.0.0 \ 28 | fastmcp>=0.9.0 \ 29 | fastapi>=0.104.0 \ 30 | uvicorn>=0.24.0 \ 31 | boto3>=1.34.0 \ 32 | opensearch-py>=2.4.0 \ 33 | aws-requests-auth>=0.4.3 34 | 35 | # Install datasets and ragas with compatible dill version 36 | RUN pip install --no-cache-dir \ 37 | "dill>=0.3.0,<0.3.8" \ 38 | "datasets==2.14.0" \ 39 | "pyarrow>=12.0.0,<15.0.0" \ 40 | "ragas>=0.1.0,<0.2.0" 41 | 42 | # Install compatible langchain versions (use older langchain-aws that works with older langchain-core) 43 | RUN pip install --no-cache-dir \ 44 | "langchain>=0.1.0,<0.3.0" \ 45 | "langchain-core>=0.1.0,<0.3.0" \ 46 | "langchain-aws>=0.1.0,<0.2.0" 47 | 48 | # Install strands packages (which will upgrade dill but should work) 49 | RUN pip install --no-cache-dir \ 50 | strands-agents>=0.1.0 \ 51 | strands-agents-tools>=0.1.0 \ 52 | "strands-agents[litellm]>=0.1.0" 53 | 54 | # Install remaining dependencies 55 | RUN pip install --no-cache-dir \ 56 | "numpy>=1.24.0,<2.0.0" \ 57 | "scikit-learn>=1.3.0" \ 58 | "pandas>=2.0.0" \ 59 | "python-dotenv>=1.0.0" \ 60 | "requests>=2.31.0" \ 61 | "httpx>=0.25.0" \ 62 | "langfuse>=2.0.0" \ 63 | "pydantic>=2.0.0" \ 64 | "aiofiles>=23.0.0" \ 65 | "python-multipart>=0.0.6" 66 | 67 | # Copy the entire application 68 | COPY . . 
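# NOTE (suggested addition, not in the original Dockerfile): the staged pip
# installs above exist to work around the dill/datasets/strands version
# conflicts, so an optional build-time import check like the commented line
# below can fail the build early if the final dependency set is broken,
# rather than failing at container startup.
# RUN python -c "import strands, litellm, fastapi, opensearchpy, langfuse"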
69 | 70 | # Create necessary directories 71 | RUN mkdir -p knowledge output logs 72 | 73 | # Set proper permissions 74 | RUN chmod +x scripts/*.py 2>/dev/null || true 75 | 76 | # Copy startup and health check scripts 77 | COPY startup-main.sh /app/startup-main.sh 78 | COPY healthcheck-main.sh /app/healthcheck-main.sh 79 | 80 | # Make startup and health check scripts executable 81 | RUN chmod +x /app/startup-main.sh /app/healthcheck-main.sh 82 | 83 | # Expose the FastAPI server port 84 | EXPOSE 8000 85 | 86 | # Add health check 87 | HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \ 88 | CMD /app/healthcheck-main.sh 89 | 90 | # Set the default command 91 | CMD ["/app/startup-main.sh"] 92 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/test-endpoint-with-tools.js: -------------------------------------------------------------------------------- 1 | // Test script to check LLM endpoint with tools 2 | import 'dotenv/config'; 3 | import { OpenAI } from 'openai'; 4 | 5 | async function testEndpointWithTools() { 6 | console.log('Testing LLM endpoint with tools'); 7 | console.log('Base URL:', process.env.OPENAI_BASE_URL); 8 | 9 | // Create OpenAI client with simplified configuration 10 | const openai = new OpenAI({ 11 | apiKey: process.env.OPENAI_API_KEY, 12 | baseURL: process.env.OPENAI_BASE_URL, 13 | defaultHeaders: { 14 | // Using only one authentication method 15 | "Authorization": `Bearer ${process.env.OPENAI_API_KEY}` 16 | } 17 | }); 18 | 19 | // Simple message with tools 20 | const messages = [ 21 | { role: "user", content: "What's the weather like in Seattle?" } 22 | ]; 23 | 24 | // Define a simple tool 25 | const tools = [ 26 | { 27 | type: "function", 28 | function: { 29 | name: "get_weather", 30 | description: "Get the current weather in a location", 31 | parameters: { 32 | type: "object", 33 | properties: { 34 | location: { 35 | type: "string", 36 | description: "The city and state, e.g. San Francisco, CA" 37 | } 38 | }, 39 | required: ["location"] 40 | } 41 | } 42 | } 43 | ]; 44 | 45 | try { 46 | console.log('Sending request with tools...'); 47 | 48 | // Use the original model name format that worked in the simple test 49 | const modelName = 'Qwen/QwQ-32B-AWQ'; 50 | console.log('Using model:', modelName); 51 | 52 | const response = await openai.chat.completions.create({ 53 | model: modelName, 54 | messages: messages, 55 | tools: tools, 56 | tool_choice: "auto", 57 | stream: false 58 | }); 59 | 60 | console.log('Success! 
Response:'); 61 | console.log(JSON.stringify(response, null, 2)); 62 | return response; 63 | } catch (error) { 64 | console.error('Error occurred:'); 65 | console.error('Status:', error.status); 66 | console.error('Headers:', error.headers); 67 | 68 | if (error.response) { 69 | try { 70 | const responseText = await error.response.text(); 71 | console.error('Response body:', responseText); 72 | try { 73 | const responseJson = JSON.parse(responseText); 74 | console.error('Response JSON:', JSON.stringify(responseJson, null, 2)); 75 | } catch (parseError) { 76 | // If not valid JSON, the text version is already logged 77 | } 78 | } catch (e) { 79 | console.error('Could not read response data:', e); 80 | } 81 | } 82 | 83 | throw error; 84 | } 85 | } 86 | 87 | // Run the test 88 | testEndpointWithTools() 89 | .then(() => console.log('Test completed successfully')) 90 | .catch(err => console.error('Test failed:', err.message)); 91 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/src/updateRAG.ts: -------------------------------------------------------------------------------- 1 | import fs from "fs"; 2 | import path from "path"; 3 | import { parse } from "csv-parse/sync"; 4 | import EmbeddingRetriever from "./EmbeddingRetriever"; 5 | import { logTitle } from "./utils"; 6 | 7 | // Function to update the RAG process with CSV data 8 | async function updateRAGWithCSV() { 9 | logTitle('UPDATING RAG WITH CSV DATA'); 10 | 11 | const csvFilePath = path.join(process.cwd(), '..', 'knowledge', 'q_c_data.csv'); 12 | 13 | if (!fs.existsSync(csvFilePath)) { 14 | console.error(`File not found: ${csvFilePath}`); 15 | return false; 16 | } 17 | 18 | try { 19 | // Read the CSV file 20 | const fileContent = fs.readFileSync(csvFilePath, 'utf-8'); 21 | 22 | // Parse the CSV content 23 | const records = parse(fileContent, { 24 | columns: true, 25 | skip_empty_lines: true 26 | }); 27 | 28 | console.log(`Found ${records.length} records in the CSV file`); 29 | 30 | // Initialize the embedding retriever 31 | const embeddingRetriever = new EmbeddingRetriever("custom-embedding-model"); 32 | 33 | // Process each record in batches to avoid overwhelming the system 34 | const batchSize = 50; 35 | let processedCount = 0; 36 | 37 | for (let i = 0; i < records.length; i += batchSize) { 38 | const batch = records.slice(i, i + batchSize); 39 | 40 | // Process batch in parallel 41 | await Promise.all(batch.map(async (record) => { 42 | // Combine question and context for better retrieval 43 | const documentText = `Question: ${record.question}\nContext: ${record.context}`; 44 | 45 | // Embed the document 46 | await embeddingRetriever.embedDocument(documentText); 47 | })); 48 | 49 | processedCount += batch.length; 50 | console.log(`Processed ${processedCount}/${records.length} records`); 51 | } 52 | 53 | console.log(`Successfully embedded ${processedCount} records from the CSV file`); 54 | 55 | // Close Milvus connection when done 56 | // @ts-ignore - Access private property for cleanup 57 | if (embeddingRetriever.vectorStore && typeof embeddingRetriever.vectorStore.close === 'function') { 58 | // @ts-ignore 59 | await embeddingRetriever.vectorStore.close(); 60 | } 61 | 62 | return true; 63 | } catch (error) { 64 | console.error("Error updating RAG with CSV data:", error); 65 | return false; 66 | } 67 | } 68 | 69 | // Main function 70 | (async () => { 71 | const success = await updateRAGWithCSV(); 72 | 73 | if (success) { 74 | console.log("RAG update completed 
successfully"); 75 | } else { 76 | console.error("RAG update failed"); 77 | process.exit(1); 78 | } 79 | })(); 80 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_opensearch/src/test-langfuse.ts: -------------------------------------------------------------------------------- 1 | import 'dotenv/config'; 2 | import { createTrace, createSpan, createGeneration, flushLangfuse, isLangfuseEnabled } from './LangfuseConfig'; 3 | 4 | async function testLangfuseIntegration() { 5 | console.log('Testing Langfuse Integration...'); 6 | console.log('Langfuse enabled:', isLangfuseEnabled); 7 | 8 | if (!isLangfuseEnabled) { 9 | console.log('Langfuse is not enabled. Please check your environment variables.'); 10 | return; 11 | } 12 | 13 | // Create a test trace 14 | const trace = createTrace( 15 | 'test-trace', 16 | { message: 'Testing Langfuse integration' }, 17 | { testType: 'integration-test' } 18 | ); 19 | 20 | if (!trace) { 21 | console.log('Failed to create trace'); 22 | return; 23 | } 24 | 25 | console.log('Created trace successfully'); 26 | 27 | // Create a test span 28 | const span = createSpan( 29 | trace, 30 | 'test-span', 31 | { operation: 'test-operation' }, 32 | { spanType: 'test' } 33 | ); 34 | 35 | if (span) { 36 | console.log('Created span successfully'); 37 | 38 | // Simulate some work 39 | await new Promise(resolve => setTimeout(resolve, 100)); 40 | 41 | // End the span 42 | span.end({ 43 | output: { result: 'test completed successfully' } 44 | }); 45 | 46 | console.log('Ended span successfully'); 47 | } 48 | 49 | // Create a test generation 50 | const generation = createGeneration( 51 | trace, 52 | 'test-generation', 53 | { prompt: 'Test prompt' }, 54 | 'test-model', 55 | { generationType: 'test' } 56 | ); 57 | 58 | if (generation) { 59 | console.log('Created generation successfully'); 60 | 61 | // Simulate LLM response 62 | await new Promise(resolve => setTimeout(resolve, 200)); 63 | 64 | // End the generation 65 | generation.end({ 66 | output: { response: 'Test response from LLM' }, 67 | usage: { 68 | promptTokens: 10, 69 | completionTokens: 5, 70 | totalTokens: 15 71 | } 72 | }); 73 | 74 | console.log('Ended generation successfully'); 75 | } 76 | 77 | // Update the main trace 78 | trace.update({ 79 | output: { 80 | success: true, 81 | message: 'Test completed successfully' 82 | } 83 | }); 84 | 85 | console.log('Updated trace successfully'); 86 | 87 | // Flush traces 88 | console.log('Flushing traces...'); 89 | await flushLangfuse(); 90 | console.log('Traces flushed successfully'); 91 | 92 | console.log('Langfuse integration test completed!'); 93 | console.log('Check your Langfuse dashboard to see the test traces.'); 94 | } 95 | 96 | // Run the test 97 | testLangfuseIntegration().catch(console.error); 98 | -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/src/config.py: -------------------------------------------------------------------------------- 1 | """Configuration management for the multi-agent RAG system.""" 2 | 3 | import os 4 | from typing import Optional 5 | from dotenv import load_dotenv 6 | 7 | # Load environment variables 8 | load_dotenv() 9 | 10 | class Config: 11 | """Configuration class for the application.""" 12 | 13 | # LiteLLM Configuration for Reasoning Models 14 | LITELLM_API_KEY: str = os.getenv("LITELLM_API_KEY", os.getenv("OPENAI_API_KEY", "")) 15 | LITELLM_BASE_URL: str = os.getenv("LITELLM_BASE_URL", 
os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1")) 16 | REASONING_MODEL: str = os.getenv("REASONING_MODEL", "qwen-qwq-32b-preview") 17 | 18 | # Embedding Configuration (separate from reasoning) 19 | EMBEDDING_API_KEY: str = os.getenv("EMBEDDING_API_KEY", os.getenv("OPENAI_API_KEY", "")) 20 | EMBEDDING_BASE_URL: str = os.getenv("EMBEDDING_BASE_URL", os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1")) 21 | EMBEDDING_MODEL: str = os.getenv("EMBEDDING_MODEL", "llamacpp-embedding") 22 | 23 | # Legacy OpenAI Configuration (for backward compatibility) 24 | OPENAI_API_KEY: str = os.getenv("OPENAI_API_KEY", "") 25 | OPENAI_BASE_URL: str = os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1") 26 | DEFAULT_MODEL: str = os.getenv("DEFAULT_MODEL", os.getenv("REASONING_MODEL", "qwen-qwq-32b-preview")) 27 | 28 | # AWS Configuration 29 | AWS_REGION: str = os.getenv("AWS_REGION", "us-east-1") 30 | OPENSEARCH_ENDPOINT: str = os.getenv("OPENSEARCH_ENDPOINT", "") 31 | 32 | # Tavily MCP Configuration 33 | TAVILY_MCP_SERVICE_URL: str = os.getenv("TAVILY_MCP_SERVICE_URL", "http://localhost:8001/mcp") 34 | 35 | # Langfuse Configuration 36 | LANGFUSE_HOST: str = os.getenv("LANGFUSE_HOST", "") 37 | LANGFUSE_PUBLIC_KEY: str = os.getenv("LANGFUSE_PUBLIC_KEY", "") 38 | LANGFUSE_SECRET_KEY: str = os.getenv("LANGFUSE_SECRET_KEY", "") 39 | 40 | # Application Configuration 41 | KNOWLEDGE_DIR: str = os.getenv("KNOWLEDGE_DIR", "knowledge") 42 | OUTPUT_DIR: str = os.getenv("OUTPUT_DIR", "output") 43 | EMBEDDING_ENDPOINT: str = os.getenv("EMBEDDING_ENDPOINT", "") 44 | 45 | # Vector Search Configuration 46 | VECTOR_INDEX_NAME: str = os.getenv("VECTOR_INDEX_NAME", "knowledge-embeddings") 47 | TOP_K_RESULTS: int = int(os.getenv("TOP_K_RESULTS", "5")) 48 | 49 | @classmethod 50 | def is_langfuse_enabled(cls) -> bool: 51 | """Check if Langfuse is properly configured.""" 52 | return bool(cls.LANGFUSE_HOST and cls.LANGFUSE_PUBLIC_KEY and cls.LANGFUSE_SECRET_KEY) 53 | 54 | @classmethod 55 | def validate_config(cls) -> None: 56 | """Validate required configuration.""" 57 | required_vars = [ 58 | ("LITELLM_API_KEY", cls.LITELLM_API_KEY), 59 | ("OPENSEARCH_ENDPOINT", cls.OPENSEARCH_ENDPOINT), 60 | ] 61 | 62 | missing_vars = [name for name, value in required_vars if not value] 63 | 64 | if missing_vars: 65 | raise ValueError(f"Missing required environment variables: {', '.join(missing_vars)}") 66 | 67 | # Global config instance 68 | config = Config() 69 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_opensearch/MULTI_AGENT_GUIDE.md: -------------------------------------------------------------------------------- 1 | # Multi-Agent System Guide 2 | 3 | ## Overview 4 | 5 | This guide explains how to use the multi-agent RAG system that has been implemented to replace the single-agent architecture. 6 | 7 | ## Architecture Changes 8 | 9 | ### Before (Single Agent) 10 | - Single `Agent` class handling all responsibilities 11 | - Direct embedding and retrieval in main flow 12 | - Simple workflow execution 13 | 14 | ### After (Multi-Agent) 15 | - **SupervisorAgent**: Orchestrates the entire workflow 16 | - **KnowledgeAgent**: Manages knowledge embedding and change detection 17 | - **RAGAgent**: Handles context retrieval and semantic search 18 | - **MCPAgent**: Manages tool interactions and LLM communication 19 | 20 | ## Usage 21 | 22 | ### 1. Running the Multi-Agent System 23 | ```bash 24 | pnpm dev 25 | ``` 26 | 27 | ### 2. 
Embedding Knowledge (Standalone) 28 | ```bash 29 | # Embed all knowledge files (markdown, text, JSON, and CSV) with change detection 30 | pnpm embed-knowledge 31 | ``` 32 | 33 | ### 3. Testing the System 34 | ```bash 35 | pnpm test-agents 36 | ``` 37 | 38 | ## Key Features 39 | 40 | ### Automatic Change Detection 41 | - The KnowledgeAgent monitors file changes using hashes 42 | - Only processes modified files for efficiency 43 | - Maintains metadata across runs 44 | 45 | ### Intelligent Workflow 46 | 1. **Knowledge Check**: Automatically detects and embeds new/changed files 47 | 2. **Context Retrieval**: Uses RAG to find relevant information 48 | 3. **Task Execution**: Leverages MCP tools to complete tasks 49 | 50 | ### Error Handling 51 | - Each agent handles its own errors gracefully 52 | - Supervisor provides comprehensive error reporting 53 | - Resource cleanup is handled properly 54 | 55 | ## Agent Responsibilities 56 | 57 | ### SupervisorAgent 58 | - Initializes and coordinates all sub-agents 59 | - Manages the complete workflow execution 60 | - Provides task tracking and result summaries 61 | - Handles cleanup and resource management 62 | 63 | ### KnowledgeAgent 64 | - Scans knowledge directory for changes 65 | - Embeds new or modified documents 66 | - Supports multiple file formats (MD, TXT, JSON, CSV) in a unified process 67 | - Maintains change detection metadata 68 | 69 | ### RAGAgent 70 | - Performs semantic search using embeddings 71 | - Retrieves relevant context for queries 72 | - Supports advanced features like reranking 73 | - Optimizes context for LLM consumption 74 | 75 | ### MCPAgent 76 | - Manages LLM interactions with tool support 77 | - Handles multi-turn conversations 78 | - Processes tool calls through MCP protocol 79 | - Maintains conversation context 80 | 81 | ## Configuration 82 | 83 | The system uses the same environment variables as before: 84 | - `OPENAI_API_KEY`: Your OpenAI API key 85 | - `OPENAI_BASE_URL`: Your model hosting endpoint 86 | - `OPENSEARCH_ENDPOINT`: Your OpenSearch endpoint 87 | - `AWS_REGION`: Your AWS region 88 | 89 | ## Benefits 90 | 91 | 1. **Modularity**: Each agent has a specific purpose 92 | 2. **Maintainability**: Easier to modify individual components 93 | 3. **Scalability**: Agents can be scaled independently 94 | 4. **Reliability**: Better error isolation and handling 95 | 5. 
**Extensibility**: Easy to add new agent types 96 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/src/embedCSV.ts: -------------------------------------------------------------------------------- 1 | import fs from "fs"; 2 | import path from "path"; 3 | import { parse } from "csv-parse/sync"; 4 | import EmbeddingRetriever from "./EmbeddingRetriever"; 5 | import { logTitle } from "./utils"; 6 | 7 | // Function to process and embed CSV data 8 | async function embedCSVData() { 9 | logTitle('EMBEDDING CSV DATA'); 10 | 11 | const csvPath = path.join(process.cwd(), 'knowledge'); 12 | 13 | if (!fs.existsSync(csvPath)) { 14 | console.error(`CSV data directory not found: ${csvPath}`); 15 | return false; 16 | } 17 | 18 | try { 19 | // Get all CSV files in the data directory 20 | const files = fs.readdirSync(csvPath) 21 | .filter(file => file.endsWith('.csv')); 22 | 23 | console.log(`Found ${files.length} CSV files in the data directory`); 24 | 25 | // Initialize the embedding retriever 26 | const embeddingRetriever = new EmbeddingRetriever("custom-embedding-model"); 27 | 28 | // Process each file 29 | for (const file of files) { 30 | const filePath = path.join(csvPath, file); 31 | console.log(`Processing file: ${file}`); 32 | 33 | // Read the file content 34 | const content = fs.readFileSync(filePath, 'utf-8'); 35 | 36 | // Parse CSV 37 | const records = parse(content, { 38 | columns: true, 39 | skip_empty_lines: true 40 | }); 41 | 42 | console.log(`Found ${records.length} records in ${file}`); 43 | 44 | // Process each record 45 | for (const record of records) { 46 | // Convert record to a string representation 47 | const recordString = Object.entries(record) 48 | .map(([key, value]) => `${key}: ${value}`) 49 | .join('\n'); 50 | 51 | // Create a document with metadata 52 | const document = `# ${record.name || record.title || record.id || 'Record'}\n\n${recordString}`; 53 | 54 | // Embed the document 55 | await embeddingRetriever.embedDocument(document); 56 | } 57 | 58 | console.log(`Successfully embedded ${records.length} records from ${file}`); 59 | } 60 | 61 | // Close Milvus connection when done 62 | // @ts-ignore - Access private property for cleanup 63 | if (embeddingRetriever.vectorStore && typeof embeddingRetriever.vectorStore.close === 'function') { 64 | // @ts-ignore 65 | await embeddingRetriever.vectorStore.close(); 66 | } 67 | 68 | return true; 69 | } catch (error) { 70 | console.error("Error embedding CSV data:", error); 71 | return false; 72 | } 73 | } 74 | 75 | // Main function 76 | (async () => { 77 | const success = await embedCSVData(); 78 | 79 | if (success) { 80 | console.log("CSV data embedding completed successfully"); 81 | } else { 82 | console.error("CSV data embedding failed"); 83 | process.exit(1); 84 | } 85 | })(); 86 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_opensearch/src/index.ts: -------------------------------------------------------------------------------- 1 | import 'dotenv/config'; 2 | import MCPClient from "./MCPClient"; 3 | import { SupervisorAgent } from "./agents"; 4 | import path from "path"; 5 | import fs from "fs"; 6 | import { logTitle } from "./utils"; 7 | 8 | // Verify environment variables are loaded 9 | if (!process.env.AWS_REGION || !process.env.OPENSEARCH_ENDPOINT) { 10 | throw new Error('Required environment variables AWS_REGION and OPENSEARCH_ENDPOINT are not set'); 11 | } 12 | 13 | // Use the parent directory 
(where the command is run) instead of src directory 14 | const outPath = path.resolve(process.cwd(), 'output'); 15 | const TASK = ` 16 | Find information about "What is the most important aspect of initial treatment for Bell's palsy?". 17 | Summarize this information and create a comprehensive story about Bell's palsy treatment. 18 | Save the story and important information to a file named "bells_palsy_treatment.md" in the output directory as a beautiful markdown file. 19 | Include sections for: 20 | 1. Overview of Bell's palsy 21 | 2. Most important initial treatment aspects 22 | 3. Timeline for treatment 23 | 4. Expected outcomes 24 | 5. Additional recommendations 25 | ` 26 | 27 | // Make sure output directory exists 28 | if (!fs.existsSync(outPath)) { 29 | fs.mkdirSync(outPath, { recursive: true }); 30 | } 31 | 32 | // Start the multi-agent application 33 | (async () => { 34 | try { 35 | logTitle('INITIALIZING MULTI-AGENT RAG SYSTEM'); 36 | 37 | // Initialize the filesystem MCP client 38 | const fileMCP = new MCPClient("filesystem", "npx", ['-y', '@modelcontextprotocol/server-filesystem', outPath]); 39 | 40 | await main(fileMCP); 41 | } catch (error) { 42 | console.error("Error in main:", error); 43 | process.exit(1); 44 | } 45 | })(); 46 | 47 | async function main(fileMCP: MCPClient) { 48 | let supervisor: SupervisorAgent | null = null; 49 | 50 | try { 51 | // Initialize the supervisor agent with MCP clients 52 | logTitle('INITIALIZING SUPERVISOR AGENT'); 53 | supervisor = new SupervisorAgent([fileMCP], 'Qwen/QwQ-32B-AWQ'); 54 | await supervisor.init(); 55 | 56 | // Execute the complete workflow 57 | logTitle('EXECUTING MULTI-AGENT WORKFLOW'); 58 | console.log('Task:', TASK); 59 | 60 | const result = await supervisor.executeWorkflow(TASK); 61 | 62 | // Display results 63 | logTitle('WORKFLOW COMPLETED'); 64 | console.log('Final Result:', result); 65 | 66 | // Display workflow summary 67 | const summary = supervisor.getWorkflowSummary(); 68 | console.log('\n' + summary); 69 | 70 | // Display detailed task results 71 | const taskResults = supervisor.getTaskResults(); 72 | console.log('\nDetailed Task Results:'); 73 | for (const [taskId, result] of taskResults) { 74 | console.log(`- ${taskId}: ${result.success ? 'SUCCESS' : 'FAILED'}`); 75 | if (result.error) { 76 | console.log(` Error: ${result.error}`); 77 | } 78 | } 79 | 80 | } catch (error) { 81 | console.error("Error in workflow execution:", error); 82 | throw error; 83 | } finally { 84 | // Clean up resources 85 | if (supervisor) { 86 | logTitle('CLEANING UP'); 87 | await supervisor.close(); 88 | } 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. 
Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to work on. Since our projects use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project, we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution.
60 | -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/k8s/tavily-mcp-deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: tavily-mcp-server 5 | labels: 6 | app: tavily-mcp-server 7 | component: mcp-server 8 | spec: 9 | replicas: 1 10 | selector: 11 | matchLabels: 12 | app: tavily-mcp-server 13 | template: 14 | metadata: 15 | labels: 16 | app: tavily-mcp-server 17 | component: mcp-server 18 | spec: 19 | containers: 20 | - name: tavily-mcp-server 21 | image: 22 | # Fixed command to run MCP server directly with proper host binding 23 | command: 24 | - python 25 | - -c 26 | - | 27 | from dotenv import load_dotenv 28 | import os 29 | # Load environment variables from ConfigMap or local file 30 | if os.path.exists('/app/config/.env'): 31 | load_dotenv('/app/config/.env') 32 | print('Loaded environment from ConfigMap') 33 | elif os.path.exists('/app/.env'): 34 | load_dotenv('/app/.env') 35 | print('Loaded environment from local file') 36 | # Import and run MCP server with host binding to 0.0.0.0 37 | from src.mcp_servers.tavily_search_server import mcp 38 | mcp.run(transport='streamable-http', port=8001, host='0.0.0.0') 39 | ports: 40 | - containerPort: 8001 41 | name: http 42 | env: 43 | # Tavily Configuration 44 | - name: TAVILY_API_KEY 45 | valueFrom: 46 | secretKeyRef: 47 | name: app-secrets 48 | key: tavily-api-key 49 | # LiteLLM Configuration (for potential future use) 50 | - name: LITELLM_API_KEY 51 | valueFrom: 52 | secretKeyRef: 53 | name: app-secrets 54 | key: litellm-api-key 55 | - name: LITELLM_BASE_URL 56 | valueFrom: 57 | configMapKeyRef: 58 | name: app-config 59 | key: litellm-base-url 60 | # AWS Configuration 61 | - name: AWS_REGION 62 | valueFrom: 63 | configMapKeyRef: 64 | name: app-config 65 | key: aws-region 66 | # Server Configuration 67 | - name: PORT 68 | value: "8001" 69 | - name: HOST 70 | value: "0.0.0.0" 71 | resources: 72 | requests: 73 | memory: "128Mi" 74 | cpu: "50m" 75 | limits: 76 | memory: "256Mi" 77 | cpu: "200m" 78 | # Health checks removed as not needed for MCP server 79 | volumeMounts: 80 | - name: app-config 81 | mountPath: /app/config 82 | readOnly: true 83 | volumes: 84 | - name: app-config 85 | configMap: 86 | name: app-config 87 | restartPolicy: Always 88 | --- 89 | apiVersion: v1 90 | kind: Service 91 | metadata: 92 | name: tavily-mcp-service 93 | labels: 94 | app: tavily-mcp-server 95 | component: mcp-server 96 | spec: 97 | selector: 98 | app: tavily-mcp-server 99 | ports: 100 | - name: http 101 | port: 8001 102 | targetPort: 8001 103 | protocol: TCP 104 | type: ClusterIP 105 | -------------------------------------------------------------------------------- /milvus/README.md: -------------------------------------------------------------------------------- 1 | # Milvus Vector Database on AWS EKS with Graviton 2 | 3 | This directory contains configuration files for deploying Milvus, a vector database, on Amazon EKS with AWS Graviton processors. This setup is part of the larger project for cost-effective and scalable Small Language Models inference on AWS Graviton4 with EKS. 4 | 5 | ## Overview 6 | 7 | Milvus is an open-source vector database built to power embedding similarity search and AI applications. In this setup, we deploy Milvus in standalone mode on AWS Graviton-based EKS nodes to leverage the cost-effectiveness and performance of ARM64 architecture. 
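Once deployed, connectivity can be verified with a short sketch using the same Node.js SDK (`@zilliz/milvus2-sdk-node`) that the sample apps in this repository use. This is an illustrative check only; it assumes the service has been port-forwarded locally as described in the "Accessing Milvus" section below.

```typescript
// Minimal connectivity check for the standalone Milvus deployment.
// Assumes: kubectl port-forward service/my-release-milvus 19530:19530
import { MilvusClient } from '@zilliz/milvus2-sdk-node';

(async () => {
  const client = new MilvusClient({ address: 'localhost:19530' });
  console.log('Milvus version:', await client.getVersion());
  console.log('Collections:', await client.showCollections());
  await client.closeConnection();
})();
```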
8 | 9 | ## Prerequisites 10 | 11 | - An existing EKS cluster with Graviton (ARM64) nodes 12 | - Cert-manager installed on the cluster 13 | - Milvus Operator installed on the cluster 14 | - AWS EBS CSI driver configured for persistent storage 15 | 16 | ## Configuration Files 17 | 18 | This directory includes the following configuration files: 19 | 20 | 1. **ebs-storage-class.yaml**: Defines an AWS EBS storage class for Milvus persistent storage 21 | - Uses gp3 volume type 22 | - Enables encryption 23 | - Configures WaitForFirstConsumer binding mode 24 | 25 | 2. **milvus-standalone.yaml**: Deploys Milvus in standalone mode 26 | - Configures Milvus to run on ARM64 (Graviton) nodes 27 | - Sets up resource requests 28 | - Configures in-cluster dependencies (etcd, pulsar, storage) 29 | - All components are configured to run on ARM64 architecture 30 | 31 | 3. **milvus-nlb-service.yaml**: Creates a Network Load Balancer service for external access 32 | - Exposes Milvus service port (19530) 33 | - Exposes metrics port (9091) 34 | - Configures internet-facing NLB 35 | 36 | ## Deployment Steps 37 | 38 | 1. **Install cert-manager** (if not already installed): 39 | ```bash 40 | kubectl apply -f https://github.com/jetstack/cert-manager/releases/download/v1.5.3/cert-manager.yaml 41 | kubectl get pods -n cert-manager 42 | ``` 43 | 44 | 2. **Install Milvus Operator** (if not already installed): 45 | ```bash 46 | kubectl apply -f https://raw.githubusercontent.com/zilliztech/milvus-operator/main/deploy/manifests/deployment.yaml 47 | kubectl get pods -n milvus-operator 48 | ``` 49 | 50 | 3. **Create EBS Storage Class**: 51 | ```bash 52 | kubectl apply -f ebs-storage-class.yaml 53 | ``` 54 | 55 | 4. **Deploy Milvus in standalone mode**: 56 | ```bash 57 | kubectl apply -f milvus-standalone.yaml 58 | ``` 59 | 60 | 5. **Create NLB Service** (optional, if you need external access): 61 | ```bash 62 | kubectl apply -f milvus-nlb-service.yaml 63 | ``` 64 | 65 | ## Accessing Milvus 66 | 67 | You can access Milvus using port-forwarding: 68 | ```bash 69 | kubectl port-forward service/my-release-milvus 19530:19530 70 | ``` 71 | 72 | Or through the Network Load Balancer if you deployed the NLB service. 73 | 74 | ## Integration with LLM Services 75 | 76 | This Milvus deployment can be integrated with the LLM services in the parent project for vector search capabilities, enabling: 77 | - Semantic search 78 | - Retrieval-augmented generation (RAG) 79 | - Document similarity matching 80 | - And other vector-based operations 81 | 82 | ## Uninstalling 83 | 84 | To uninstall Milvus: 85 | ```bash 86 | kubectl delete milvus my-release 87 | ``` 88 | 89 | ## Additional Resources 90 | 91 | - [Milvus Documentation](https://milvus.io/docs) 92 | - [Milvus Operator GitHub](https://github.com/zilliztech/milvus-operator) 93 | - [Main Project README](../README.md) 94 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/AmazonQ.md: -------------------------------------------------------------------------------- 1 | # Augmented LLM with MCP and RAG 2 | 3 | This project demonstrates a framework-independent implementation of an augmented Large Language Model (LLM) system that combines Model Context Protocol (MCP) for tool usage and Retrieval Augmented Generation (RAG) for enhanced context awareness. 4 | 5 | ## Project Overview 6 | 7 | This application creates an AI agent that can: 8 | 1. Retrieve relevant information from a knowledge base using vector embeddings 9 | 2. 
Interact with external tools through the Model Context Protocol (MCP) 10 | 3. Generate responses based on both the retrieved context and tool interactions 11 | 4. Complete tasks like summarizing content and saving results to files 12 | 13 | ## Architecture 14 | 15 | The system is built with a modular architecture consisting of these key components: 16 | 17 | ``` 18 | Agent → Manages the overall workflow and coordinates components 19 | ├── ChatOpenAI → Handles LLM interactions and tool calling 20 | ├── MCPClient(s) → Connects to MCP servers for tool access 21 | └── EmbeddingRetriever → Performs vector search for relevant context 22 | └── VectorStore → Stores and searches document embeddings 23 | ``` 24 | 25 | ## Workflow Explanation 26 | 27 | 1. **Initialization**: 28 | - The system loads knowledge documents and creates embeddings using AWS Bedrock 29 | - Embeddings are stored in an in-memory vector store 30 | - MCP clients are initialized to connect to tool servers 31 | 32 | 2. **RAG Process**: 33 | - When a query is received, it's converted to an embedding 34 | - The system searches for the most relevant documents using cosine similarity 35 | - Retrieved documents are combined to form context for the LLM 36 | 37 | 3. **Agent Execution**: 38 | - The agent initializes with the LLM, MCP clients, and retrieved context 39 | - The user query is sent to the LLM along with the context 40 | - The LLM generates responses and may request tool calls 41 | 42 | 4. **Tool Usage**: 43 | - When the LLM requests a tool, the agent routes the call to the appropriate MCP client 44 | - The MCP client executes the tool and returns results 45 | - Results are fed back to the LLM to continue the conversation 46 | 47 | 5. **Output Generation**: 48 | - The LLM generates a final response incorporating tool results and context 49 | - In the example task, it creates a markdown file with information about "Antonette" 50 | 51 | ## Key Technologies 52 | 53 | - **LLM Integration**: Uses OpenAI API for language model capabilities 54 | - **MCP Implementation**: Connects to MCP servers for filesystem operations 55 | - **Vector Embeddings**: Uses AWS Bedrock for generating embeddings 56 | - **Vector Search**: Implements cosine similarity for finding relevant documents 57 | 58 | ## Implementation Details 59 | 60 | - **Framework Independence**: Built without relying on frameworks like LangChain or LlamaIndex 61 | - **Modular Design**: Components are separated for easy maintenance and extension 62 | - **AWS Integration**: Uses AWS Bedrock for embedding generation 63 | - **Tool Orchestration**: Manages tool calls and responses through MCP protocol 64 | 65 | ## Example Use Case 66 | 67 | The current implementation demonstrates a task where the agent: 68 | 1. Retrieves information about a user named "Antonette" from the knowledge base 69 | 2. Summarizes the information and creates a story about her 70 | 3. Saves the output to a markdown file using the filesystem MCP tool 71 | 72 | This architecture can be extended to support various tasks requiring context-aware responses and tool usage. 
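As an illustrative sketch (not the repository's actual implementation, which lives in `VectorStore.ts` and `EmbeddingRetriever.ts`), the cosine-similarity retrieval step described above amounts to ranking stored documents by the angle between their embeddings and the query embedding:

```typescript
// Simplified sketch of the RAG retrieval step: rank stored documents by
// cosine similarity between their embeddings and the query embedding.
interface EmbeddedDocument { text: string; embedding: number[]; }

function cosineSimilarity(a: number[], b: number[]): number {
  let dot = 0, normA = 0, normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}

function topK(query: number[], docs: EmbeddedDocument[], k: number): EmbeddedDocument[] {
  return [...docs]
    .sort((x, y) => cosineSimilarity(query, y.embedding) - cosineSimilarity(query, x.embedding))
    .slice(0, k);
}
```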
73 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_opensearch/AmazonQ.md: -------------------------------------------------------------------------------- 1 | # Augmented LLM with MCP and RAG 2 | 3 | This project demonstrates a framework-independent implementation of an augmented Large Language Model (LLM) system that combines Model Context Protocol (MCP) for tool usage and Retrieval Augmented Generation (RAG) for enhanced context awareness. 4 | 5 | ## Project Overview 6 | 7 | This application creates an AI agent that can: 8 | 1. Retrieve relevant information from a knowledge base using vector embeddings 9 | 2. Interact with external tools through the Model Context Protocol (MCP) 10 | 3. Generate responses based on both the retrieved context and tool interactions 11 | 4. Complete tasks like summarizing content and saving results to files 12 | 13 | ## Architecture 14 | 15 | The system is built with a modular architecture consisting of these key components: 16 | 17 | ``` 18 | Agent → Manages the overall workflow and coordinates components 19 | ├── ChatOpenAI → Handles LLM interactions and tool calling 20 | ├── MCPClient(s) → Connects to MCP servers for tool access 21 | └── EmbeddingRetriever → Performs vector search for relevant context 22 | └── VectorStore → Stores and searches document embeddings 23 | ``` 24 | 25 | ## Workflow Explanation 26 | 27 | 1. **Initialization**: 28 | - The system loads knowledge documents and creates embeddings using AWS Bedrock 29 | - Embeddings are stored in an in-memory vector store 30 | - MCP clients are initialized to connect to tool servers 31 | 32 | 2. **RAG Process**: 33 | - When a query is received, it's converted to an embedding 34 | - The system searches for the most relevant documents using cosine similarity 35 | - Retrieved documents are combined to form context for the LLM 36 | 37 | 3. **Agent Execution**: 38 | - The agent initializes with the LLM, MCP clients, and retrieved context 39 | - The user query is sent to the LLM along with the context 40 | - The LLM generates responses and may request tool calls 41 | 42 | 4. **Tool Usage**: 43 | - When the LLM requests a tool, the agent routes the call to the appropriate MCP client 44 | - The MCP client executes the tool and returns results 45 | - Results are fed back to the LLM to continue the conversation 46 | 47 | 5. **Output Generation**: 48 | - The LLM generates a final response incorporating tool results and context 49 | - In the example task, it creates a markdown file with information about "Antonette" 50 | 51 | ## Key Technologies 52 | 53 | - **LLM Integration**: Uses OpenAI API for language model capabilities 54 | - **MCP Implementation**: Connects to MCP servers for filesystem operations 55 | - **Vector Embeddings**: Uses AWS Bedrock for generating embeddings 56 | - **Vector Search**: Implements cosine similarity for finding relevant documents 57 | 58 | ## Implementation Details 59 | 60 | - **Framework Independence**: Built without relying on frameworks like LangChain or LlamaIndex 61 | - **Modular Design**: Components are separated for easy maintenance and extension 62 | - **AWS Integration**: Uses AWS Bedrock for embedding generation 63 | - **Tool Orchestration**: Manages tool calls and responses through MCP protocol 64 | 65 | ## Example Use Case 66 | 67 | The current implementation demonstrates a task where the agent: 68 | 1. Retrieves information about a user named "Antonette" from the knowledge base 69 | 2. 
Summarizes the information and creates a story about her 70 | 3. Saves the output to a markdown file using the filesystem MCP tool 71 | 72 | This architecture can be extended to support various tasks requiring context-aware responses and tool usage. 73 | -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/src/utils/opensearch_client.py: -------------------------------------------------------------------------------- 1 | """OpenSearch client wrapper for the multi-agent RAG system.""" 2 | 3 | import logging 4 | from typing import Optional, Dict, Any 5 | from ..tools.opensearch_vector_store import OpenSearchVectorStore 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | class OpenSearchClient: 10 | """OpenSearch client wrapper that provides compatibility with server.py expectations.""" 11 | 12 | def __init__(self, config): 13 | """Initialize OpenSearch client with configuration.""" 14 | self.config = config 15 | self._vector_store = None 16 | self.client = None 17 | self._initialize_client() 18 | 19 | def _initialize_client(self) -> None: 20 | """Initialize OpenSearch client using the existing vector store.""" 21 | try: 22 | # Use the existing OpenSearchVectorStore implementation 23 | self._vector_store = OpenSearchVectorStore() 24 | 25 | # Expose the underlying client for compatibility 26 | if self._vector_store and self._vector_store.client: 27 | self.client = self._vector_store.client 28 | logger.info("OpenSearch client initialized successfully via vector store") 29 | else: 30 | logger.warning("OpenSearch vector store client not available") 31 | self.client = None 32 | 33 | except Exception as e: 34 | logger.error(f"Failed to initialize OpenSearch client: {e}") 35 | self.client = None 36 | 37 | def is_connected(self) -> bool: 38 | """Check if the client is connected and can reach OpenSearch.""" 39 | if not self.client: 40 | return False 41 | 42 | try: 43 | info = self.client.info() 44 | return True 45 | except Exception as e: 46 | logger.debug(f"OpenSearch connection check failed: {e}") 47 | return False 48 | 49 | def get_info(self) -> Optional[Dict[str, Any]]: 50 | """Get OpenSearch cluster information.""" 51 | if not self.client: 52 | return None 53 | 54 | try: 55 | return self.client.info() 56 | except Exception as e: 57 | logger.error(f"Failed to get OpenSearch info: {e}") 58 | return None 59 | 60 | def index_exists(self, index_name: str) -> bool: 61 | """Check if an index exists.""" 62 | if not self.client: 63 | return False 64 | 65 | try: 66 | return self.client.indices.exists(index=index_name) 67 | except Exception as e: 68 | logger.error(f"Failed to check if index exists: {e}") 69 | return False 70 | 71 | def get_document_count(self, index_name: str) -> int: 72 | """Get the number of documents in an index.""" 73 | if not self.client: 74 | return 0 75 | 76 | try: 77 | response = self.client.count(index=index_name) 78 | return response.get("count", 0) 79 | except Exception as e: 80 | logger.error(f"Failed to get document count: {e}") 81 | return 0 82 | 83 | def close(self) -> None: 84 | """Close the OpenSearch connection.""" 85 | if self._vector_store: 86 | try: 87 | self._vector_store.close() 88 | logger.info("OpenSearch connection closed") 89 | except Exception as e: 90 | logger.error(f"Error closing OpenSearch connection: {e}") 91 | -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/src/scripts/embed_knowledge.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Knowledge Embedding Script 4 | 5 | This script processes all documents in the knowledge directory and creates 6 | embeddings for them in the OpenSearch vector store. 7 | """ 8 | 9 | import sys 10 | import logging 11 | from pathlib import Path 12 | from ..config import config 13 | from ..utils.logging import setup_logging, log_title 14 | from ..agents.knowledge_agent import knowledge_agent 15 | 16 | def main(): 17 | """Main function for embedding knowledge.""" 18 | # Setup logging 19 | setup_logging() 20 | logger = logging.getLogger(__name__) 21 | 22 | try: 23 | # Validate configuration 24 | config.validate_config() 25 | 26 | log_title("KNOWLEDGE EMBEDDING SCRIPT") 27 | logger.info("Starting knowledge embedding process") 28 | logger.info(f"Knowledge Directory: {config.KNOWLEDGE_DIR}") 29 | logger.info(f"OpenSearch Endpoint: {config.OPENSEARCH_ENDPOINT}") 30 | logger.info(f"Vector Index: {config.VECTOR_INDEX_NAME}") 31 | 32 | # Check if knowledge directory exists 33 | knowledge_path = Path(config.KNOWLEDGE_DIR) 34 | if not knowledge_path.exists(): 35 | logger.error(f"Knowledge directory does not exist: {config.KNOWLEDGE_DIR}") 36 | sys.exit(1) 37 | 38 | # Get knowledge statistics before embedding 39 | print("\n📊 Getting knowledge statistics...") 40 | stats_result = knowledge_agent("get_stats") 41 | if stats_result.get("success"): 42 | stats = stats_result.get("stats", {}) 43 | print(f"Total files: {stats.get('total_files', 0)}") 44 | print(f"File types: {stats.get('file_types', {})}") 45 | print(f"Current vector store count: {stats.get('vector_store_count', 0)}") 46 | 47 | # Check for changes 48 | print("\n🔍 Checking for knowledge changes...") 49 | check_result = knowledge_agent("check_changes") 50 | 51 | if not check_result.get("success"): 52 | logger.error(f"Failed to check for changes: {check_result.get('message')}") 53 | sys.exit(1) 54 | 55 | has_changes = check_result.get("has_changes", False) 56 | print(f"Changes detected: {has_changes}") 57 | 58 | # Embed knowledge (force embedding regardless of changes) 59 | print("\n🚀 Starting knowledge embedding...") 60 | embed_result = knowledge_agent("embed_knowledge") 61 | 62 | if embed_result.get("success"): 63 | print("✅ Knowledge embedding completed successfully!") 64 | 65 | # Get updated statistics 66 | print("\n📊 Updated knowledge statistics...") 67 | updated_stats_result = knowledge_agent("get_stats") 68 | if updated_stats_result.get("success"): 69 | updated_stats = updated_stats_result.get("stats", {}) 70 | print(f"Vector store count after embedding: {updated_stats.get('vector_store_count', 0)}") 71 | else: 72 | print(f"❌ Knowledge embedding failed: {embed_result.get('message')}") 73 | sys.exit(1) 74 | 75 | print("\n🎉 Knowledge embedding process completed!") 76 | 77 | except KeyboardInterrupt: 78 | print("\n\nProcess interrupted by user.") 79 | sys.exit(0) 80 | except Exception as e: 81 | logger.error(f"Knowledge embedding failed: {e}") 82 | print(f"❌ Error: {e}") 83 | sys.exit(1) 84 | 85 | if __name__ == "__main__": 86 | main() 87 | -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/src/utils/async_cleanup.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility functions for handling async cleanup and suppressing warnings. 
3 | """ 4 | 5 | import warnings 6 | import sys 7 | import logging 8 | from contextlib import contextmanager 9 | 10 | # Configure logging to suppress specific async warnings 11 | logging.getLogger("httpcore").setLevel(logging.ERROR) 12 | logging.getLogger("httpx").setLevel(logging.ERROR) 13 | logging.getLogger("anyio").setLevel(logging.ERROR) 14 | 15 | @contextmanager 16 | def suppress_async_warnings(): 17 | """Context manager to suppress async-related warnings during RAGAs evaluation.""" 18 | 19 | # Store original warning filters 20 | original_filters = warnings.filters[:] 21 | 22 | # Store original stderr 23 | original_stderr = sys.stderr 24 | 25 | try: 26 | # Suppress specific async warnings 27 | warnings.filterwarnings("ignore", category=RuntimeWarning, message=".*async generator ignored GeneratorExit.*") 28 | warnings.filterwarnings("ignore", category=RuntimeWarning, message=".*coroutine.*was never awaited.*") 29 | warnings.filterwarnings("ignore", category=RuntimeWarning, message=".*Attempted to exit cancel scope.*") 30 | warnings.filterwarnings("ignore", category=RuntimeWarning, message=".*no running event loop.*") 31 | 32 | # Suppress HTTP connection warnings 33 | warnings.filterwarnings("ignore", message=".*HTTP11ConnectionByteStream.*") 34 | warnings.filterwarnings("ignore", message=".*HTTP11Connection.*") 35 | 36 | # Create a custom stderr that filters out specific error messages 37 | class FilteredStderr: 38 | def __init__(self, original_stderr): 39 | self.original_stderr = original_stderr 40 | 41 | def write(self, text): 42 | # Filter out specific async error messages 43 | if any(phrase in text for phrase in [ 44 | "async generator ignored GeneratorExit", 45 | "Attempted to exit cancel scope", 46 | "no running event loop", 47 | "HTTP11ConnectionByteStream", 48 | "coroutine object HTTP11ConnectionByteStream.aclose" 49 | ]): 50 | return # Don't write these messages 51 | 52 | self.original_stderr.write(text) 53 | 54 | def flush(self): 55 | self.original_stderr.flush() 56 | 57 | def __getattr__(self, name): 58 | return getattr(self.original_stderr, name) 59 | 60 | # Replace stderr temporarily 61 | sys.stderr = FilteredStderr(original_stderr) 62 | 63 | yield 64 | 65 | finally: 66 | # Restore original settings 67 | warnings.filters[:] = original_filters 68 | sys.stderr = original_stderr 69 | 70 | def setup_async_environment(): 71 | """Set up the environment to minimize async warnings.""" 72 | 73 | # Configure logging levels 74 | loggers_to_quiet = [ 75 | "httpcore", 76 | "httpx", 77 | "anyio", 78 | "asyncio", 79 | "urllib3.connectionpool" 80 | ] 81 | 82 | for logger_name in loggers_to_quiet: 83 | logger = logging.getLogger(logger_name) 84 | logger.setLevel(logging.ERROR) 85 | 86 | # Set global warning filters 87 | warnings.filterwarnings("ignore", category=RuntimeWarning, message=".*async generator ignored GeneratorExit.*") 88 | warnings.filterwarnings("ignore", category=RuntimeWarning, message=".*coroutine.*was never awaited.*") 89 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/src/rebuildCollection.ts: -------------------------------------------------------------------------------- 1 | import { MilvusClient, DataType } from '@zilliz/milvus2-sdk-node'; 2 | import { logTitle } from "./utils"; 3 | import 'dotenv/config'; 4 | 5 | // Function to rebuild the Milvus collection with 384 dimensions 6 | async function rebuildCollection() { 7 | logTitle('REBUILDING MILVUS COLLECTION'); 8 | 9 | const collectionName = 
'rag_documents_384d'; 10 | const dimension = 384; // New dimension to match the custom embedding endpoint 11 | 12 | try { 13 | // Connect to Milvus 14 | const client = new MilvusClient({ 15 | address: process.env.MILVUS_ADDRESS || '', 16 | username: process.env.MILVUS_USERNAME || '', 17 | password: process.env.MILVUS_PASSWORD || '', 18 | }); 19 | 20 | // Check if collection exists and drop it if it does 21 | const hasCollection = await client.hasCollection({ 22 | collection_name: collectionName, 23 | }); 24 | 25 | if (hasCollection.value) { 26 | console.log(`Collection ${collectionName} already exists. Dropping it...`); 27 | await client.dropCollection({ 28 | collection_name: collectionName, 29 | }); 30 | console.log(`Collection ${collectionName} dropped successfully.`); 31 | } 32 | 33 | // Create new collection with 384 dimensions 34 | console.log(`Creating new collection ${collectionName} with ${dimension} dimensions...`); 35 | await client.createCollection({ 36 | collection_name: collectionName, 37 | fields: [ 38 | { 39 | name: 'id', 40 | data_type: DataType.Int64, 41 | is_primary_key: true, 42 | autoID: true, 43 | }, 44 | { 45 | name: 'embedding', 46 | data_type: DataType.FloatVector, 47 | dim: dimension, 48 | }, 49 | { 50 | name: 'document', 51 | data_type: DataType.VarChar, 52 | max_length: 65535, 53 | }, 54 | ], 55 | }); 56 | 57 | // Create index for vector search 58 | console.log(`Creating index for collection ${collectionName}...`); 59 | await client.createIndex({ 60 | collection_name: collectionName, 61 | field_name: 'embedding', 62 | index_type: 'HNSW', 63 | metric_type: 'COSINE', 64 | params: { M: 8, efConstruction: 64 }, 65 | }); 66 | 67 | // Load collection into memory 68 | console.log(`Loading collection ${collectionName} into memory...`); 69 | await client.loadCollection({ 70 | collection_name: collectionName, 71 | }); 72 | 73 | console.log(`Collection ${collectionName} created and indexed successfully.`); 74 | 75 | // Close connection 76 | await client.closeConnection(); 77 | 78 | return true; 79 | } catch (error) { 80 | console.error("Error rebuilding Milvus collection:", error); 81 | return false; 82 | } 83 | } 84 | 85 | // Main function 86 | (async () => { 87 | const success = await rebuildCollection(); 88 | 89 | if (success) { 90 | console.log("Collection rebuild completed successfully. Now you need to re-embed your documents."); 91 | console.log("Run 'pnpm embed-knowledge' and 'pnpm embed-csv' to populate the new collection."); 92 | } else { 93 | console.error("Collection rebuild failed"); 94 | process.exit(1); 95 | } 96 | })(); 97 | -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/run_main_clean.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Clean main application runner with complete async error suppression. 
4 | """ 5 | 6 | import sys 7 | import os 8 | import warnings 9 | 10 | # Add current directory to path 11 | sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) 12 | 13 | class CompleteAsyncErrorFilter: 14 | """Complete async error filter that suppresses all async-related output.""" 15 | 16 | def __init__(self): 17 | self.original_stderr = sys.__stderr__ 18 | 19 | def write(self, text): 20 | """Filter out all async RuntimeErrors and related output.""" 21 | if not text.strip(): 22 | return 23 | 24 | # Comprehensive list of patterns to suppress 25 | suppress_patterns = [ 26 | "RuntimeError", 27 | "httpcore", 28 | "_synchronization", 29 | "asyncio", 30 | "anyio", 31 | "sniffio", 32 | "await", 33 | "async", 34 | "CancelScope", 35 | "shield", 36 | "current_task", 37 | "get_running_loop", 38 | "cancel_shielded_checkpoint", 39 | "_anyio_lock", 40 | "acquire", 41 | "File \"/home/ubuntu/Cost_Effective_and_Scalable_Models_Inference_on_AWS_Graviton/agentic-apps/strandsdk_agentic_rag_opensearch/venv/lib/python3.10/site-packages/httpcore", 42 | "File \"/usr/lib/python3.10/asyncio", 43 | "raise RuntimeError", 44 | ] 45 | 46 | # Check if this line should be suppressed 47 | should_suppress = any(pattern in text for pattern in suppress_patterns) 48 | 49 | # Also suppress lines that are just punctuation or whitespace 50 | if text.strip() in [":", "RuntimeError:", "RuntimeError: ", "RuntimeError", ""]: 51 | should_suppress = True 52 | 53 | # Only write if not suppressed and contains meaningful content 54 | if not should_suppress and len(text.strip()) > 1: 55 | self.original_stderr.write(text) 56 | self.original_stderr.flush() 57 | 58 | def flush(self): 59 | """Flush the original stderr.""" 60 | self.original_stderr.flush() 61 | 62 | def setup_complete_clean_environment(): 63 | """Set up completely clean environment.""" 64 | 65 | # Suppress all warnings 66 | warnings.filterwarnings("ignore") 67 | 68 | # Install complete error filter 69 | sys.stderr = CompleteAsyncErrorFilter() 70 | 71 | # Try to import and use existing cleanup if available 72 | try: 73 | from src.utils.global_async_cleanup import setup_global_async_cleanup 74 | setup_global_async_cleanup() 75 | except ImportError: 76 | pass 77 | 78 | if __name__ == "__main__": 79 | print("🚀 Starting Enhanced RAG System (Ultra Clean Mode)") 80 | print("=" * 60) 81 | print("Note: All async errors and warnings are completely suppressed") 82 | print("=" * 60) 83 | 84 | # Set up complete clean environment FIRST 85 | setup_complete_clean_environment() 86 | 87 | try: 88 | # Import and run the main application 89 | from src.main import main 90 | main() 91 | except KeyboardInterrupt: 92 | print("\n\n👋 Application stopped by user") 93 | except Exception as e: 94 | # Only show truly important errors 95 | error_msg = str(e) 96 | if not any(keyword in error_msg.lower() for keyword in [ 97 | "runtimeerror", "httpcore", "asyncio", "anyio", "await", "async" 98 | ]): 99 | print(f"\n❌ Application error: {e}") 100 | sys.exit(1) 101 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_opensearch/src/Agent.ts: -------------------------------------------------------------------------------- 1 | import MCPClient from "./MCPClient"; 2 | import ChatOpenAI from "./ChatOpenAI"; 3 | import { logTitle } from "./utils"; 4 | import { ToolCall } from "./ChatOpenAI"; 5 | 6 | export default class Agent { 7 | private mcpClients: MCPClient[]; 8 | private llm: ChatOpenAI | null = null; 9 | private model: string; 10 | private 
systemPrompt: string; 11 | private context: string; 12 | 13 | constructor(model: string, mcpClients: MCPClient[], systemPrompt: string = '', context: string = '') { 14 | this.mcpClients = mcpClients; 15 | this.model = model; 16 | this.systemPrompt = systemPrompt; 17 | this.context = context; 18 | } 19 | 20 | async init() { 21 | logTitle('TOOLS'); 22 | for await (const client of this.mcpClients) { 23 | await client.init(); 24 | } 25 | const tools = this.mcpClients.flatMap(client => client.getTools()); 26 | this.llm = new ChatOpenAI(this.model, this.systemPrompt, tools, this.context); 27 | } 28 | 29 | async close() { 30 | for await (const client of this.mcpClients) { 31 | await client.close(); 32 | } 33 | } 34 | 35 | async invoke(prompt: string) { 36 | if (!this.llm) throw new Error('Agent not initialized'); 37 | 38 | try { 39 | logTitle('AGENT EXECUTION'); 40 | console.log("Invoking LLM with tools..."); 41 | 42 | // Start the conversation with the user prompt 43 | let response = await this.llm.chat(prompt); 44 | 45 | // Continue the conversation until no more tool calls are needed 46 | while (response.toolCalls && response.toolCalls.length > 0) { 47 | logTitle('TOOL CALLS'); 48 | console.log(`Processing ${response.toolCalls.length} tool calls`); 49 | 50 | // Process each tool call 51 | for (const toolCall of response.toolCalls) { 52 | await this.processToolCall(toolCall); 53 | } 54 | 55 | // Continue the conversation with the tool results 56 | response = await this.llm.chat(); 57 | } 58 | 59 | logTitle('FINAL RESPONSE'); 60 | console.log("Successfully completed request"); 61 | return response.content; 62 | } catch (error) { 63 | console.error("Error in agent execution:", error); 64 | throw error; 65 | } 66 | } 67 | 68 | private async processToolCall(toolCall: ToolCall) { 69 | try { 70 | const { id, function: { name, arguments: argsString } } = toolCall; 71 | console.log(`Executing tool call: ${name}`); 72 | 73 | // Parse the arguments 74 | const args = JSON.parse(argsString); 75 | 76 | // Find the MCP client that can handle this tool 77 | const toolName = name; 78 | 79 | // Find the appropriate client 80 | const client = this.mcpClients[0]; // Since we only have one client 81 | 82 | if (!client) { 83 | throw new Error(`No MCP client found for tool: ${name}`); 84 | } 85 | 86 | // Call the tool and get the result 87 | const result = await client.callTool(toolName, args); 88 | console.log(`Tool result: ${JSON.stringify(result).substring(0, 100)}...`); 89 | 90 | // Append the tool result to the conversation 91 | this.llm?.appendToolResult(id, JSON.stringify(result)); 92 | } catch (error) { 93 | console.error(`Error processing tool call: ${error}`); 94 | // Append the error as the tool result 95 | this.llm?.appendToolResult(toolCall.id, JSON.stringify({ error: error.message })); 96 | } 97 | } 98 | } -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/src/utils/langfuse_config.py: -------------------------------------------------------------------------------- 1 | """Langfuse configuration and utilities.""" 2 | 3 | from typing import Optional, Dict, Any 4 | from ..config import config 5 | 6 | try: 7 | from langfuse import Langfuse 8 | LANGFUSE_AVAILABLE = True 9 | except ImportError: 10 | LANGFUSE_AVAILABLE = False 11 | Langfuse = None 12 | 13 | class LangfuseSpanWrapper: 14 | """Wrapper for Langfuse spans to handle API differences.""" 15 | def __init__(self, span): 16 | self.span = span 17 | 18 | def end(self, **kwargs): 
19 | """End the span, handling different API versions.""" 20 | try: 21 | # For Langfuse 3.x 22 | if hasattr(self.span, 'end'): 23 | # Just call end without parameters 24 | self.span.end() 25 | except Exception as e: 26 | print(f"Warning: Failed to end span: {e}") 27 | 28 | class LangfuseConfig: 29 | """Langfuse configuration and trace management.""" 30 | 31 | def __init__(self): 32 | self.client: Optional[Langfuse] = None 33 | self._initialize_client() 34 | 35 | def _initialize_client(self) -> None: 36 | """Initialize Langfuse client if available and configured.""" 37 | if not LANGFUSE_AVAILABLE: 38 | print("Langfuse not available. Install with: pip install langfuse") 39 | return 40 | 41 | if not config.is_langfuse_enabled(): 42 | print("Langfuse not configured. Skipping initialization.") 43 | return 44 | 45 | try: 46 | self.client = Langfuse( 47 | host=config.LANGFUSE_HOST, 48 | public_key=config.LANGFUSE_PUBLIC_KEY, 49 | secret_key=config.LANGFUSE_SECRET_KEY 50 | ) 51 | print("Langfuse initialized successfully") 52 | except Exception as e: 53 | print(f"Failed to initialize Langfuse: {e}") 54 | self.client = None 55 | 56 | def create_trace(self, name: str, input_data: Dict[str, Any], metadata: Optional[Dict[str, Any]] = None): 57 | """Create a new trace.""" 58 | if not self.client: 59 | return None 60 | 61 | try: 62 | # For Langfuse 3.x 63 | trace_id = self.client.create_trace_id() 64 | # Use start_span without trace_id parameter 65 | trace = self.client.start_span( 66 | name=name, 67 | input=input_data, 68 | metadata=metadata or {} 69 | ) 70 | return LangfuseSpanWrapper(trace) 71 | except Exception as e: 72 | print(f"Failed to create trace: {e}") 73 | return None 74 | 75 | def create_span(self, trace, name: str, input_data: Dict[str, Any], metadata: Optional[Dict[str, Any]] = None): 76 | """Create a new span within a trace.""" 77 | if not self.client: 78 | return None 79 | 80 | try: 81 | # For Langfuse 3.x 82 | span = self.client.start_span( 83 | name=name, 84 | input=input_data, 85 | metadata=metadata or {} 86 | ) 87 | return LangfuseSpanWrapper(span) 88 | except Exception as e: 89 | print(f"Failed to create span: {e}") 90 | return None 91 | 92 | def flush(self) -> None: 93 | """Flush pending traces.""" 94 | if self.client: 95 | try: 96 | self.client.flush() 97 | except Exception as e: 98 | print(f"Failed to flush Langfuse: {e}") 99 | 100 | @property 101 | def is_enabled(self) -> bool: 102 | """Check if Langfuse is enabled and available.""" 103 | return self.client is not None 104 | 105 | # Global Langfuse instance 106 | langfuse_config = LangfuseConfig() 107 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/src/MCPClient.ts: -------------------------------------------------------------------------------- 1 | import { Client } from "@modelcontextprotocol/sdk/client/index.js"; 2 | import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js"; 3 | import { Tool } from "@modelcontextprotocol/sdk/types.js"; 4 | 5 | export default class MCPClient { 6 | public mcp: Client; 7 | private command: string; 8 | private args: string[] 9 | private transport: StdioClientTransport | null = null; 10 | private tools: Tool[] = []; 11 | 12 | constructor(name: string, command: string, args: string[], version?: string) { 13 | this.mcp = new Client({ name, version: version || "0.0.1" }); 14 | this.command = command; 15 | this.args = args; 16 | } 17 | 18 | public async init() { 19 | await this.connectToServer(); 20 | } 21 | 22 
| public async close() { 23 | await this.mcp.close(); 24 | } 25 | 26 | public getTools() { 27 | return this.tools; 28 | } 29 | 30 | public callTool(name: string, params: Record<string, any>) { 31 | return this.mcp.callTool({ 32 | name, 33 | arguments: params, 34 | }); 35 | } 36 | 37 | private async connectToServer() { 38 | try { 39 | this.transport = new StdioClientTransport({ 40 | command: this.command, 41 | args: this.args, 42 | }); 43 | await this.mcp.connect(this.transport); 44 | 45 | try { 46 | const toolsResult = await this.mcp.listTools(); 47 | this.tools = toolsResult.tools.map((tool) => { 48 | // Prefix tool names with the client name to ensure proper routing 49 | const prefixedName = `${this.mcp.name}___${tool.name}`; 50 | return { 51 | name: prefixedName, 52 | description: tool.description, 53 | inputSchema: tool.inputSchema, 54 | }; 55 | }); 56 | console.log( 57 | "Connected to server with tools:", 58 | this.tools.map(({ name }) => name) 59 | ); 60 | } catch (toolError) { 61 | console.error("Failed to list tools from MCP server: ", toolError); 62 | console.log("Adding fallback write_file tool manually"); 63 | 64 | // Add a fallback write_file tool manually 65 | this.tools = [{ 66 | name: `${this.mcp.name}___write_file`, 67 | description: "Write content to a file", 68 | inputSchema: { 69 | type: "object", 70 | properties: { 71 | path: { type: "string", description: "Path to the file" }, 72 | content: { type: "string", description: "Content to write" } 73 | }, 74 | required: ["path", "content"] 75 | } 76 | }]; 77 | } 78 | } catch (e) { 79 | console.error("Failed to connect to MCP server: ", e); 80 | console.error("Error details:", e); 81 | console.log("Will continue without MCP tools and rely on fallback methods"); 82 | 83 | // Add a dummy tool so the agent can still make tool calls 84 | this.tools = [{ 85 | name: `${this.mcp.name}___write_file`, 86 | description: "Write content to a file (fallback)", 87 | inputSchema: { 88 | type: "object", 89 | properties: { 90 | path: { type: "string", description: "Path to the file" }, 91 | content: { type: "string", description: "Content to write" } 92 | }, 93 | required: ["path", "content"] 94 | } 95 | }]; 96 | } 97 | } 98 | } -------------------------------------------------------------------------------- /model-hosting/standalone-vllm-reasoning.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: PersistentVolumeClaim 4 | metadata: 5 | name: vllm-qwen-server 6 | annotations: 7 | kubernetes.io/pvc-protection: "false" 8 | spec: 9 | accessModes: 10 | - ReadWriteOnce 11 | resources: 12 | requests: 13 | storage: 900Gi 14 | storageClassName: gp3 15 | volumeMode: Filesystem 16 | --- 17 | apiVersion: apps/v1 18 | kind: Deployment 19 | metadata: 20 | name: vllm-qwen-server 21 | labels: 22 | app: vllm-qwen-server 23 | spec: 24 | replicas: 1 25 | selector: 26 | matchLabels: 27 | app: vllm-qwen-server 28 | template: 29 | metadata: 30 | labels: 31 | app: vllm-qwen-server 32 | spec: 33 | # Updated nodeSelector to target GPU instances specifically 34 | nodeSelector: 35 | kubernetes.io/arch: amd64 36 | nvidia.com/gpu: present 37 | karpenter.sh/nodepool: gpu-inference 38 | # Updated tolerations to match the taint on the GPU nodepool 39 | tolerations: 40 | - key: "model-inferencing" 41 | operator: "Equal" 42 | value: "gpu-inference" 43 | effect: "NoSchedule" 44 | volumes: 45 | - name: cache-volume 46 | persistentVolumeClaim: 47 | claimName: vllm-qwen-server 48 | # vLLM needs to access the host's
shared memory for tensor parallel inference. 49 | - name: shm 50 | emptyDir: 51 | medium: Memory 52 | sizeLimit: "32Gi" 53 | containers: 54 | - name: vllm-qwen-server 55 | image: vllm/vllm-openai:latest 56 | # image: vllm/vllm-openai:v0.7.3 57 | command: ["/bin/sh", "-c"] 58 | args: [ 59 | "vllm serve Qwen/Qwen3-14B --enable-auto-tool-choice --tool-call-parser hermes --trust-remote-code --max-num-batched-tokens 32768 --max-num-seqs 8 --max-model-len 32768 --dtype bfloat16 --tensor-parallel-size 4 --gpu-memory-utilization 0.90" 60 | 61 | ] 62 | env: 63 | - name: HUGGING_FACE_HUB_TOKEN 64 | valueFrom: 65 | secretKeyRef: 66 | name: hf-token 67 | key: token 68 | - name: OMP_NUM_THREADS 69 | value: "8" 70 | - name: VLLM_LOGGING_LEVEL 71 | value: "DEBUG" 72 | - name: VLLM_DISABLE_COMPILE_CACHE 73 | value: "0" 74 | # - name: PYTORCH_CUDA_ALLOC_CONF 75 | # value: "max_split_size_mb:512,expandable_segments:True" 76 | - name: CUDA_VISIBLE_DEVICES 77 | value: "0,1,2,3" 78 | ports: 79 | - containerPort: 8000 80 | resources: 81 | limits: 82 | memory: 64Gi 83 | nvidia.com/gpu: "4" 84 | requests: 85 | cpu: "22" 86 | memory: 64Gi 87 | nvidia.com/gpu: "4" 88 | volumeMounts: 89 | - mountPath: /root/.cache/huggingface 90 | name: cache-volume 91 | - name: shm 92 | mountPath: /dev/shm 93 | livenessProbe: 94 | httpGet: 95 | path: /health 96 | port: 8000 97 | initialDelaySeconds: 240 98 | periodSeconds: 10 99 | failureThreshold: 30 100 | successThreshold: 1 101 | 102 | 103 | readinessProbe: 104 | httpGet: 105 | path: /health 106 | port: 8000 107 | initialDelaySeconds: 240 108 | periodSeconds: 10 109 | 110 | --- 111 | apiVersion: v1 112 | kind: Service 113 | metadata: 114 | name: vllm-qwen-server 115 | spec: 116 | ports: 117 | - name: http-vllm-qwen-server 118 | port: 8000 119 | protocol: TCP 120 | targetPort: 8000 121 | # The label selector should match the deployment labels & it is useful for prefix caching feature 122 | selector: 123 | app: vllm-qwen-server 124 | sessionAffinity: None 125 | type: ClusterIP 126 | 127 | 128 | -------------------------------------------------------------------------------- /model-hosting/ray-server/llamacpp.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | import multiprocessing 5 | import os 6 | import logging 7 | import time 8 | from fastapi import FastAPI 9 | from starlette.requests import Request 10 | from starlette.responses import StreamingResponse, JSONResponse 11 | 12 | from ray import serve 13 | 14 | 15 | 16 | 17 | 18 | from llama_cpp import Llama 19 | 20 | 21 | 22 | logger = logging.getLogger("ray.serve") 23 | 24 | app = FastAPI() 25 | 26 | # Define the deployment 27 | # ray_actor_options={"num_cpus": 14}, 28 | @serve.deployment(name="LLamaCPPDeployment", autoscaling_config={"min_replicas" : 10, "max_replicas": 10, "initial_replicas": 10, "upscale_delay_s": 5}, max_ongoing_requests=100, graceful_shutdown_timeout_s=600) 29 | @serve.ingress(app) 30 | class LLamaCPPDeployment: 31 | def __init__(self, parallelism: str): 32 | os.environ["OMP_NUM_THREADS"] = parallelism 33 | # Initialize the LLamaCPP model 34 | self.model_id = os.getenv("MODEL_ID", default="SanctumAI/Llama-3.2-1B-Instruct-GGUF") 35 | # Get filename from environment variable with default fallback to "*Q4_0.gguf" 36 | self.filename = os.getenv("MODEL_FILENAME", default="*Q4_0.gguf") 37 | self.n_ctx = int(os.getenv("N_CTX")) 38 | self.n_threads =
int(os.getenv("N_THREADS")) 39 | # self.n_batch = int(os.getenv("N_BATCH")) 40 | # self.llama_cpp = Llama(model_path=MODEL_ID, n_ctx=self.n_ctx, n_batch=self.n_batch) 41 | self.llm = Llama.from_pretrained(repo_id=self.model_id,filename=self.filename,n_ctx=self.n_ctx,n_threads=self.n_threads) 42 | #"hugging-quants/Llama-3.2-3B-Instruct-Q8_0-GGUF", 43 | print("__init__ Complete") 44 | 45 | @app.post("/v1/chat/completions") 46 | async def call_llama(self, request: Request): 47 | try: 48 | body = await request.json() 49 | 50 | # Get the messages array from the body 51 | messages = body.get("messages", []) 52 | 53 | # Get the content from the first user message 54 | prompt = "" 55 | if messages: 56 | for message in messages: 57 | if message.get("role") == "user": 58 | prompt = message.get("content", "") 59 | break 60 | 61 | 62 | if not prompt: 63 | return JSONResponse( 64 | status_code=400, 65 | content={"error": "prompt is required"} 66 | ) 67 | 68 | output = self.llm( 69 | "Q: " + prompt + " A: ", 70 | max_tokens=body.get("max_tokens", 32) 71 | ) 72 | 73 | return JSONResponse(content={ 74 | "id": "cmpl-" + os.urandom(12).hex(), 75 | "object": "text_completion", 76 | "created": int(time.time()), 77 | "model": self.model_id, 78 | "choices": [{ 79 | "text": output["choices"][0]["text"], 80 | "index": 0, 81 | "finish_reason": "stop" 82 | }], 83 | "usage": { 84 | "prompt_tokens": len(prompt.split()), 85 | "completion_tokens": len(output["choices"][0]["text"].split()), 86 | "total_tokens": len(prompt.split()) + len(output["choices"][0]["text"].split()) 87 | } 88 | }) 89 | 90 | 91 | except Exception as e: 92 | logger.error(f"Error: {str(e)}") 93 | return JSONResponse( 94 | status_code=500, 95 | content={"error": str(e)} 96 | ) 97 | 98 | 99 | # Get host CPU count and pass it to the deployment as a string (it is written to OMP_NUM_THREADS) 100 | host_cpu_count = multiprocessing.cpu_count() 101 | 102 | model = LLamaCPPDeployment.bind(str(host_cpu_count)) -------------------------------------------------------------------------------- /model-hosting/standalone-vllm-vision.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: PersistentVolumeClaim 4 | metadata: 5 | name: vllm-qwen-server-vision 6 | annotations: 7 | kubernetes.io/pvc-protection: "false" 8 | spec: 9 | accessModes: 10 | - ReadWriteOnce 11 | resources: 12 | requests: 13 | storage: 900Gi 14 | storageClassName: gp3 15 | volumeMode: Filesystem 16 | --- 17 | apiVersion: apps/v1 18 | kind: Deployment 19 | metadata: 20 | name: vllm-qwen-server-vision 21 | labels: 22 | app: vllm-qwen-server-vision 23 | spec: 24 | replicas: 1 25 | selector: 26 | matchLabels: 27 | app: vllm-qwen-server-vision 28 | template: 29 | metadata: 30 | labels: 31 | app: vllm-qwen-server-vision 32 | spec: 33 | # Updated nodeSelector to target GPU instances specifically 34 | nodeSelector: 35 | kubernetes.io/arch: amd64 36 | nvidia.com/gpu: present 37 | karpenter.sh/nodepool: gpu-inference 38 | # Updated tolerations to match the taint on the GPU nodepool 39 | tolerations: 40 | - key: "model-inferencing" 41 | operator: "Equal" 42 | value: "gpu-inference" 43 | effect: "NoSchedule" 44 | volumes: 45 | - name: cache-volume 46 | persistentVolumeClaim: 47 | claimName: vllm-qwen-server-vision 48 | # vLLM needs to access the host's shared memory for tensor parallel inference.
49 | - name: shm 50 | emptyDir: 51 | medium: Memory 52 | sizeLimit: "32Gi" 53 | containers: 54 | - name: vllm-qwen-server-vision 55 | image: vllm/vllm-openai:latest 56 | # image: vllm/vllm-openai:v0.7.3 57 | command: ["/bin/sh", "-c"] 58 | args: [ 59 | "vllm serve Qwen/Qwen2.5-VL-7B-Instruct --enable-auto-tool-choice --tool-call-parser hermes --trust-remote-code --max-num-batched-tokens 8192 --max-num-seqs 8 --max-model-len 8192 --dtype bfloat16 --tensor-parallel-size 4 --gpu-memory-utilization 0.90" 60 | 61 | ] 62 | env: 63 | - name: HUGGING_FACE_HUB_TOKEN 64 | valueFrom: 65 | secretKeyRef: 66 | name: hf-token 67 | key: token 68 | - name: OMP_NUM_THREADS 69 | value: "8" 70 | - name: VLLM_LOGGING_LEVEL 71 | value: "DEBUG" 72 | - name: VLLM_DISABLE_COMPILE_CACHE 73 | value: "0" 74 | # - name: PYTORCH_CUDA_ALLOC_CONF 75 | # value: "max_split_size_mb:512,expandable_segments:True" 76 | - name: CUDA_VISIBLE_DEVICES 77 | value: "0,1,2,3" 78 | ports: 79 | - containerPort: 8000 80 | resources: 81 | limits: 82 | memory: 64Gi 83 | nvidia.com/gpu: "4" 84 | requests: 85 | cpu: "22" 86 | memory: 64Gi 87 | nvidia.com/gpu: "4" 88 | volumeMounts: 89 | - mountPath: /root/.cache/huggingface 90 | name: cache-volume 91 | - name: shm 92 | mountPath: /dev/shm 93 | livenessProbe: 94 | httpGet: 95 | path: /health 96 | port: 8000 97 | initialDelaySeconds: 240 98 | periodSeconds: 10 99 | failureThreshold: 30 100 | successThreshold: 1 101 | 102 | 103 | readinessProbe: 104 | httpGet: 105 | path: /health 106 | port: 8000 107 | initialDelaySeconds: 240 108 | periodSeconds: 10 109 | 110 | --- 111 | apiVersion: v1 112 | kind: Service 113 | metadata: 114 | name: vllm-qwen-server-vision 115 | spec: 116 | ports: 117 | - name: http-vllm-qwen-server-vision 118 | port: 8000 119 | protocol: TCP 120 | targetPort: 8000 121 | # The label selector should match the deployment labels & it is useful for prefix caching feature 122 | selector: 123 | app: vllm-qwen-server-vision 124 | sessionAffinity: None 125 | type: ClusterIP 126 | 127 | 128 | 129 | -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/src/test_agents.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Agent Testing Script 4 | 5 | This script tests the multi-agent system with various queries to ensure 6 | all components are working correctly, including Langfuse integration. 
7 | """ 8 | 9 | import sys 10 | import logging 11 | from typing import List, Dict, Any 12 | from .config import config 13 | from .utils.logging import setup_logging, log_title 14 | from .agents.supervisor_agent import supervisor_agent, supervisor_agent_with_langfuse 15 | from .agents.knowledge_agent import knowledge_agent, knowledge_agent_with_langfuse 16 | from .agents.mcp_agent import mcp_agent, mcp_agent_with_langfuse 17 | 18 | def main(): 19 | """Main function for testing agents.""" 20 | # Setup logging 21 | setup_logging() 22 | logger = logging.getLogger(__name__) 23 | 24 | try: 25 | # Validate configuration 26 | config.validate_config() 27 | 28 | log_title("AGENT TESTING SCRIPT") 29 | logger.info("Starting agent testing process") 30 | 31 | # Test individual agents first 32 | test_individual_agents() 33 | 34 | # Test supervisor agent 35 | test_supervisor_agent() 36 | 37 | print("\n🎉 All tests completed!") 38 | 39 | except KeyboardInterrupt: 40 | print("\n\nTesting interrupted by user.") 41 | sys.exit(0) 42 | except Exception as e: 43 | logger.error(f"Agent testing failed: {e}") 44 | print(f"❌ Error: {e}") 45 | sys.exit(1) 46 | 47 | def test_individual_agents(): 48 | """Test individual agents.""" 49 | log_title("INDIVIDUAL AGENT TESTS") 50 | 51 | # Test Knowledge Agent 52 | print("🧠 Testing Knowledge Agent...") 53 | try: 54 | if config.is_langfuse_enabled(): 55 | knowledge_response = knowledge_agent_with_langfuse("Please scan the knowledge directory and report what files are available.") 56 | print(f"✅ Knowledge Agent Response (with Langfuse): {str(knowledge_response)[:200]}...") 57 | else: 58 | knowledge_response = knowledge_agent("Please scan the knowledge directory and report what files are available.") 59 | print(f"✅ Knowledge Agent Response: {str(knowledge_response)[:200]}...") 60 | except Exception as e: 61 | print(f"❌ Knowledge Agent failed: {e}") 62 | 63 | # Test MCP Agent 64 | print("\n🔧 Testing MCP Agent...") 65 | try: 66 | if config.is_langfuse_enabled(): 67 | mcp_response = mcp_agent_with_langfuse("Please help me understand what tools are available for file operations.") 68 | print(f"✅ MCP Agent Response (with Langfuse): {str(mcp_response)[:200]}...") 69 | else: 70 | mcp_response = mcp_agent("Please help me understand what tools are available for file operations.") 71 | print(f"✅ MCP Agent Response: {str(mcp_response)[:200]}...") 72 | except Exception as e: 73 | print(f"❌ MCP Agent failed: {e}") 74 | 75 | def test_supervisor_agent(): 76 | """Test the supervisor agent with various queries.""" 77 | log_title("SUPERVISOR AGENT TESTS") 78 | 79 | test_queries = [ 80 | "What is the status of the knowledge base?", 81 | "Can you help me understand what files are in the knowledge directory?", 82 | "Please search for information about Bell's palsy if available." 
83 | ] 84 | 85 | for i, query in enumerate(test_queries, 1): 86 | print(f"\n🧪 Test {i}: {query}") 87 | 88 | try: 89 | if config.is_langfuse_enabled(): 90 | response = supervisor_agent_with_langfuse(query) 91 | print(f"✅ Success (with Langfuse): {str(response)[:300]}...") 92 | else: 93 | response = supervisor_agent(query) 94 | print(f"✅ Success: {str(response)[:300]}...") 95 | 96 | except Exception as e: 97 | print(f"❌ Error: {e}") 98 | 99 | if __name__ == "__main__": 100 | main() 101 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/src/EmbeddingRetriever.ts: -------------------------------------------------------------------------------- 1 | import { logTitle } from "./utils"; 2 | import MilvusVectorStore from "./MilvusVectorStore"; 3 | import 'dotenv/config'; 4 | import fetch from 'node-fetch'; 5 | 6 | export default class EmbeddingRetriever { 7 | private embeddingModel: string; 8 | private vectorStore: MilvusVectorStore; 9 | private embeddingEndpoint: string; 10 | 11 | constructor(embeddingModel: string) { 12 | this.embeddingModel = embeddingModel; 13 | this.vectorStore = new MilvusVectorStore(); 14 | this.embeddingEndpoint = 'http://18.232.167.163:8080/v1/embeddings'; 15 | } 16 | 17 | async embedDocument(document: string) { 18 | logTitle('EMBEDDING DOCUMENT'); 19 | const embedding = await this.embed(document); 20 | this.vectorStore.addEmbedding(embedding, document); 21 | return embedding; 22 | } 23 | 24 | async embedQuery(query: string) { 25 | logTitle('EMBEDDING QUERY'); 26 | const embedding = await this.embed(query); 27 | return embedding; 28 | } 29 | 30 | private async embed(document: string): Promise<number[]> { 31 | try { 32 | console.log(`Sending embedding request to custom endpoint: ${this.embeddingEndpoint}`); 33 | console.log(`Document length: ${document.length} characters`); 34 | 35 | const response = await fetch(this.embeddingEndpoint, { 36 | method: 'POST', 37 | headers: { 38 | 'Content-Type': 'application/json', 39 | }, 40 | body: JSON.stringify({ 41 | content: document 42 | }), 43 | }); 44 | 45 | if (!response.ok) { 46 | throw new Error(`HTTP error! Status: ${response.status}`); 47 | } 48 | 49 | const responseBody = await response.json(); 50 | 51 | // Check if we got a valid embedding 52 | // The response format is an array with objects containing the embedding 53 | if (Array.isArray(responseBody) && responseBody.length > 0 && responseBody[0].embedding) { 54 | // Extract the embedding from the first item in the array 55 | const embedding = responseBody[0].embedding; 56 | 57 | // Check if the embedding is a nested array and flatten it if needed 58 | const flatEmbedding = Array.isArray(embedding[0]) ?
embedding[0] : embedding; 59 | 60 | console.log(`Successfully received embedding with ${flatEmbedding.length} dimensions`); 61 | return flatEmbedding; 62 | } else { 63 | console.log("Warning: Embedding API didn't return a valid embedding"); 64 | console.log("Response:", JSON.stringify(responseBody, null, 2)); 65 | // Return a small random embedding vector for testing purposes (384 dims to match the collection) 66 | return Array(384).fill(0).map(() => Math.random()); 67 | } 68 | } catch (error) { 69 | console.error("Error fetching embedding from custom endpoint:", error); 70 | // Return a mock embedding in case of error (384 dims to match the collection) 71 | return Array(384).fill(0).map(() => Math.random()); 72 | } 73 | } 74 | 75 | async retrieve(query: string, topK: number = 3): Promise<string[]> { 76 | console.log(`Starting retrieval for query: "${query.substring(0, 50)}..."`); 77 | 78 | const queryEmbedding = await this.embedQuery(query); 79 | console.log(`Generated query embedding with ${queryEmbedding.length} dimensions`); 80 | 81 | // Log a few values from the embedding to check consistency 82 | console.log(`Embedding sample values: [${queryEmbedding.slice(0, 5).join(', ')}]`); 83 | 84 | const results = await this.vectorStore.search(queryEmbedding, topK); 85 | console.log(`Search returned ${results.length} results`); 86 | 87 | if (results.length === 0) { 88 | console.log(`WARNING: No results found for query: "${query.substring(0, 50)}..."`); 89 | } else { 90 | console.log(`First result preview: "${results[0].substring(0, 100)}..."`); 91 | } 92 | 93 | return results; 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/scripts/start_tavily_server.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Startup script for Tavily MCP Server 4 | """ 5 | 6 | import os 7 | import sys 8 | import subprocess 9 | import time 10 | import requests 11 | from pathlib import Path 12 | from dotenv import load_dotenv 13 | 14 | # Add the project root to Python path 15 | project_root = Path(__file__).parent.parent 16 | sys.path.insert(0, str(project_root)) 17 | 18 | # Load environment variables from .env file 19 | env_file = project_root / ".env" 20 | if env_file.exists(): 21 | load_dotenv(env_file) 22 | print(f"✅ Loaded environment variables from {env_file}") 23 | else: 24 | print(f"⚠️ No .env file found at {env_file}") 25 | print("Please create a .env file with your configuration") 26 | 27 | def check_tavily_api_key(): 28 | """Check if Tavily API key is configured""" 29 | api_key = os.getenv("TAVILY_API_KEY") 30 | if not api_key: 31 | print("❌ TAVILY_API_KEY environment variable is not set!") 32 | print("Please get your API key from https://tavily.com and set it in your .env file:") 33 | print("TAVILY_API_KEY=your-api-key-here") 34 | return False 35 | print(f"✅ Tavily API key configured: {api_key[:8]}...") 36 | return True 37 | 38 | def check_server_health(max_retries=10, delay=2): 39 | """Check if the Tavily MCP server is running and healthy""" 40 | for attempt in range(max_retries): 41 | try: 42 | # Try to connect to the MCP server endpoint 43 | response = requests.get("http://localhost:8001/", timeout=5) 44 | if response.status_code in [200, 404]: # 404 is OK for MCP server root 45 | print("✅ Tavily MCP server is healthy and ready!") 46 | return True 47 | except requests.exceptions.RequestException: 48 | pass 49 | 50 | if attempt < max_retries - 1: 51 | print(f"⏳ Waiting for server to start...
(attempt {attempt + 1}/{max_retries})") 52 | time.sleep(delay) 53 | 54 | print("❌ Server health check failed after maximum retries") 55 | return False 56 | 57 | def start_tavily_server(): 58 | """Start the Tavily MCP server""" 59 | if not check_tavily_api_key(): 60 | return False 61 | 62 | print("🚀 Starting Tavily MCP Server...") 63 | 64 | # Path to the server script 65 | server_script = project_root / "src" / "mcp_servers" / "tavily_search_server.py" 66 | 67 | if not server_script.exists(): 68 | print(f"❌ Server script not found: {server_script}") 69 | return False 70 | 71 | try: 72 | # Start the server as a subprocess 73 | process = subprocess.Popen([ 74 | sys.executable, str(server_script) 75 | ], cwd=str(project_root)) 76 | 77 | print(f"📡 Server started with PID: {process.pid}") 78 | print("🔗 MCP server available at: http://localhost:8001/mcp") 79 | 80 | # Wait a moment for server to start 81 | time.sleep(3) 82 | 83 | # Check if server is healthy 84 | if check_server_health(): 85 | print("\n🎉 Tavily MCP Server is ready!") 86 | print("\nAvailable tools:") 87 | print(" - web_search: General web search with AI-generated answers") 88 | print(" - news_search: Recent news and current events search") 89 | print(" - health_check: Service health status") 90 | print("\n💡 The supervisor agent will automatically use web search when RAG relevance is low (<0.3)") 91 | return True 92 | else: 93 | print("❌ Server failed to start properly") 94 | process.terminate() 95 | return False 96 | 97 | except Exception as e: 98 | print(f"❌ Failed to start server: {e}") 99 | return False 100 | 101 | if __name__ == "__main__": 102 | success = start_tavily_server() 103 | if success: 104 | print("\n✨ Server is running! Press Ctrl+C to stop.") 105 | try: 106 | # Keep the script running 107 | while True: 108 | time.sleep(1) 109 | except KeyboardInterrupt: 110 | print("\n🛑 Shutting down server...") 111 | else: 112 | print("\n❌ Failed to start Tavily MCP Server") 113 | sys.exit(1) 114 | -------------------------------------------------------------------------------- /agentic-apps/agentic_rag_milvus/README.md: -------------------------------------------------------------------------------- 1 | # Agentic RAG with MCP and Custom Embedding 2 | 3 | This project implements an augmented Large Language Model (LLM) system that combines Model Context Protocol (MCP) for tool usage and Retrieval Augmented Generation (RAG) for enhanced context awareness, all without relying on frameworks like LangChain or LlamaIndex. 4 | 5 | ## Project Overview 6 | 7 | This application creates an AI agent that can: 8 | 1. Retrieve relevant information from a knowledge base using vector embeddings 9 | 2. Interact with external tools through the Model Context Protocol (MCP) 10 | 3. Generate responses based on both the retrieved context and tool interactions 11 | 4. Complete tasks like summarizing content and saving results to files 12 | 13 | ## Architecture 14 | 15 | The system is built with a modular architecture consisting of these key components: 16 | 17 | ``` 18 | Agent → Manages the overall workflow and coordinates components 19 | ├── ChatOpenAI → Handles LLM interactions and tool calling 20 | ├── MCPClient(s) → Connects to MCP servers for tool access 21 | └── EmbeddingRetriever → Performs vector search for relevant context 22 | └── VectorStore → Stores and searches document embeddings 23 | ``` 24 | 25 | ## Workflow Explanation 26 | 27 | 1. 
**Initialization**: 28 | - The system loads knowledge documents and creates embeddings using a custom embedding endpoint 29 | - Embeddings are stored in a Milvus vector database 30 | - MCP clients are initialized to connect to tool servers 31 | 32 | 2. **RAG Process**: 33 | - When a query is received, it's converted to an embedding 34 | - The system searches for the most relevant documents using cosine similarity 35 | - Retrieved documents are combined to form context for the LLM 36 | 37 | 3. **Agent Execution**: 38 | - The agent initializes with the LLM, MCP clients, and retrieved context 39 | - The user query is sent to the LLM along with the context 40 | - The LLM generates responses and may request tool calls 41 | 42 | 4. **Tool Usage**: 43 | - When the LLM requests a tool, the agent routes the call to the appropriate MCP client 44 | - The MCP client executes the tool and returns results 45 | - Results are fed back to the LLM to continue the conversation 46 | 47 | 5. **Output Generation**: 48 | - The LLM generates a final response incorporating tool results and context 49 | - In the example task, it creates a markdown file with information about "Antonette" 50 | 51 | ## Key Components 52 | 53 | - **Agent**: Coordinates the overall workflow and manages tool usage 54 | - **ChatOpenAI**: Handles interactions with the language model and tool calling 55 | - **MCPClient**: Connects to MCP servers and manages tool calls 56 | - **EmbeddingRetriever**: Creates and searches vector embeddings for relevant context 57 | - **MilvusVectorStore**: Interfaces with Milvus for storing and retrieving embeddings 58 | 59 | ## Getting Started 60 | 61 | ### Prerequisites 62 | 63 | - Node.js 18+ 64 | - pnpm or npm 65 | - OpenAI API key 66 | - Milvus database instance 67 | 68 | ### Installation 69 | 70 | ```bash 71 | # Clone the repository 72 | git clone <repository-url> 73 | 74 | # Install dependencies 75 | pnpm install 76 | 77 | # Set up environment variables 78 | # Create a .env file with: 79 | # - OPENAI_API_KEY 80 | # - OPENAI_BASE_URL (optional) 81 | # - MILVUS_ADDRESS 82 | ``` 83 | 84 | ### Usage 85 | 86 | ```bash 87 | # Embed knowledge documents 88 | pnpm embed-knowledge 89 | 90 | # Embed CSV data (optional) 91 | pnpm embed-csv 92 | 93 | # Run the application 94 | pnpm dev 95 | ``` 96 | 97 | ## Example Use Case 98 | 99 | The current implementation demonstrates a task where the agent: 100 | 1. Retrieves information about a user named "Antonette" from the knowledge base 101 | 2. Summarizes the information and creates a story about her 102 | 3. Saves the output to a markdown file using the filesystem MCP tool 103 | 104 | ## Extending the System 105 | 106 | This modular architecture can be easily extended: 107 | - Add more MCP servers for additional tool capabilities 108 | - Implement advanced Milvus features like filtering and hybrid search 109 | - Add more sophisticated RAG techniques like reranking or chunking 110 | - Implement conversation history for multi-turn interactions 111 | - Deploy as a service with API endpoints 112 | - Integrate with different LLM providers 113 | - Scale the vector database for production workloads 114 | -------------------------------------------------------------------------------- /agentic-apps/strandsdk_agentic_rag_opensearch/run_single_query_clean.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Clean single query runner with complete async error suppression.
4 | """ 5 | 6 | import sys 7 | import os 8 | import warnings 9 | 10 | # Add current directory to path 11 | sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) 12 | 13 | class CompleteAsyncErrorFilter: 14 | """Complete async error filter that suppresses all async-related output.""" 15 | 16 | def __init__(self): 17 | self.original_stderr = sys.__stderr__ 18 | 19 | def write(self, text): 20 | """Filter out all async RuntimeErrors and related output.""" 21 | if not text.strip(): 22 | return 23 | 24 | # Comprehensive list of patterns to suppress 25 | suppress_patterns = [ 26 | "RuntimeError", 27 | "httpcore", 28 | "_synchronization", 29 | "asyncio", 30 | "anyio", 31 | "sniffio", 32 | "await", 33 | "async", 34 | "CancelScope", 35 | "shield", 36 | "current_task", 37 | "get_running_loop", 38 | "cancel_shielded_checkpoint", 39 | "_anyio_lock", 40 | "acquire", 41 | "File \"/home/ubuntu/Cost_Effective_and_Scalable_Models_Inference_on_AWS_Graviton/agentic-apps/strandsdk_agentic_rag_opensearch/venv/lib/python3.10/site-packages/httpcore", 42 | "File \"/usr/lib/python3.10/asyncio", 43 | "raise RuntimeError", 44 | ] 45 | 46 | # Check if this line should be suppressed 47 | should_suppress = any(pattern in text for pattern in suppress_patterns) 48 | 49 | # Also suppress lines that are just punctuation or whitespace 50 | if text.strip() in [":", "RuntimeError:", "RuntimeError: ", "RuntimeError", ""]: 51 | should_suppress = True 52 | 53 | # Only write if not suppressed and contains meaningful content 54 | if not should_suppress and len(text.strip()) > 1: 55 | self.original_stderr.write(text) 56 | self.original_stderr.flush() 57 | 58 | def flush(self): 59 | """Flush the original stderr.""" 60 | self.original_stderr.flush() 61 | 62 | def setup_complete_clean_environment(): 63 | """Set up completely clean environment.""" 64 | 65 | # Suppress all warnings 66 | warnings.filterwarnings("ignore") 67 | 68 | # Install complete error filter 69 | sys.stderr = CompleteAsyncErrorFilter() 70 | 71 | # Try to import and use existing cleanup if available 72 | try: 73 | from src.utils.global_async_cleanup import setup_global_async_cleanup 74 | setup_global_async_cleanup() 75 | except ImportError: 76 | pass 77 | 78 | def run_clean_query(query: str): 79 | """Run a single query with completely clean output.""" 80 | print("🚀 Enhanced RAG System - Single Query (Ultra Clean Mode)") 81 | print("=" * 60) 82 | print(f"Query: {query}") 83 | print("=" * 60) 84 | print("Note: All async errors and warnings are completely suppressed") 85 | print("=" * 60) 86 | 87 | # Set up complete clean environment FIRST 88 | setup_complete_clean_environment() 89 | 90 | try: 91 | from src.agents.supervisor_agent import supervisor_agent 92 | 93 | print("\n🔍 Processing query...") 94 | response = supervisor_agent(query) 95 | 96 | print("\n📝 Response:") 97 | print("-" * 40) 98 | print(response) 99 | print("-" * 40) 100 | print("\n✅ Query completed successfully!") 101 | 102 | return True 103 | 104 | except Exception as e: 105 | # Only show truly important errors 106 | error_msg = str(e) 107 | if not any(keyword in error_msg.lower() for keyword in [ 108 | "runtimeerror", "httpcore", "asyncio", "anyio", "await", "async" 109 | ]): 110 | print(f"\n❌ Error processing query: {e}") 111 | return False 112 | 113 | if __name__ == "__main__": 114 | # Test with a sample query 115 | test_query = "What is Bell's palsy and how is it treated?" 
116 | 117 | if len(sys.argv) > 1: 118 | # Use command line argument if provided 119 | test_query = " ".join(sys.argv[1:]) 120 | 121 | success = run_clean_query(test_query) 122 | sys.exit(0 if success else 1) 123 | --------------------------------------------------------------------------------
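Two usage sketches follow; neither file exists in the repository, and the endpoints, ports, and payload shapes are assumptions drawn from the manifests and source code above.

First, a minimal smoke test for the OpenAI-compatible vLLM service defined in `standalone-vllm-reasoning.yaml`, assuming the ClusterIP service has been forwarded locally:

```python
# Minimal sketch: query the vLLM chat completions endpoint.
# Assumes: kubectl port-forward svc/vllm-qwen-server 8000:8000
import requests

resp = requests.post(
    "http://localhost:8000/v1/chat/completions",
    json={
        "model": "Qwen/Qwen3-14B",  # the model served in standalone-vllm-reasoning.yaml
        "messages": [{"role": "user", "content": "What is Bell's palsy?"}],
        "max_tokens": 256,
    },
    timeout=300,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```

Second, a sketch of the embedding API contract that `EmbeddingRetriever.ts` assumes: a POST with a `content` field, answered by an array whose first element carries a 384-dimension `embedding` (possibly nested). The URL below is the address hardcoded in the source; replace it with your own embedding service.

```python
# Illustrates the request/response shape expected by EmbeddingRetriever.embed().
import requests

resp = requests.post(
    "http://18.232.167.163:8080/v1/embeddings",  # replace with your embedding endpoint
    json={"content": "Who is Antonette?"},
    timeout=60,
)
resp.raise_for_status()
payload = resp.json()  # expected shape: [{"embedding": [...]}]
embedding = payload[0]["embedding"]
if embedding and isinstance(embedding[0], list):
    embedding = embedding[0]  # flatten nested arrays, as the TypeScript client does
print(f"Received embedding with {len(embedding)} dimensions")
```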