├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── NOTICE.txt ├── README.md ├── admin-ui ├── backend │ └── app │ │ ├── Dockerfile │ │ ├── __init__.py │ │ ├── config.py │ │ ├── dependencies.py │ │ ├── main.py │ │ ├── metric_routes.py │ │ ├── models.py │ │ ├── openapi.json │ │ ├── relay_routes.py │ │ ├── requirements.txt │ │ └── utils.py └── frontend │ └── foundations-admin │ ├── README.md │ ├── assets │ └── css │ │ ├── main.css │ │ └── swagger-ui.css.map │ ├── components │ ├── Applications │ │ └── Table.vue │ └── Metrics │ │ └── Invocation │ │ └── LogsTable.vue │ ├── layouts │ └── shell.vue │ ├── middleware │ └── auth.ts │ ├── nuxt.config.ts │ ├── package-lock.json │ ├── package.json │ ├── pages │ ├── app │ │ ├── applications │ │ │ └── index.vue │ │ ├── index.vue │ │ ├── metrics │ │ │ └── index.vue │ │ ├── playground │ │ │ └── index.vue │ │ └── services │ │ │ └── index.vue │ └── index.vue │ ├── plugins │ └── cognito-config.js │ ├── public │ ├── spec.json │ ├── swagger-ui-bundle.js │ ├── swagger-ui-standalone-preset.js │ └── swagger-ui.css │ ├── server │ └── tsconfig.json │ ├── tailwind.config.js │ └── tsconfig.json ├── build-images.sh ├── build-ui.sh ├── cdk └── GenAIFoundations │ ├── README.md │ ├── bin │ └── gen_ai_foundations.ts │ ├── cdk.json │ ├── jest.config.js │ ├── lib │ ├── foundations-main-stack.ts │ └── waf-stack.ts │ ├── migrate.json │ ├── package-lock.json │ ├── package.json │ └── tsconfig.json ├── config.txt ├── cookbook ├── notebooks │ └── quickstarts │ │ ├── quickstart-rag.ipynb │ │ ├── quickstart-summarization.ipynb │ │ ├── reqs.txt │ │ └── utils.py └── sample-apps │ ├── document_comparision │ ├── README.md │ ├── app.py │ ├── reqs.txt │ └── sdk │ │ ├── __init__.py │ │ └── accelerator.py │ ├── document_summarization │ ├── README.md │ ├── app.py │ ├── reqs.txt │ └── sdk │ │ ├── __init__.py │ │ └── accelerator.py │ └── simple-chat │ ├── README.md │ ├── app.py │ ├── reqs.txt │ └── sdk │ ├── __init__.py │ └── accelerator.py ├── docs ├── adminportal.md ├── api_docs │ ├── README.md │ ├── api_docs.html │ ├── assets │ │ ├── swagger-initializer.js │ │ ├── swagger-ui-bundle.js │ │ ├── swagger-ui-bundle.js.map │ │ ├── swagger-ui-es-bundle-core.js │ │ ├── swagger-ui-es-bundle-core.js.map │ │ ├── swagger-ui-es-bundle.js │ │ ├── swagger-ui-es-bundle.js.map │ │ ├── swagger-ui-standalone-preset.js │ │ ├── swagger-ui-standalone-preset.js.map │ │ ├── swagger-ui.css │ │ ├── swagger-ui.css.map │ │ ├── swagger-ui.js │ │ ├── swagger-ui.js.map │ │ ├── swagger.css │ │ └── swagger.js │ ├── openapi.json │ └── spec.js ├── implementation.md └── microservices.md ├── image ├── HighLevelArchitecture.png ├── adminapiplayground.png ├── adminlogin.gif ├── adminmetrics.gif ├── adminonboardapp.gif ├── adminservices.png ├── architecture.png ├── authentication_flow.png ├── chunkingprocess.png ├── deployment.png ├── extractionprocess.png └── how-it-works.png ├── sdk ├── accelerator.py ├── quickstart-sdk.ipynb └── reqs.txt ├── services ├── foundations_chunking │ ├── Dockerfile │ ├── app.py │ ├── models.py │ ├── requirements.txt │ └── utils │ │ ├── fixed_size_chunking.py │ │ ├── json_chunking.py │ │ ├── page_wise_chunking.py │ │ └── recursive_chunking.py ├── foundations_document_processing │ ├── Dockerfile │ ├── app.py │ ├── models.py │ └── requirements.txt ├── foundations_extraction │ ├── Dockerfile │ ├── app.py │ ├── models.py │ ├── requirements.txt │ └── utils │ │ └── extractor.py ├── foundations_model_invocation │ ├── Dockerfile │ ├── adapters.py │ ├── app.py │ ├── models.py │ └── requirements.txt ├── 
foundations_prompt_management │ ├── Dockerfile │ ├── app.py │ ├── models.py │ └── requirements.txt ├── foundations_vector_job_process │ ├── Dockerfile │ ├── app.py │ ├── models.py │ ├── requirements.txt │ └── utils │ │ └── vectorize.py └── foundations_vectorization │ ├── Dockerfile │ ├── app.py │ ├── models.py │ ├── requirements.txt │ └── utils │ └── opensearchutil.py └── testing ├── auth └── test-service.py └── models └── test_model_invoke.py /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. 
As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT No Attribution 2 | 3 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so. 10 | 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 13 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 14 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 15 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 16 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 17 | 18 | -------------------------------------------------------------------------------- /admin-ui/backend/app/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM --platform=linux/amd64 python:3.9-alpine 2 | 3 | WORKDIR /app 4 | 5 | COPY . 
/app 6 | 7 | RUN pip install --no-cache-dir -r requirements.txt 8 | 9 | RUN apk --no-cache add curl 10 | 11 | EXPOSE 80 12 | 13 | CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"] 14 | 15 | -------------------------------------------------------------------------------- /admin-ui/backend/app/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-applications-foundational-architecture/b237eeff1f4bbb6526a08941f9b6fa53d90c8c64/admin-ui/backend/app/__init__.py -------------------------------------------------------------------------------- /admin-ui/backend/app/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # # For UI authentication 4 | # COGNITO_CLIENT_ID = os.getenv("COGNITO_CLIENT_ID") 5 | # COGNITO_JWK_URL = os.getenv("COGNITO_JWK_URL", "") 6 | # AWS_REGION = os.getenv("AWS_REGION","") 7 | # USER_POOL_ID = os.getenv("USER_POOL_ID") 8 | 9 | # # For UI Backend authentication 10 | # APP_USER_POOL_ID = os.getenv("APP_USER_POOL_ID") 11 | # PLATFORM_APP_CLIENT_ID = os.getenv("PLATFORM_APP_CLIENT_ID") 12 | # PLATFORM_APP_CLIENT_SECRET = os.getenv("PLATFORM_APP_CLIENT_SECRET") 13 | # PLATFORM_DOMAIN = os.getenv("PLATFORM_DOMAIN") 14 | # DYNAMODB_TABLE_NAME = os.getenv("DYNAMODB_TABLE_NAME") 15 | # PLATFORM_BASE_URL = os.getenv("PLATFORM_BASE_URL") 16 | # PLARFORM_SERVICES = os.getenv("PLARFORM_SERVICES") 17 | # OPENAPI_SPEC = os.getenv("OPENAPI_SPEC") 18 | # CORS_ORIGIN = os.getenv("CORS_ORIGIN") 19 | 20 | # # Model Service 21 | # INVOCATION_LOG_TABLE = os.getenv("INVOCATION_LOG_TABLE") 22 | 23 | # PLARFORM_SERVICES = { 24 | # "document_processing": { 25 | # "service_name": "Extraction Service", 26 | # "service_url": "document/service/health", 27 | # "base_path": "/admin/document/" 28 | # }, 29 | # "model_invocation": { 30 | # "service_name": "Model Invocation Service", 31 | # "service_url": "model/service/health", 32 | # "base_path": "/admin/model/" 33 | # } 34 | # } 35 | 36 | class ConfManager: 37 | def __init__(self): 38 | self.COGNITO_CLIENT_ID = os.getenv("COGNITO_CLIENT_ID") 39 | self.COGNITO_JWK_URL = os.getenv("COGNITO_JWK_URL", "") 40 | self.AWS_REGION = os.getenv("AWS_REGION","") 41 | self.USER_POOL_ID = os.getenv("USER_POOL_ID") 42 | self.APP_USER_POOL_ID = os.getenv("APP_USER_POOL_ID") 43 | self.PLATFORM_APP_CLIENT_ID = os.getenv("PLATFORM_APP_CLIENT_ID") 44 | self.PLATFORM_DOMAIN = os.getenv("PLATFORM_DOMAIN") 45 | self.DYNAMODB_TABLE_NAME = os.getenv("DYNAMODB_TABLE_NAME") 46 | self.PLATFORM_BASE_URL = os.getenv("PLATFORM_BASE_URL") 47 | self.PLARFORM_SERVICES = os.getenv("PLARFORM_SERVICES") 48 | self.OPENAPI_SPEC = os.getenv("OPENAPI_SPEC") 49 | self.CORS_ORIGIN = os.getenv("CORS_ORIGIN") 50 | self.INVOCATION_LOG_TABLE = os.getenv("INVOCATION_LOG_TABLE") 51 | self.PLARFORM_SERVICES = { 52 | "document_processing": { 53 | "service_name": "Extraction Service", 54 | "service_url": "document/service/health", 55 | "base_path": "/admin/document/" 56 | }, 57 | "model_invocation": { 58 | "service_name": "Model Invocation Service", 59 | "service_url": "model/service/health", 60 | "base_path": "/admin/model/" 61 | }, 62 | "vectorization": { 63 | "service_name": "Vectorization Service", 64 | "service_url": "vector/service/health", 65 | "base_path": "/admin/vector/" 66 | }, 67 | "prompt_management": { 68 | "service_name": "Prompt Management Service", 69 | "service_url": "prompt/service/health", 70 | "base_path": 
"/admin/prompt/" 71 | } 72 | } 73 | 74 | def get_conf(self): 75 | return { 76 | "COGNITO_CLIENT_ID": self.COGNITO_CLIENT_ID, 77 | "COGNITO_JWK_URL": self.COGNITO_JWK_URL, 78 | "AWS_REGION": self.AWS_REGION, 79 | "USER_POOL_ID": self.USER_POOL_ID, 80 | "APP_USER_POOL_ID": self.APP_USER_POOL_ID, 81 | "PLATFORM_APP_CLIENT_ID": self.PLATFORM_APP_CLIENT_ID, 82 | "PLATFORM_DOMAIN": self.PLATFORM_DOMAIN, 83 | "DYNAMODB_TABLE_NAME": self.DYNAMODB_TABLE_NAME, 84 | "PLATFORM_BASE_URL": self.PLATFORM_BASE_URL, 85 | "PLARFORM_SERVICES": self.PLARFORM_SERVICES, 86 | "OPENAPI_SPEC": self.OPENAPI_SPEC, 87 | "CORS_ORIGIN": self.CORS_ORIGIN, 88 | "INVOCATION_LOG_TABLE": self.INVOCATION_LOG_TABLE 89 | } 90 | 91 | conf = ConfManager() 92 | 93 | 94 | 95 | 96 | 97 | -------------------------------------------------------------------------------- /admin-ui/backend/app/dependencies.py: -------------------------------------------------------------------------------- 1 | from fastapi import Request, HTTPException, Depends 2 | import jwt 3 | from utils import get_cognito_public_keys 4 | import json 5 | import config 6 | from utils import cognito_token_manager 7 | # from jwt.contrib.algorithms.pycrypto import RSAAlgorithm 8 | # jwt.register_algorithm('RS256', RSAAlgorithm(RSAAlgorithm.SHA256)) 9 | from cryptography.hazmat.primitives.asymmetric import rsa 10 | from cryptography.hazmat.primitives import serialization 11 | from cryptography.hazmat.backends import default_backend 12 | import base64 13 | import logging 14 | 15 | logging.basicConfig(level=logging.INFO) 16 | logger = logging.getLogger(__name__) 17 | 18 | conf = config.ConfManager() 19 | 20 | def jwk_to_pem(jwk): 21 | exponent = base64.urlsafe_b64decode(jwk['e'] + '==') 22 | modulus = base64.urlsafe_b64decode(jwk['n'] + '==') 23 | 24 | public_numbers = rsa.RSAPublicNumbers( 25 | int.from_bytes(exponent, byteorder='big'), 26 | int.from_bytes(modulus, byteorder='big') 27 | ) 28 | public_key = public_numbers.public_key(default_backend()) 29 | pem = public_key.public_bytes( 30 | encoding=serialization.Encoding.PEM, 31 | format=serialization.PublicFormat.SubjectPublicKeyInfo 32 | ) 33 | return pem 34 | 35 | async def verify_token(request: Request): 36 | id_token = request.cookies.get("access_token") 37 | if not id_token: 38 | raise HTTPException(status_code=401, detail="Not authenticated") 39 | 40 | try: 41 | keys = await get_cognito_public_keys() 42 | unverified_header = jwt.get_unverified_header(id_token) 43 | rsa_key_pem = None 44 | for key in keys["keys"]: 45 | if key["kid"] == unverified_header["kid"]: 46 | rsa_key_pem = jwk_to_pem(key) 47 | break 48 | 49 | if rsa_key_pem: 50 | payload = jwt.decode( 51 | id_token, 52 | rsa_key_pem, 53 | algorithms=["RS256"], 54 | options={"verify_signature": True, "verify_aud":False} 55 | ) 56 | 57 | return payload 58 | else: 59 | print("Invalid token header") 60 | raise HTTPException(status_code=401, detail="Invalid token header") 61 | except jwt.PyJWTError as e: 62 | logger.error(f"Token verification failed: {str(e)}") 63 | raise HTTPException(status_code=401, detail=f"Token verification failed: {str(e)}") 64 | 65 | def get_cognito_token(): 66 | return cognito_token_manager.get_token() 67 | -------------------------------------------------------------------------------- /admin-ui/backend/app/models.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | from datetime import datetime 3 | from typing import Optional, List, Dict, Any 4 | 5 | from dyntastic import Dyntastic 6 | 
from pydantic import Field 7 | import os 8 | from pydantic import BaseModel 9 | from enum import Enum 10 | 11 | class ModelInvocationLogs(Dyntastic): 12 | __table_name__ = lambda: os.environ.get("INVOCATION_LOG_TABLE") 13 | __hash_key__ = "invocation_id" 14 | 15 | invocation_id: str = Field(default_factory=lambda: str(uuid.uuid4())) 16 | timestamp: datetime = Field(default_factory=datetime.now) 17 | model_name: str 18 | model_id: str 19 | input_tokens: Optional[int] = None 20 | output_tokens: Optional[int] = None 21 | app_id: str 22 | status: str 23 | error_message: Optional[str] = None 24 | 25 | class ExtractionJobs(Dyntastic): 26 | __table_name__ = lambda: os.environ.get("EXTRACTION_JOBS_TABLE") 27 | __hash_key__ = "job_id" 28 | 29 | job_id: str = Field(default_factory=lambda: str(uuid.uuid4())) 30 | app_id: str 31 | completed_file_count: int = 0 32 | total_file_count: int 33 | failed_file_count: int = 0 34 | status: str = "CREATED" 35 | queued_files: int = 0 36 | timestamp: datetime = Field(default_factory=datetime.now) 37 | 38 | class ExtractionJobFiles(Dyntastic): 39 | 40 | __table_name__ = lambda: os.environ.get("EXTRACTION_JOB_FILES_TABLE") 41 | __hash_key__ = "job_id" 42 | __range_key__ = "file_name" 43 | 44 | job_id: str 45 | file_name: str 46 | file_path: str 47 | file_id: str 48 | status: str = "PENDING" 49 | timestamp: datetime = Field(default_factory=datetime.now) 50 | 51 | 52 | class ChunkingJobs(Dyntastic): 53 | __table_name__ = lambda: os.environ.get("CHUNKING_JOBS_TABLE") 54 | __hash_key__ = "chunking_job_id" 55 | 56 | chunking_job_id: str = Field(default_factory=lambda: str(uuid.uuid4())) 57 | extraction_job_id: str 58 | app_id: str 59 | status: str 60 | chunking_strategy: str 61 | chunking_params: str 62 | total_file_count: int 63 | queued_files: int 64 | completed_files: int 65 | failed_files: int 66 | timestamp: datetime = Field(default_factory=datetime.now) 67 | 68 | class ChunkingJobFiles(Dyntastic): 69 | __table_name__ = lambda: os.environ.get("CHUNKING_JOBS_FILES_TABLE") 70 | __hash_key__ = "chunk_job_file_id" 71 | 72 | chunk_job_file_id: str 73 | chunking_job_id: str 74 | app_id: str 75 | file_name: str 76 | file_path: str 77 | file_id: str 78 | status: str 79 | timestamp: datetime = Field(default_factory=datetime.now) 80 | 81 | class PromptTemplate(Dyntastic): 82 | __table_name__ = lambda: os.environ.get("PROMPT_TEMPLATE_TABLE") 83 | __hash_key__ = "name" 84 | 85 | id: str = Field(default_factory=lambda: str(uuid.uuid4())) 86 | app_id: str 87 | name: str 88 | prompt_template: str 89 | version: int 90 | timestamp: datetime = Field(default_factory=datetime.now) 91 | 92 | class VectorStore(Dyntastic): 93 | __table_name__ = lambda: os.environ.get("VECTOR_STORES_TABLE") 94 | __hash_key__ = "vector_store_id" 95 | __range_key__ = "app_id" 96 | 97 | vector_store_id: str = Field(default_factory=lambda: str(uuid.uuid4())) 98 | store_name: str 99 | app_id: str 100 | created_at: datetime = Field(default_factory=datetime.now) 101 | host: str 102 | store_type: str 103 | 104 | class VectorIndex(Dyntastic): 105 | __table_name__ = lambda: os.environ.get("VECTOR_STORES_INDEX_TABLE") 106 | __hash_key__ = "index_id" 107 | 108 | index_id: str = Field(default_factory=lambda: str(uuid.uuid4())) 109 | vector_store_id: str 110 | index_name: str 111 | created_at: datetime = Field(default_factory=datetime.now) 112 | 113 | class VectorizationJobs(Dyntastic): 114 | 115 | __table_name__ = lambda: os.environ.get("VECTORIZE_JOBS_TABLE") 116 | __hash_key__ = "vectorize_job_id" 117 | 118 | 
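    # Tracks one vectorization run end-to-end; per-file progress lives in
    # VectorizationJobFiles below. As with the other models in this module,
    # __table_name__ is a lambda so the DynamoDB table name is resolved from
    # the environment at runtime rather than at import time.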
vectorize_job_id: str = Field(default_factory=lambda: str(uuid.uuid4())) 119 | vector_store_id: str 120 | index_id: str 121 | chunking_job_id: str 122 | created_at: datetime = Field(default_factory=datetime.now) 123 | status: str 124 | total_file_count: int 125 | queued_files: int 126 | completed_file_count: int 127 | failed_file_count: int 128 | 129 | class VectorizationJobFiles(Dyntastic): 130 | 131 | __table_name__ = lambda: os.environ.get("VECTORIZE_JOB_FILES_TABLE") 132 | __hash_key__ = "vectorize_job_file_id" 133 | 134 | vectorize_job_file_id: str = Field(default_factory=lambda: str(uuid.uuid4())) 135 | vectorize_job_id: str 136 | file_path: str 137 | status: str 138 | created_at: datetime = Field(default_factory=datetime.now) -------------------------------------------------------------------------------- /admin-ui/backend/app/relay_routes.py: -------------------------------------------------------------------------------- 1 | # relay_routes.py 2 | from fastapi import APIRouter, Request, Depends, HTTPException 3 | from dependencies import verify_token, get_cognito_token 4 | from utils import cognito_token_manager 5 | from config import conf 6 | import requests 7 | from typing import Dict, Any, Type 8 | from pydantic import BaseModel, create_model 9 | import logging 10 | 11 | logging.basicConfig(level=logging.INFO) 12 | logger = logging.getLogger(__name__) 13 | 14 | 15 | 16 | router = APIRouter() 17 | EXTERNAL_API_URL = conf.PLATFORM_BASE_URL.rstrip("/") 18 | 19 | def generate_post_endpoints(openapi_spec: Dict[str, Any]): 20 | for path, path_item in openapi_spec.get("paths", {}).items(): 21 | for method, operation in path_item.items(): 22 | if method not in ["get", "post", "put", "delete"]: 23 | continue 24 | 25 | request_model = None 26 | if "requestBody" in operation: 27 | request_body = operation["requestBody"] 28 | if "application/json" in request_body["content"]: 29 | schema = request_body["content"]["application/json"]["schema"] 30 | request_model = create_model("") 31 | 32 | path = '/admin' + path 33 | endpoint_function = create_relay_endpoint_function(path, method, request_model) 34 | router.add_api_route(path, endpoint_function, methods=[method.upper()]) 35 | elif method == "post": 36 | path = '/admin' + path 37 | endpoint_function = create_relay_endpoint_function(path, method) 38 | router.add_api_route(path, endpoint_function, methods=[method.upper()]) 39 | 40 | def generate_get_endpoints(): 41 | services = conf.PLARFORM_SERVICES 42 | for service, details in services.items(): 43 | base_path = details.get("base_path") + "{full_path:path}" # => /admin/document/{full_path:path} 44 | endpoint_function = create_get_endpoint_function(base_path) 45 | router.add_api_route(base_path, endpoint_function, methods=["GET"]) 46 | 47 | def create_get_endpoint_function(path: str): 48 | async def get_ep(full_path: str, request: Request, payload: dict = Depends(verify_token), token: str = Depends(get_cognito_token)): 49 | headers = {"Authorization": f"Bearer {token}"} 50 | logger.info(f"Full path: {full_path}") 51 | logger.info(f"Eternal API URL: {EXTERNAL_API_URL}") 52 | logger.info(f"Base path: {path}") 53 | base_path = path.split("{")[0] 54 | url = f"{EXTERNAL_API_URL}{path}{full_path}".replace("/admin", "").replace("{full_path:path}", "") 55 | 56 | logger.info(f"Making GET request to {url} with headers {headers}") 57 | 58 | try: 59 | response = requests.get(url, headers=headers, timeout=120) 60 | response.raise_for_status() 61 | except requests.exceptions.RequestException as e: 62 | raise 
HTTPException(status_code=500, detail="Internal Server Error") 63 | 64 | try: 65 | response_json = response.json() 66 | except ValueError: 67 | raise HTTPException(status_code=500, detail="Internal Server Error") 68 | 69 | return response_json 70 | 71 | return get_ep 72 | 73 | def create_relay_endpoint_function(path: str, method: str, request_model: Type[BaseModel] = None): 74 | async def relay_ep(request: Request, payload: dict = Depends(verify_token), token: str = Depends(get_cognito_token)): 75 | headers = {"Authorization": f"Bearer {token}"} 76 | url = f"{EXTERNAL_API_URL}{path}".replace("/admin", "") 77 | # url = f"{EXTERNAL_API_URL}{path}" 78 | response = None 79 | 80 | logger.info(f"Making {method.upper()} request to {url} with headers {headers}") 81 | 82 | if method == "get": 83 | response = requests.get(url, headers=headers, timeout=120) 84 | response.raise_for_status() 85 | elif method == "post": 86 | try: 87 | json_payload = await request.json() if request_model else {} 88 | response = requests.post(url, headers=headers, json=json_payload, timeout=120) 89 | response.raise_for_status() 90 | except Exception as e: 91 | json_payload = {} 92 | elif method == "put": 93 | json_payload = await request.json() if request_model else {} 94 | response = requests.put(url, headers=headers, json=json_payload, timeout=120) 95 | response.raise_for_status() 96 | elif method == "delete": 97 | response = requests.delete(url, headers=headers, timeout=120) 98 | response.raise_for_status() 99 | 100 | if response: 101 | return response.json() 102 | else: 103 | raise HTTPException(status_code=500, detail="Internal Server Error") 104 | 105 | return relay_ep 106 | 107 | def init_relay_router(): 108 | logger.info("Initializing relay router") 109 | spec = conf.OPENAPI_SPEC 110 | generate_post_endpoints(spec) 111 | generate_get_endpoints() 112 | logger.info("Relay router initialized") 113 | logger.info(router.routes) -------------------------------------------------------------------------------- /admin-ui/backend/app/requirements.txt: -------------------------------------------------------------------------------- 1 | annotated-types==0.6.0 2 | anyio==4.3.0 3 | boto3==1.34.104 4 | botocore==1.34.104 5 | cryptography==43.0.0 6 | certifi==2024.7.4 7 | charset-normalizer==3.3.2 8 | click==8.1.7 9 | dnspython==2.6.1 10 | dyntastic==0.14.0 11 | ecdsa==0.19.0 12 | email_validator==2.1.1 13 | exceptiongroup==1.2.1 14 | fastapi==0.111.0 15 | fastapi-cli==0.0.3 16 | h11==0.14.0 17 | httpcore==1.0.5 18 | httptools==0.6.1 19 | httpx==0.27.0 20 | idna==3.7 21 | Jinja2==3.1.4 22 | jmespath==1.0.1 23 | jose==1.0.0 24 | markdown-it-py==3.0.0 25 | MarkupSafe==2.1.5 26 | mdurl==0.1.2 27 | orjson==3.10.3 28 | pyasn1==0.6.0 29 | pydantic==2.7.1 30 | pydantic_core==2.18.2 31 | Pygments==2.18.0 32 | python-dateutil==2.9.0.post0 33 | python-dotenv==1.0.1 34 | python-multipart==0.0.9 35 | PyYAML==6.0.1 36 | PyJWT==2.8.0 37 | requests==2.32.2 38 | rich==13.7.1 39 | rsa==4.9 40 | s3transfer==0.10.1 41 | shellingham==1.5.4 42 | six==1.16.0 43 | sniffio==1.3.1 44 | starlette==0.37.2 45 | typer==0.12.3 46 | typing_extensions==4.11.0 47 | ujson==5.10.0 48 | urllib3==1.26.19 49 | uvicorn==0.29.0 50 | uvloop==0.19.0 51 | watchfiles==0.21.0 52 | websockets==12.0 53 | -------------------------------------------------------------------------------- /admin-ui/backend/app/utils.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import time 3 | import base64 4 | import httpx 5 | import 
requests 6 | from config import conf 7 | import boto3 8 | import jwt 9 | # from jwt.contrib.algorithms.pycrypto import RSAAlgorithm 10 | # jwt.register_algorithm('RS256', RSAAlgorithm(RSAAlgorithm.SHA256)) 11 | from cryptography.hazmat.primitives.asymmetric import rsa 12 | from cryptography.hazmat.primitives import serialization 13 | from cryptography.hazmat.backends import default_backend 14 | import base64 15 | import logging 16 | logging.basicConfig(level=logging.INFO) 17 | logger = logging.getLogger(__name__) 18 | 19 | def jwk_to_pem(jwk): 20 | exponent = base64.urlsafe_b64decode(jwk['e'] + '==') 21 | modulus = base64.urlsafe_b64decode(jwk['n'] + '==') 22 | 23 | public_numbers = rsa.RSAPublicNumbers( 24 | int.from_bytes(exponent, byteorder='big'), 25 | int.from_bytes(modulus, byteorder='big') 26 | ) 27 | public_key = public_numbers.public_key(default_backend()) 28 | pem = public_key.public_bytes( 29 | encoding=serialization.Encoding.PEM, 30 | format=serialization.PublicFormat.SubjectPublicKeyInfo 31 | ) 32 | return pem 33 | 34 | 35 | class CognitoTokenManager: 36 | def __init__(self, client_id, client_secret, user_pool_id, region, domain): 37 | self.client_id = client_id 38 | self.client_secret = "" 39 | self.user_pool_id = user_pool_id 40 | self.region = region 41 | self.token = None 42 | self.expiry = 0 43 | self.domain = domain 44 | 45 | # return true if the token is valid 46 | async def validate_token_signature(self, token): 47 | try: 48 | keys = await get_cognito_public_keys() 49 | unverified_header = jwt.get_unverified_header(token) 50 | rsa_key_pem = None 51 | for key in keys["keys"]: 52 | if key["kid"] == unverified_header["kid"]: 53 | rsa_key_pem = jwk_to_pem(key) 54 | break 55 | if rsa_key_pem: 56 | payload = jwt.decode( 57 | token, 58 | rsa_key_pem, 59 | algorithms=["RS256"], 60 | options={"verify_signature": True, "verify_aud":False} 61 | ) 62 | return True 63 | else: 64 | return False 65 | except Exception as e: 66 | logger.error(f"Token validation error: {e}") 67 | return False 68 | 69 | 70 | def get_token(self): 71 | current_time = time.time() 72 | if self.token is None or current_time >= self.expiry: 73 | self.token = self._fetch_token() 74 | return self.token 75 | 76 | def _fetch_token_with_secret(self): 77 | auth_header = base64.b64encode(f'{self.client_id}:{self.client_secret}'.encode('utf-8')).decode('utf-8') 78 | token_url = f'https://{self.domain}/oauth2/token' 79 | headers = { 80 | 'Content-Type': 'application/x-www-form-urlencoded', 81 | 'Authorization': f'Basic {auth_header}' 82 | } 83 | data = { 84 | 'grant_type': 'client_credentials', 85 | 'client_id': self.client_id, 86 | 'scope': 'genaifoundations/read' 87 | } 88 | response = requests.post(token_url, headers=headers, data=data, timeout=60) 89 | if response.status_code == 200: 90 | token_data = response.json() 91 | self.expiry = time.time() + token_data['expires_in'] - 60 # Subtract 60 seconds to handle latency 92 | return token_data['access_token'] 93 | else: 94 | raise Exception(f"Failed to get access token: {response.status_code} {response.text}") 95 | 96 | def _fetch_token(self): 97 | 98 | session = boto3.Session(region_name=conf.AWS_REGION) 99 | client = session.client('cognito-idp') 100 | 101 | # describe user pool client 102 | response = client.describe_user_pool_client( 103 | UserPoolId=self.user_pool_id, 104 | ClientId=self.client_id 105 | ) 106 | 107 | # get client secret 108 | client_secret = response['UserPoolClient']['ClientSecret'] 109 | self.client_secret = client_secret 110 | 111 | return 
self._fetch_token_with_secret() 112 | 113 | 114 | 115 | async def get_cognito_public_keys(): 116 | async with httpx.AsyncClient() as client: 117 | response = await client.get(conf.COGNITO_JWK_URL) 118 | response.raise_for_status() 119 | return response.json() 120 | 121 | # Initialize the token manager 122 | cognito_token_manager = CognitoTokenManager(conf.PLATFORM_APP_CLIENT_ID, "", conf.APP_USER_POOL_ID, conf.AWS_REGION, conf.PLATFORM_DOMAIN) 123 | -------------------------------------------------------------------------------- /admin-ui/frontend/foundations-admin/README.md: -------------------------------------------------------------------------------- 1 | # Nuxt 3 Minimal Starter 2 | 3 | Look at the [Nuxt 3 documentation](https://nuxt.com/docs/getting-started/introduction) to learn more. 4 | 5 | ## Setup 6 | 7 | Make sure to install the dependencies: 8 | 9 | ```bash 10 | # npm 11 | npm install 12 | 13 | # pnpm 14 | pnpm install 15 | 16 | # yarn 17 | yarn install 18 | 19 | # bun 20 | bun install 21 | ``` 22 | 23 | ## Development Server 24 | 25 | Start the development server on `http://localhost:3000`: 26 | 27 | ```bash 28 | # npm 29 | npm run dev 30 | 31 | # pnpm 32 | pnpm run dev 33 | 34 | # yarn 35 | yarn dev 36 | 37 | # bun 38 | bun run dev 39 | ``` 40 | 41 | ## Production 42 | 43 | Build the application for production: 44 | 45 | ```bash 46 | # npm 47 | npm run build 48 | 49 | # pnpm 50 | pnpm run build 51 | 52 | # yarn 53 | yarn build 54 | 55 | # bun 56 | bun run build 57 | ``` 58 | 59 | Locally preview production build: 60 | 61 | ```bash 62 | # npm 63 | npm run preview 64 | 65 | # pnpm 66 | pnpm run preview 67 | 68 | # yarn 69 | yarn preview 70 | 71 | # bun 72 | bun run preview 73 | ``` 74 | 75 | Check out the [deployment documentation](https://nuxt.com/docs/getting-started/deployment) for more information. 
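In this repository the admin UI ships as a pre-rendered static site rather than a Node server: `build-ui.sh` at the repository root substitutes the Cognito and API placeholders, runs `nuxi generate`, and syncs the output to the UI S3 bucket served through CloudFront. A minimal sketch of that flow (the bucket name is a placeholder you supply):

```bash
# Generate the fully static build; Nuxt emits it to .output/public
npx nuxi generate

# Upload the generated assets to the UI bucket fronted by CloudFront
aws s3 sync .output/public s3://<ui-bucket-name>
```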
76 | -------------------------------------------------------------------------------- /admin-ui/frontend/foundations-admin/assets/css/main.css: -------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; -------------------------------------------------------------------------------- /admin-ui/frontend/foundations-admin/components/Metrics/Invocation/LogsTable.vue: -------------------------------------------------------------------------------- 1 | 47 | 48 | -------------------------------------------------------------------------------- /admin-ui/frontend/foundations-admin/layouts/shell.vue: -------------------------------------------------------------------------------- 1 | 75 | 76 | 110 | 133 | -------------------------------------------------------------------------------- /admin-ui/frontend/foundations-admin/middleware/auth.ts: -------------------------------------------------------------------------------- 1 | export default defineNuxtRouteMiddleware((to, from) => { 2 | 3 | const runtimeConfig = useRuntimeConfig(); 4 | const BASE_URL = runtimeConfig.public.baseUrl 5 | const response = fetch(BASE_URL + 'admin/auth/status', { 6 | method: 'GET', 7 | credentials: 'include'}).then(response => { 8 | 9 | if (!response.ok) { 10 | window.location.href = '/'; 11 | } 12 | else{ 13 | console.log('response is ok'); 14 | } 15 | } 16 | ).catch(error => { 17 | window.location.href = '/'; 18 | }); 19 | return response; 20 | 21 | }); -------------------------------------------------------------------------------- /admin-ui/frontend/foundations-admin/nuxt.config.ts: -------------------------------------------------------------------------------- 1 | // https://nuxt.com/docs/api/configuration/nuxt-config 2 | export default defineNuxtConfig({ 3 | devtools: { enabled: false }, 4 | ssr: false, 5 | router: { 6 | options: { 7 | hashMode: true 8 | } 9 | }, 10 | plugins: ['~/plugins/cognito-config.js'], 11 | vite: { 12 | define: { 13 | global: {} 14 | } 15 | }, 16 | css: ['~/assets/css/main.css'], 17 | app: { 18 | head:{ 19 | script: [ 20 | { 21 | src: '/swagger-ui-bundle.js' 22 | }, 23 | { 24 | src: '/swagger-ui-standalone-preset.js' 25 | } 26 | ] 27 | } 28 | }, 29 | postcss: { 30 | plugins: { 31 | tailwindcss: {}, 32 | autoprefixer: {}, 33 | }, 34 | }, 35 | modules: ["nuxt-auth-utils", "@nuxt/ui", "nuxt-security"], 36 | runtimeConfig: { 37 | public:{ 38 | baseUrl: '' 39 | } 40 | }, 41 | security: { 42 | headers: { 43 | contentSecurityPolicy: { 44 | 'default-src': ["'self'"], 45 | 'script-src': ["'self'"], 46 | 'style-src': ["'self'", "'unsafe-inline'"], 47 | 'connect-src': ["'self'", "", "https:///oauth2/token","https://api.iconify.design","https://api.unisvg.com", "https://api.simplesvg.com"] 48 | } 49 | } 50 | } 51 | }) -------------------------------------------------------------------------------- /admin-ui/frontend/foundations-admin/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "nuxt-app", 3 | "private": true, 4 | "type": "module", 5 | "scripts": { 6 | "build": "nuxt build", 7 | "dev": "nuxt dev", 8 | "generate": "nuxt generate", 9 | "preview": "nuxt preview", 10 | "postinstall": "nuxt prepare" 11 | }, 12 | "dependencies": { 13 | "@aws-sdk/client-cognito-identity-provider": "^3.575.0", 14 | "@nuxt/ui": "^2.16.0", 15 | "amazon-cognito-identity-js": "^6.3.12", 16 | "date-fns": "^2.30.0", 17 | "nuxt": "^3.11.2", 18 | "nuxt-auth-utils": "^0.0.24", 19 | 
"nuxt-security": "^2.0.0-rc.9", 20 | "swagger-ui": "^5.17.13", 21 | "swagger-ui-dist": "^5.17.13", 22 | "v-calendar": "^3.1.2", 23 | "vue": "^3.4.27", 24 | "vue-router": "^4.3.2" 25 | }, 26 | "devDependencies": { 27 | "@types/swagger-ui": "^3.52.4", 28 | "autoprefixer": "^10.4.19", 29 | "postcss": "^8.4.38", 30 | "tailwindcss": "^3.4.3" 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /admin-ui/frontend/foundations-admin/pages/app/applications/index.vue: -------------------------------------------------------------------------------- 1 | 32 | 33 | -------------------------------------------------------------------------------- /admin-ui/frontend/foundations-admin/pages/app/index.vue: -------------------------------------------------------------------------------- 1 | 2 | 8 | 9 | 15 | 16 | -------------------------------------------------------------------------------- /admin-ui/frontend/foundations-admin/pages/app/playground/index.vue: -------------------------------------------------------------------------------- 1 | 11 | 12 | 44 | 45 | -------------------------------------------------------------------------------- /admin-ui/frontend/foundations-admin/pages/app/services/index.vue: -------------------------------------------------------------------------------- 1 | 11 | 12 | -------------------------------------------------------------------------------- /admin-ui/frontend/foundations-admin/pages/index.vue: -------------------------------------------------------------------------------- 1 | 33 | 34 | -------------------------------------------------------------------------------- /admin-ui/frontend/foundations-admin/plugins/cognito-config.js: -------------------------------------------------------------------------------- 1 | // plugins/cognito-config.js 2 | const awsCognitoConfig = { 3 | region: '', 4 | userPoolId: '', 5 | clientId: '', 6 | domain: '', 7 | redirectUri: '', 8 | logoutUri: '', 9 | }; 10 | 11 | 12 | 13 | export default awsCognitoConfig; -------------------------------------------------------------------------------- /admin-ui/frontend/foundations-admin/public/spec.json: -------------------------------------------------------------------------------- 1 | { 2 | "openapi": "3.1.0", 3 | "info": { 4 | "title": "Generative AI Foundations API", 5 | "version": "0.1" 6 | }, 7 | "paths": { 8 | "/model/service/health": { 9 | "get": { 10 | "summary": "Model Service Health", 11 | "operationId": "model_service_health_model_service_health_get", 12 | "tags": [ 13 | "Model Service" 14 | ], 15 | "responses": { 16 | "200": { 17 | "description": "Successful Response", 18 | "content": { 19 | "application/json": { 20 | "schema": {} 21 | } 22 | } 23 | } 24 | } 25 | } 26 | }, 27 | "/api/example1": { 28 | "post": { 29 | "summary": "Example1 Post", 30 | "operationId": "example1_post_api_example1_post", 31 | "responses": { 32 | "200": { 33 | "description": "Successful Response", 34 | "content": { 35 | "application/json": { 36 | "schema": {} 37 | } 38 | } 39 | } 40 | } 41 | } 42 | }, 43 | "/api/example2": { 44 | "get": { 45 | "summary": "Example2 Get", 46 | "operationId": "example2_get_api_example2_get", 47 | "responses": { 48 | "200": { 49 | "description": "Successful Response", 50 | "content": { 51 | "application/json": { 52 | "schema": {} 53 | } 54 | } 55 | } 56 | } 57 | }, 58 | "post": { 59 | "summary": "Example2 Post", 60 | "operationId": "example2_post_api_example2_post", 61 | "responses": { 62 | "200": { 63 | "description": "Successful 
Response", 64 | "content": { 65 | "application/json": { 66 | "schema": {} 67 | } 68 | } 69 | } 70 | } 71 | } 72 | }, 73 | "/docs": { 74 | "get": { 75 | "summary": "Get Docs", 76 | "operationId": "get_docs_docs_get", 77 | "responses": { 78 | "200": { 79 | "description": "Successful Response", 80 | "content": { 81 | "application/json": { 82 | "schema": {} 83 | } 84 | } 85 | } 86 | } 87 | } 88 | }, 89 | "/openapi.json": { 90 | "get": { 91 | "summary": "Get Openapi", 92 | "operationId": "get_openapi_openapi_json_get", 93 | "responses": { 94 | "200": { 95 | "description": "Successful Response", 96 | "content": { 97 | "application/json": { 98 | "schema": {} 99 | } 100 | } 101 | } 102 | } 103 | } 104 | }, 105 | "/api/auth/status": { 106 | "get": { 107 | "summary": "Auth Status", 108 | "operationId": "auth_status_api_auth_status_get", 109 | "responses": { 110 | "200": { 111 | "description": "Successful Response", 112 | "content": { 113 | "application/json": { 114 | "schema": {} 115 | } 116 | } 117 | } 118 | } 119 | } 120 | }, 121 | "/api/your_protected_route": { 122 | "get": { 123 | "summary": "Protected Route", 124 | "operationId": "protected_route_api_your_protected_route_get", 125 | "responses": { 126 | "200": { 127 | "description": "Successful Response", 128 | "content": { 129 | "application/json": { 130 | "schema": {} 131 | } 132 | } 133 | } 134 | } 135 | } 136 | }, 137 | "/api/services/health": { 138 | "get": { 139 | "summary": "Protected Route", 140 | "operationId": "protected_route_api_services_health_get", 141 | "responses": { 142 | "200": { 143 | "description": "Successful Response", 144 | "content": { 145 | "application/json": { 146 | "schema": {} 147 | } 148 | } 149 | } 150 | } 151 | } 152 | }, 153 | "/api/sample_endpoint": { 154 | "post": { 155 | "summary": "Sample Endpoint", 156 | "operationId": "sample_endpoint_api_sample_endpoint_post", 157 | "responses": { 158 | "200": { 159 | "description": "Successful Response", 160 | "content": { 161 | "application/json": { 162 | "schema": {} 163 | } 164 | } 165 | } 166 | } 167 | } 168 | }, 169 | "/api/create_app_client": { 170 | "post": { 171 | "summary": "Create App Client", 172 | "operationId": "create_app_client_api_create_app_client_post", 173 | "responses": { 174 | "200": { 175 | "description": "Successful Response", 176 | "content": { 177 | "application/json": { 178 | "schema": {} 179 | } 180 | } 181 | } 182 | } 183 | } 184 | }, 185 | "/api/get_all_app_clients": { 186 | "get": { 187 | "summary": "Get All App Clients", 188 | "operationId": "get_all_app_clients_api_get_all_app_clients_get", 189 | "responses": { 190 | "200": { 191 | "description": "Successful Response", 192 | "content": { 193 | "application/json": { 194 | "schema": {} 195 | } 196 | } 197 | } 198 | } 199 | } 200 | } 201 | } 202 | } -------------------------------------------------------------------------------- /admin-ui/frontend/foundations-admin/server/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "../.nuxt/tsconfig.server.json" 3 | } 4 | -------------------------------------------------------------------------------- /admin-ui/frontend/foundations-admin/tailwind.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('tailwindcss').Config} */ 2 | export default { 3 | content: [ 4 | "./components/**/*.{js,vue,ts}", 5 | "./layouts/**/*.vue", 6 | "./pages/**/*.vue", 7 | "./plugins/**/*.{js,ts}", 8 | "./app.vue", 9 | "./error.vue", 10 | ], 11 | 
theme: { 12 | extend: {}, 13 | }, 14 | plugins: [], 15 | } 16 | 17 | -------------------------------------------------------------------------------- /admin-ui/frontend/foundations-admin/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | // https://nuxt.com/docs/guide/concepts/typescript 3 | "extends": "./.nuxt/tsconfig.json" 4 | } 5 | -------------------------------------------------------------------------------- /build-images.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Variables 4 | ACCOUNT_ID=$1 5 | REGION=$2 6 | CONFIG_FILE="config.txt" 7 | 8 | # Function to check if a command exists 9 | command_exists() { 10 | command -v "$1" >/dev/null 2>&1 11 | } 12 | 13 | # Check for required tools 14 | if ! command_exists aws; then 15 | echo "Error: AWS CLI is not installed. Please install AWS CLI to continue." 16 | exit 1 17 | fi 18 | 19 | if ! command_exists docker; then 20 | echo "Error: Docker is not installed. Please install Docker to continue." 21 | exit 1 22 | fi 23 | 24 | if [ -z "$ACCOUNT_ID" ] || [ -z "$REGION" ]; then 25 | echo "Usage: $0 " 26 | exit 1 27 | fi 28 | 29 | # Check if Docker daemon is running 30 | if ! docker info >/dev/null 2>&1; then 31 | echo "Error: Docker daemon is not running. Please start the Docker daemon to continue." 32 | exit 1 33 | fi 34 | 35 | REPO_URL="$ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com" 36 | 37 | # Login to ECR 38 | aws ecr get-login-password --region $REGION | docker login --username AWS --password-stdin $REPO_URL 39 | 40 | # Function to create ECR repository if it doesn't exist 41 | create_ecr_repo() { 42 | REPO_NAME=$1 43 | aws ecr describe-repositories --repository-names $REPO_NAME --region $REGION > /dev/null 2>&1 44 | if [ $? -ne 0 ]; then 45 | aws ecr create-repository --repository-name $REPO_NAME --image-scanning-configuration scanOnPush=true --region $REGION 46 | fi 47 | } 48 | 49 | # Array to keep track of successful image pushes 50 | SUCCESSFUL_IMAGES=() 51 | 52 | # Read config file and process each line 53 | while IFS= read -r line || [ -n "$line" ]; do 54 | # Skip empty lines 55 | if [ -z "$line" ]; then 56 | continue 57 | fi 58 | 59 | # Split the line into remote container name and local folder path 60 | REMOTE_CONTAINER_NAME=$(echo $line | awk '{print $1}') 61 | LOCAL_FOLDER_PATH=$(echo $line | awk '{print $2}') 62 | 63 | # Construct the full image name 64 | IMAGE_NAME="$REPO_URL/$REMOTE_CONTAINER_NAME:latest" 65 | 66 | # Create the ECR repository if it doesn't exist 67 | create_ecr_repo $REMOTE_CONTAINER_NAME 68 | 69 | # Build the Docker image 70 | echo "Building Docker image for $LOCAL_FOLDER_PATH..." 71 | docker build --no-cache -t $REMOTE_CONTAINER_NAME:latest $LOCAL_FOLDER_PATH 72 | if [ $? -ne 0 ]; then 73 | echo "Error: Failed to build Docker image for $LOCAL_FOLDER_PATH." 74 | continue 75 | fi 76 | 77 | # Tag the Docker image 78 | echo "Tagging Docker image as $IMAGE_NAME..." 79 | docker tag $REMOTE_CONTAINER_NAME:latest $IMAGE_NAME 80 | if [ $? -ne 0 ]; then 81 | echo "Error: Failed to tag Docker image $IMAGE_NAME." 82 | continue 83 | fi 84 | 85 | # Push the Docker image to ECR 86 | echo "Pushing Docker image to $IMAGE_NAME..." 87 | docker push $IMAGE_NAME 88 | if [ $? -ne 0 ]; then 89 | echo "Error: Failed to push Docker image $IMAGE_NAME." 
90 | continue 91 | fi 92 | 93 | # Wait for a few seconds to ensure the push is complete 94 | sleep 10 95 | 96 | # Add to successful images array 97 | SUCCESSFUL_IMAGES+=("$IMAGE_NAME") 98 | 99 | done < $CONFIG_FILE 100 | 101 | # Check if all images were created and pushed 102 | TOTAL_IMAGES=$(grep -v -e '^\s*$' $CONFIG_FILE | wc -l) 103 | SUCCESSFUL_COUNT=${#SUCCESSFUL_IMAGES[@]} 104 | 105 | echo "Total images to be created and pushed: $TOTAL_IMAGES" 106 | echo "Total successfully created and pushed images: $SUCCESSFUL_COUNT" 107 | 108 | if [ $SUCCESSFUL_COUNT -eq $TOTAL_IMAGES ]; then 109 | echo "All images were successfully created and pushed." 110 | else 111 | echo "The following images were successfully created and pushed:" 112 | for IMAGE in "${SUCCESSFUL_IMAGES[@]}"; do 113 | echo "$IMAGE" 114 | done 115 | echo "Some images failed to be created or pushed. Please check the logs for details." 116 | fi 117 | 118 | echo "Script execution completed." 119 | -------------------------------------------------------------------------------- /build-ui.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Prompt for inputs 4 | read -p "Enter AWS Region: " region 5 | read -p "Enter Admin Cognito User Pool ID: " userPoolId 6 | read -p "Enter Admin Cognito Client ID: " clientId 7 | read -p "Enter CloudFront Distribution URL: " cloudfrontUrl 8 | read -p "Enter Platform API Gateway URL: " apiUrl 9 | read -p "Enter UI S3 Bucket Name: " bucketName 10 | read -p "Enter Admin Cognito User Pool Domain: " cognitoDomain 11 | 12 | # Replace placeholders in cognito-config.js 13 | cognitoConfigPath="./admin-ui/frontend/foundations-admin/plugins/cognito-config.js" 14 | sed -i '' "s||$region|g" $cognitoConfigPath 15 | sed -i '' "s||$userPoolId|g" $cognitoConfigPath 16 | sed -i '' "s||$clientId|g" $cognitoConfigPath 17 | sed -i '' "s||$cloudfrontUrl|g" $cognitoConfigPath 18 | sed -i '' "s||$cloudfrontUrl|g" $cognitoConfigPath 19 | sed -i '' "s||$cognitoDomain|g" $cognitoConfigPath 20 | 21 | # Replace placeholders in nuxt.config.ts 22 | nuxtConfigPath="./admin-ui/frontend/foundations-admin/nuxt.config.ts" 23 | sed -i '' "s||$apiUrl|g" $nuxtConfigPath 24 | sed -i '' "s||$cognitoDomain|g" $nuxtConfigPath 25 | 26 | cd ./admin-ui/frontend/foundations-admin/ 27 | sudo npm install 28 | # Run the existing upload script 29 | sudo rm -rf .output 30 | sudo rm -rf dist 31 | sudo rm -rf .nuxt 32 | sudo npx nuxi generate 33 | sudo -E aws s3 sync .output/public s3://$bucketName -------------------------------------------------------------------------------- /cdk/GenAIFoundations/README.md: -------------------------------------------------------------------------------- 1 | # Welcome to your CDK TypeScript project 2 | 3 | This is a blank project for CDK development with TypeScript. 4 | 5 | The `cdk.json` file tells the CDK Toolkit how to execute your app. 
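Note that `bin/gen_ai_foundations.ts` appends a random five-character suffix to each stack name, and the WAF stack declares a `userEmail` CloudFormation parameter (the admin portal's temporary password is emailed to that address). A deployment therefore looks roughly like the following sketch, with the suffixed stack name taken from `cdk list`:

```bash
# List the synthesized stacks; each name carries a random suffix
npx cdk list

# Deploy both stacks, passing the admin email to the WAF stack's parameter
npx cdk deploy --all --parameters <WafStackName>:userEmail=admin@example.com
```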
6 | 7 | ## Useful commands 8 | 9 | * `npm run build` compile typescript to js 10 | * `npm run watch` watch for changes and compile 11 | * `npm run test` perform the jest unit tests 12 | * `npx cdk deploy` deploy this stack to your default AWS account/region 13 | * `npx cdk diff` compare deployed stack with current state 14 | * `npx cdk synth` emits the synthesized CloudFormation template 15 | -------------------------------------------------------------------------------- /cdk/GenAIFoundations/bin/gen_ai_foundations.ts: -------------------------------------------------------------------------------- 1 | import 'source-map-support/register'; 2 | import * as cdk from 'aws-cdk-lib'; 3 | import { GenAIFoundationsStack } from '../lib/foundations-main-stack'; 4 | import { WafStack } from '../lib/waf-stack'; 5 | import * as crypto from "crypto"; 6 | 7 | const app = new cdk.App(); 8 | const uniqueCode = crypto.randomBytes(8).toString("hex").slice(0, 5); 9 | 10 | const wafStack = new WafStack(app, 'WafStack'+uniqueCode, { 11 | env: { region: 'us-east-1' }, 12 | crossRegionReferences:true 13 | }); 14 | 15 | new GenAIFoundationsStack(app, 'GenAIFoundations'+uniqueCode, { 16 | env: { 17 | region: process.env.CDK_DEFAULT_REGION 18 | }, 19 | crossRegionReferences:true, 20 | webAclArn: wafStack.webAclArn 21 | }); -------------------------------------------------------------------------------- /cdk/GenAIFoundations/cdk.json: -------------------------------------------------------------------------------- 1 | { 2 | "app": "npx ts-node --prefer-ts-exts bin/gen_ai_foundations.ts", 3 | "watch": { 4 | "include": [ 5 | "**" 6 | ], 7 | "exclude": [ 8 | "README.md", 9 | "cdk*.json", 10 | "**/*.d.ts", 11 | "**/*.js", 12 | "tsconfig.json", 13 | "package*.json", 14 | "yarn.lock", 15 | "node_modules", 16 | "test" 17 | ] 18 | }, 19 | "context": { 20 | "@aws-cdk/aws-lambda:recognizeLayerVersion": true, 21 | "@aws-cdk/core:checkSecretUsage": true, 22 | "@aws-cdk/core:target-partitions": [ 23 | "aws", 24 | "aws-cn" 25 | ], 26 | "@aws-cdk-containers/ecs-service-extensions:enableDefaultLogDriver": true, 27 | "@aws-cdk/aws-ec2:uniqueImdsv2TemplateName": true, 28 | "@aws-cdk/aws-ecs:arnFormatIncludesClusterName": true, 29 | "@aws-cdk/aws-iam:minimizePolicies": true, 30 | "@aws-cdk/core:validateSnapshotRemovalPolicy": true, 31 | "@aws-cdk/aws-codepipeline:crossAccountKeyAliasStackSafeResourceName": true, 32 | "@aws-cdk/aws-s3:createDefaultLoggingPolicy": true, 33 | "@aws-cdk/aws-sns-subscriptions:restrictSqsDescryption": true, 34 | "@aws-cdk/aws-apigateway:disableCloudWatchRole": true, 35 | "@aws-cdk/core:enablePartitionLiterals": true, 36 | "@aws-cdk/aws-events:eventsTargetQueueSameAccount": true, 37 | "@aws-cdk/aws-iam:standardizedServicePrincipals": true, 38 | "@aws-cdk/aws-ecs:disableExplicitDeploymentControllerForCircuitBreaker": true, 39 | "@aws-cdk/aws-iam:importedRoleStackSafeDefaultPolicyName": true, 40 | "@aws-cdk/aws-s3:serverAccessLogsUseBucketPolicy": true, 41 | "@aws-cdk/aws-route53-patters:useCertificate": true, 42 | "@aws-cdk/customresources:installLatestAwsSdkDefault": false, 43 | "@aws-cdk/aws-rds:databaseProxyUniqueResourceName": true, 44 | "@aws-cdk/aws-codedeploy:removeAlarmsFromDeploymentGroup": true, 45 | "@aws-cdk/aws-apigateway:authorizerChangeDeploymentLogicalId": true, 46 | "@aws-cdk/aws-ec2:launchTemplateDefaultUserData": true, 47 | "@aws-cdk/aws-secretsmanager:useAttachedSecretResourcePolicyForSecretTargetAttachments": true, 48 | "@aws-cdk/aws-redshift:columnId": true, 49 | 
"@aws-cdk/aws-stepfunctions-tasks:enableEmrServicePolicyV2": true, 50 | "@aws-cdk/aws-ec2:restrictDefaultSecurityGroup": true, 51 | "@aws-cdk/aws-apigateway:requestValidatorUniqueId": true, 52 | "@aws-cdk/aws-kms:aliasNameRef": true, 53 | "@aws-cdk/aws-autoscaling:generateLaunchTemplateInsteadOfLaunchConfig": true, 54 | "@aws-cdk/core:includePrefixInUniqueNameGeneration": true, 55 | "@aws-cdk/aws-efs:denyAnonymousAccess": true, 56 | "@aws-cdk/aws-opensearchservice:enableOpensearchMultiAzWithStandby": true, 57 | "@aws-cdk/aws-lambda-nodejs:useLatestRuntimeVersion": true, 58 | "@aws-cdk/aws-efs:mountTargetOrderInsensitiveLogicalId": true, 59 | "@aws-cdk/aws-rds:auroraClusterChangeScopeOfInstanceParameterGroupWithEachParameters": true, 60 | "@aws-cdk/aws-appsync:useArnForSourceApiAssociationIdentifier": true, 61 | "@aws-cdk/aws-rds:preventRenderingDeprecatedCredentials": true, 62 | "@aws-cdk/aws-codepipeline-actions:useNewDefaultBranchForCodeCommitSource": true, 63 | "@aws-cdk/aws-cloudwatch-actions:changeLambdaPermissionLogicalIdForLambdaAction": true, 64 | "@aws-cdk/aws-codepipeline:crossAccountKeysDefaultValueToFalse": true, 65 | "@aws-cdk/aws-codepipeline:defaultPipelineTypeToV2": true, 66 | "@aws-cdk/aws-kms:reduceCrossAccountRegionPolicyScope": true, 67 | "@aws-cdk/aws-eks:nodegroupNameAttribute": true, 68 | "cdk-migrate": true 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /cdk/GenAIFoundations/jest.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | testEnvironment: 'node', 3 | roots: ['/test'], 4 | testMatch: ['**/*.test.ts'], 5 | transform: { 6 | '^.+\\.tsx?$': 'ts-jest' 7 | } 8 | }; 9 | -------------------------------------------------------------------------------- /cdk/GenAIFoundations/lib/waf-stack.ts: -------------------------------------------------------------------------------- 1 | import * as cdk from "aws-cdk-lib"; 2 | import * as wafv2 from 'aws-cdk-lib/aws-wafv2'; 3 | import * as cloudfront from 'aws-cdk-lib/aws-cloudfront'; 4 | import * as s3 from "aws-cdk-lib/aws-s3"; 5 | import { Construct } from "constructs"; 6 | import * as crypto from "crypto"; 7 | 8 | 9 | 10 | // Stack to create the WAF WebACL in us-east-1 11 | export class WafStack extends cdk.Stack { 12 | public readonly webAclArn: string; 13 | 14 | constructor(scope: Construct, id: string, props?: cdk.StackProps) { 15 | super(scope, id, { 16 | ...props, 17 | env: { region: 'us-east-1' }, 18 | }); 19 | 20 | const userEmail = new cdk.CfnParameter(this, "userEmail", { 21 | type: "String", 22 | description: "Email address for the admin portal user. 
Temporary password will be sent to this email address.", 23 | }); 24 | 25 | // Create the WAF WebACL 26 | 27 | const uniqueCode = crypto.randomBytes(8).toString("hex").slice(0, 5); 28 | 29 | const webAcl = new wafv2.CfnWebACL(this, 'FoundationsWebACL'+uniqueCode, { 30 | defaultAction: { 31 | allow: {}, 32 | }, 33 | scope: 'CLOUDFRONT', 34 | visibilityConfig: { 35 | cloudWatchMetricsEnabled: true, 36 | metricName: 'foundations-web-acl'+uniqueCode, 37 | sampledRequestsEnabled: true, 38 | }, 39 | rules: [ 40 | { 41 | name: 'AWS-AWSManagedRulesCommonRuleSet', 42 | priority: 0, 43 | overrideAction: { none: {} }, 44 | statement: { 45 | managedRuleGroupStatement: { 46 | vendorName: 'AWS', 47 | name: 'AWSManagedRulesCommonRuleSet', 48 | }, 49 | }, 50 | visibilityConfig: { 51 | cloudWatchMetricsEnabled: true, 52 | metricName: 'AWSManagedRulesCommonRuleSet', 53 | sampledRequestsEnabled: true, 54 | }, 55 | }, 56 | ], 57 | }); 58 | this.webAclArn = webAcl.attrArn; 59 | } 60 | } -------------------------------------------------------------------------------- /cdk/GenAIFoundations/migrate.json: -------------------------------------------------------------------------------- 1 | { 2 | "//": "This file is generated by cdk migrate. It will be automatically deleted after the first successful deployment of this app to the environment of the original resources.", 3 | "Source": "localfile" 4 | } -------------------------------------------------------------------------------- /cdk/GenAIFoundations/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "gen_ai_foundations", 3 | "version": "0.1.0", 4 | "bin": { 5 | "gen_ai_foundations": "bin/gen_ai_foundations.js" 6 | }, 7 | "scripts": { 8 | "build": "tsc", 9 | "watch": "tsc -w", 10 | "test": "jest", 11 | "cdk": "cdk" 12 | }, 13 | "devDependencies": { 14 | "@types/jest": "^29.5.12", 15 | "@types/node": "20.12.7", 16 | "jest": "^29.7.0", 17 | "ts-jest": "^29.1.2", 18 | "aws-cdk": "2.139.1", 19 | "ts-node": "^10.9.2", 20 | "typescript": "~5.4.5" 21 | }, 22 | "dependencies": { 23 | "aws-cdk-lib": "2.139.1", 24 | "constructs": "^10.0.0", 25 | "source-map-support": "^0.5.21" 26 | } 27 | } -------------------------------------------------------------------------------- /cdk/GenAIFoundations/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2020", 4 | "module": "commonjs", 5 | "lib": [ 6 | "es2020", 7 | "dom" 8 | ], 9 | "declaration": true, 10 | "strict": true, 11 | "noImplicitAny": true, 12 | "strictNullChecks": true, 13 | "noImplicitThis": true, 14 | "alwaysStrict": true, 15 | "noUnusedLocals": false, 16 | "noUnusedParameters": false, 17 | "noImplicitReturns": true, 18 | "noFallthroughCasesInSwitch": false, 19 | "inlineSourceMap": true, 20 | "inlineSources": true, 21 | "experimentalDecorators": true, 22 | "strictPropertyInitialization": false, 23 | "typeRoots": [ 24 | "./node_modules/@types" 25 | ] 26 | }, 27 | "exclude": [ 28 | "node_modules", 29 | "cdk.out" 30 | ] 31 | } 32 | -------------------------------------------------------------------------------- /config.txt: -------------------------------------------------------------------------------- 1 | foundations_model_invocation ./services/foundations_model_invocation 2 | foundations_document_processing ./services/foundations_document_processing 3 | foundations_extraction ./services/foundations_extraction 4 | foundations_chunking ./services/foundations_chunking 5 | 
foundations_vectorization ./services/foundations_vectorization
6 | foundations_vector_process ./services/foundations_vector_job_process
7 | foundations_prompt_template ./services/foundations_prompt_management
8 | admin_backend_service ./admin-ui/backend/app
9 |
10 |
-------------------------------------------------------------------------------- /cookbook/notebooks/quickstarts/reqs.txt: --------------------------------------------------------------------------------
1 | altair==5.3.0
2 | anyio==4.4.0
3 | attrs==23.2.0
4 | blinker==1.8.2
5 | boto3==1.34.119
6 | botocore==1.34.119
7 | cachetools==5.3.3
8 | certifi==2024.6.2
9 | charset-normalizer==3.3.2
10 | click==8.1.7
11 | exceptiongroup==1.2.1
12 | gitdb==4.0.11
13 | GitPython==3.1.43
14 | h11==0.14.0
15 | httpcore==1.0.5
16 | httpx==0.27.0
17 | idna==3.7
18 | Jinja2==3.1.4
19 | jmespath==1.0.1
20 | jsonschema==4.22.0
21 | jsonschema-specifications==2023.12.1
22 | markdown-it-py==3.0.0
23 | MarkupSafe==2.1.5
24 | mdurl==0.1.2
25 | numpy==1.26.4
26 | packaging==24.0
27 | pandas==2.2.2
28 | pillow==10.3.0
29 | protobuf==4.25.3
30 | pyarrow==16.1.0
31 | pydeck==0.9.1
32 | Pygments==2.18.0
33 | python-dateutil==2.9.0.post0
34 | python-dotenv
35 | pytz==2024.1
36 | referencing==0.35.1
37 | requests==2.32.3
38 | rich==13.7.1
39 | rpds-py==0.18.1
40 | s3transfer==0.10.1
41 | six==1.16.0
42 | smmap==5.0.1
43 | sniffio==1.3.1
44 | streamlit==1.35.0
45 | tenacity==8.3.0
46 | toml==0.10.2
47 | toolz==0.12.1
48 | tornado==6.4
49 | typing_extensions==4.12.1
50 | tzdata==2024.1
51 | urllib3==1.26.19
52 |
-------------------------------------------------------------------------------- /cookbook/notebooks/quickstarts/utils.py: --------------------------------------------------------------------------------
1 | import requests
2 | import time
3 | import base64
4 | import httpx
5 | import boto3
6 |
7 | class CognitoTokenManager:
8 |     def __init__(self, client_id, client_secret, user_pool_id, region, domain):
9 |         self.client_id = client_id
10 |         self.client_secret = client_secret
11 |         self.user_pool_id = user_pool_id
12 |         self.region = region
13 |         self.token = None
14 |         self.expiry = 0
15 |         self.domain = domain
16 |
17 |     def get_token(self):
18 |         # Refresh the cached token if it is missing or expired
19 |         current_time = time.time()
20 |         if self.token is None or current_time >= self.expiry:
21 |             self.token = self._fetch_token_with_secret()
22 |         return self.token
23 |
24 |     def _fetch_token_with_secret(self):
25 |         auth_header = base64.b64encode(f'{self.client_id}:{self.client_secret}'.encode('utf-8')).decode('utf-8')
26 |         token_url = f'https://{self.domain}/oauth2/token'
27 |         headers = {
28 |             'Content-Type': 'application/x-www-form-urlencoded',
29 |             'Authorization': f'Basic {auth_header}'
30 |         }
31 |         data = {
32 |             'grant_type': 'client_credentials',
33 |             'client_id': self.client_id,
34 |             'scope': 'genaifoundations/read'
35 |         }
36 |         response = requests.post(token_url, headers=headers, data=data, timeout=60)
37 |         response.raise_for_status()
38 |         if response.status_code == 200:
39 |             token_data = response.json()
40 |             self.expiry = time.time() + token_data['expires_in'] - 60  # Subtract 60 seconds to handle latency
41 |             return token_data['access_token']
42 |         else:
43 |             raise Exception(f"Failed to get access token: {response.status_code} {response.text}")
44 |
45 |
46 | async def get_cognito_public_keys():
47 |     # `conf` is expected to be a settings object exposing COGNITO_JWK_URL; it is not defined in this module.
48 |     async with httpx.AsyncClient() as client:
49 |         response = await client.get(conf.COGNITO_JWK_URL)
50 |         response.raise_for_status()
51 |         return response.json()
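52 |
53 | # --- Usage sketch (illustrative only) ----------------------------------------
54 | # Minimal example of wiring up CognitoTokenManager as done in the quickstart
55 | # notebooks. The angle-bracket values are placeholders, not real settings;
56 | # obtain the actual client ID/secret, user pool ID, region and domain from
57 | # your platform administrator.
58 | #
59 | # manager = CognitoTokenManager(
60 | #     client_id="<COGNITO_CLIENT_ID>",
61 | #     client_secret="<COGNITO_CLIENT_SECRET>",
62 | #     user_pool_id="<COGNITO_USER_POOL_ID>",
63 | #     region="<COGNITO_REGION>",
64 | #     domain="<COGNITO_DOMAIN>",
65 | # )
66 | # token = manager.get_token()  # cached until ~60 seconds before expiry
67 | # headers = {"Authorization": f"Bearer {token}"}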
-------------------------------------------------------------------------------- /cookbook/sample-apps/document_comparision/README.md: --------------------------------------------------------------------------------
1 | Document Comparison is a simple Streamlit application that compares two files and produces a comparison summary.
2 |
3 | 1. Make sure you have configured a virtual environment.
4 |
5 | ```bash
6 | python -m venv venv
7 | source venv/bin/activate
8 | pip install -r reqs.txt
9 | ```
10 |
11 | 2. Create the following environment variables. Please get these values from your platform administrator.
12 | export COGNITO_CLIENT_ID=''
13 | export COGNITO_CLIENT_SECRET=''
14 | export COGNITO_USER_POOL_ID=''
15 | export COGNITO_REGION=''
16 | export COGNITO_DOMAIN=''
17 | export PLATFORM_API_URL=''
18 |
19 | 3. Run the streamlit app
20 | streamlit run app.py
-------------------------------------------------------------------------------- /cookbook/sample-apps/document_comparision/app.py: --------------------------------------------------------------------------------
1 | import streamlit as st
2 | from sdk.accelerator import GenerativeAIAccelerator
3 | import requests
4 | import time
5 |
6 | accelerator = GenerativeAIAccelerator()
7 | _document = accelerator.document_service
8 | _model = accelerator.model_service
9 |
10 | prompt_template = """
11 | Given two pieces of text, compare them and list out the differences.
12 |
13 | Text 1:
14 | {text1}
15 |
16 | Text 2:
17 | {text2}
18 | """
19 |
20 | def upload_file(file, extraction_job):
21 |     upload_url = _document.register_file_for_extraction(extraction_job, file.name)['upload_url']
22 |     requests.put(upload_url, data=file)
23 |
24 | def check_extraction_status(extraction_job):
25 |     while True:
26 |         response = _document.get_extraction_job_status(extraction_job)
27 |         job_status = response['status']
28 |         if job_status in ['COMPLETED', 'FAILED', 'COMPLETED_WITH_ERRORS']:
29 |             return job_status
30 |         time.sleep(5)
31 |
32 | def extract_text(extraction_job, file_name):
33 |     response = _document.get_file_status(extraction_job, file_name)
34 |     text_response = requests.get(response['result_url']).json()
35 |     return "".join(page['page_text'] for page in text_response['pages'])
36 |
37 | def compare_documents(text1, text2):
38 |     compare_prompt = prompt_template.format(text1=text1, text2=text2)
39 |     response = _model.invoke_model(
40 |         model_name="ANTHROPIC_CLAUDE_3_SONNET_V1",
41 |         prompt=compare_prompt,
42 |         max_tokens=5000,
43 |         temperature=0.7,
44 |         top_p=0.9,
45 |         top_k=50,
46 |         stop_sequences=["\\n"]
47 |     )
48 |     return response['output_text']
49 |
50 | def process_files(file1, file2):
51 |     extraction_job = _document.create_extraction_job()['extraction_job_id']
52 |     st.write("Extracting text from files ...")
53 |
54 |     upload_file(file1, extraction_job)
55 |     upload_file(file2, extraction_job)
56 |
57 |     _document.start_extraction_job(extraction_job)
58 |     job_status = check_extraction_status(extraction_job)
59 |
60 |     if job_status == 'COMPLETED':
61 |         st.write("Extraction completed successfully")
62 |         text1 = extract_text(extraction_job, file1.name)
63 |         text2 = extract_text(extraction_job, file2.name)
64 |         comparison_result = compare_documents(text1, text2)
65 |         st.write(comparison_result)
66 |     else:
67 |         st.write(f"Extraction failed with status: {job_status}")
68 |
69 | # Sidebar for file uploads
70 | with st.sidebar:
71 |     st.title("Document Comparison (PDF)")
72 |     st.markdown("#### Upload Documents")
73 |     file1 = 
st.sidebar.file_uploader("Choose the first PDF file", type=["pdf"], key="file1")
74 |     file2 = st.sidebar.file_uploader("Choose the second PDF file", type=["pdf"], key="file2")
75 |
76 |
77 | if file1 and file2:
78 |     with st.spinner("Processing..."):
79 |         process_files(file1, file2)
80 |
-------------------------------------------------------------------------------- /cookbook/sample-apps/document_comparision/reqs.txt: --------------------------------------------------------------------------------
1 | altair==5.3.0
2 | anyio==4.4.0
3 | attrs==23.2.0
4 | blinker==1.8.2
5 | boto3==1.34.119
6 | botocore==1.34.119
7 | cachetools==5.3.3
8 | certifi==2024.6.2
9 | charset-normalizer==3.3.2
10 | click==8.1.7
11 | exceptiongroup==1.2.1
12 | gitdb==4.0.11
13 | GitPython==3.1.43
14 | h11==0.14.0
15 | httpcore==1.0.5
16 | httpx==0.27.0
17 | idna==3.7
18 | Jinja2==3.1.4
19 | jmespath==1.0.1
20 | jsonschema==4.22.0
21 | jsonschema-specifications==2023.12.1
22 | markdown-it-py==3.0.0
23 | MarkupSafe==2.1.5
24 | mdurl==0.1.2
25 | numpy==1.26.4
26 | packaging==24.0
27 | pandas==2.2.2
28 | pillow==10.3.0
29 | protobuf==4.25.3
30 | pyarrow==16.1.0
31 | pydeck==0.9.1
32 | Pygments==2.18.0
33 | python-dateutil==2.9.0.post0
34 | pytz==2024.1
35 | referencing==0.35.1
36 | requests==2.32.3
37 | rich==13.7.1
38 | rpds-py==0.18.1
39 | s3transfer==0.10.1
40 | six==1.16.0
41 | smmap==5.0.1
42 | sniffio==1.3.1
43 | streamlit==1.35.0
44 | tenacity==8.3.0
45 | toml==0.10.2
46 | toolz==0.12.1
47 | tornado==6.4
48 | typing_extensions==4.12.1
49 | tzdata==2024.1
50 | urllib3==1.26.19
51 |
-------------------------------------------------------------------------------- /cookbook/sample-apps/document_comparision/sdk/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/generative-ai-applications-foundational-architecture/b237eeff1f4bbb6526a08941f9b6fa53d90c8c64/cookbook/sample-apps/document_comparision/sdk/__init__.py
-------------------------------------------------------------------------------- /cookbook/sample-apps/document_summarization/README.md: --------------------------------------------------------------------------------
1 | Document Summarization is a simple Streamlit application that summarizes a file, providing a summary for each page as well as a summary of summaries.
2 |
3 | 1. Make sure you have configured a virtual environment.
4 |
5 | ```bash
6 | python -m venv venv
7 | source venv/bin/activate
8 | pip install -r reqs.txt
9 | ```
10 |
11 | 2. Create the following environment variables. Please get these values from your platform administrator.
12 | export COGNITO_CLIENT_ID=''
13 | export COGNITO_CLIENT_SECRET=''
14 | export COGNITO_USER_POOL_ID=''
15 | export COGNITO_REGION=''
16 | export COGNITO_DOMAIN=''
17 | export PLATFORM_API_URL=''
18 |
19 | 3.
Run the streamlit app 20 | streamlit run app.py -------------------------------------------------------------------------------- /cookbook/sample-apps/document_summarization/app.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from sdk.accelerator import GenerativeAIAccelerator 3 | import requests 4 | import time 5 | 6 | accelerator = GenerativeAIAccelerator() 7 | _document = accelerator.document_service 8 | _model = accelerator.model_service 9 | 10 | page_summary_prompt_template = """ 11 | Summarize the following text: 12 | 13 | {text} 14 | """ 15 | 16 | summary_of_summaries_prompt_template = """ 17 | Summarize the following summaries: 18 | 19 | {summaries} 20 | """ 21 | 22 | def upload_file(file, extraction_job): 23 | upload_url = _document.register_file_for_extraction(extraction_job, file.name)['upload_url'] 24 | requests.put(upload_url, data=file) 25 | 26 | def check_extraction_status(extraction_job): 27 | while True: 28 | response = _document.get_extraction_job_status(extraction_job) 29 | job_status = response['status'] 30 | if job_status in ['COMPLETED', 'FAILED', 'COMPLETED_WITH_ERRORS']: 31 | return job_status 32 | time.sleep(5) 33 | 34 | def extract_text(extraction_job, file_name): 35 | response = _document.get_file_status(extraction_job, file_name) 36 | text_response = requests.get(response['result_url']).json() 37 | return [page['page_text'] for page in text_response['pages']] 38 | 39 | def summarize_text(text): 40 | summary_prompt = page_summary_prompt_template.format(text=text) 41 | response = _model.invoke_model( 42 | model_name="ANTHROPIC_CLAUDE_3_SONNET_V1", 43 | prompt=summary_prompt, 44 | max_tokens=5000, 45 | temperature=0.7, 46 | top_p=0.9, 47 | top_k=50, 48 | stop_sequences=["\\n"] 49 | ) 50 | return response['output_text'] 51 | 52 | def summarize_summaries(summaries): 53 | summaries_text = "\n\n".join(summaries) 54 | summary_of_summaries_prompt = summary_of_summaries_prompt_template.format(summaries=summaries_text) 55 | response = _model.invoke_model( 56 | model_name="ANTHROPIC_CLAUDE_3_SONNET_V1", 57 | prompt=summary_of_summaries_prompt, 58 | max_tokens=5000, 59 | temperature=0.7, 60 | top_p=0.9, 61 | top_k=50, 62 | stop_sequences=["\\n"] 63 | ) 64 | return response['output_text'] 65 | 66 | def process_file(file): 67 | with st.spinner("Summarizing .."): 68 | extraction_job = _document.create_extraction_job()['extraction_job_id'] 69 | st.write(f"Extraction job ID: {extraction_job}") 70 | 71 | upload_file(file, extraction_job) 72 | 73 | _document.start_extraction_job(extraction_job) 74 | job_status = check_extraction_status(extraction_job) 75 | 76 | if job_status == 'COMPLETED': 77 | st.write("Extraction completed successfully") 78 | pages_text = extract_text(extraction_job, file.name) 79 | 80 | page_summaries = [] 81 | for i, page_text in enumerate(pages_text): 82 | summary = summarize_text(page_text) 83 | page_summaries.append(summary) 84 | with st.expander(f"Page {i+1} Summary"): 85 | st.write(summary) 86 | 87 | summary_of_summaries = summarize_summaries(page_summaries) 88 | with st.expander("Summary of Summaries"): 89 | st.write(summary_of_summaries) 90 | else: 91 | st.write(f"Extraction failed with status: {job_status}") 92 | 93 | # Sidebar for file upload 94 | with st.sidebar: 95 | st.title("Document Summarizer") 96 | st.write("Summarize a PDF document using AI") 97 | file = st.sidebar.file_uploader("Choose a PDF file", type=["pdf"], key="file") 98 | 99 | if file: 100 | process_file(file) 101 | 
-------------------------------------------------------------------------------- /cookbook/sample-apps/document_summarization/reqs.txt: --------------------------------------------------------------------------------
1 | altair==5.3.0
2 | anyio==4.4.0
3 | attrs==23.2.0
4 | blinker==1.8.2
5 | boto3==1.34.119
6 | botocore==1.34.119
7 | cachetools==5.3.3
8 | certifi==2024.6.2
9 | charset-normalizer==3.3.2
10 | click==8.1.7
11 | exceptiongroup==1.2.1
12 | gitdb==4.0.11
13 | GitPython==3.1.43
14 | h11==0.14.0
15 | httpcore==1.0.5
16 | httpx==0.27.0
17 | idna==3.7
18 | Jinja2==3.1.4
19 | jmespath==1.0.1
20 | jsonschema==4.22.0
21 | jsonschema-specifications==2023.12.1
22 | markdown-it-py==3.0.0
23 | MarkupSafe==2.1.5
24 | mdurl==0.1.2
25 | numpy==1.26.4
26 | packaging==24.0
27 | pandas==2.2.2
28 | pillow==10.3.0
29 | protobuf==4.25.3
30 | pyarrow==16.1.0
31 | pydeck==0.9.1
32 | Pygments==2.18.0
33 | python-dateutil==2.9.0.post0
34 | pytz==2024.1
35 | referencing==0.35.1
36 | requests==2.32.3
37 | rich==13.7.1
38 | rpds-py==0.18.1
39 | s3transfer==0.10.1
40 | six==1.16.0
41 | smmap==5.0.1
42 | sniffio==1.3.1
43 | streamlit==1.35.0
44 | tenacity==8.3.0
45 | toml==0.10.2
46 | toolz==0.12.1
47 | tornado==6.4
48 | typing_extensions==4.12.1
49 | tzdata==2024.1
50 | urllib3==1.26.19
51 |
-------------------------------------------------------------------------------- /cookbook/sample-apps/document_summarization/sdk/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/generative-ai-applications-foundational-architecture/b237eeff1f4bbb6526a08941f9b6fa53d90c8c64/cookbook/sample-apps/document_summarization/sdk/__init__.py
-------------------------------------------------------------------------------- /cookbook/sample-apps/simple-chat/README.md: --------------------------------------------------------------------------------
1 | This is a simple chat app.
2 |
3 | 1. Make sure you have configured a virtual environment.
4 |
5 | ```bash
6 | python -m venv venv
7 | source venv/bin/activate
8 | pip install -r reqs.txt
9 | ```
10 |
11 | 2. Create the following environment variables. Please get these values from your platform administrator.
12 | export COGNITO_CLIENT_ID=''
13 | export COGNITO_CLIENT_SECRET=''
14 | export COGNITO_USER_POOL_ID=''
15 | export COGNITO_REGION=''
16 | export COGNITO_DOMAIN=''
17 | export PLATFORM_API_URL=''
18 |
19 | 3. Run the streamlit app
20 | streamlit run app.py
-------------------------------------------------------------------------------- /cookbook/sample-apps/simple-chat/app.py: --------------------------------------------------------------------------------
1 | import streamlit as st
2 | from sdk.accelerator import GenerativeAIAccelerator
3 | import requests
4 |
5 | accelerator = GenerativeAIAccelerator()
6 | _model = accelerator.model_service
7 |
8 | def get_model_list():
9 |     response = _model.list_models()
10 |     return response['text_models']
11 |
12 | with st.sidebar:
13 |     st.title("Simple Chat App")
14 |     st.divider()
15 |     model_selected = st.selectbox("Select Model", [model["model_name"] for model in get_model_list() if 'EMBED' not in model["model_name"]])
16 |     max_tokens = st.slider("Max Tokens", min_value=100, max_value=2000, value=200)
17 |     temperature = st.slider("Temperature", min_value=0.1, max_value=1.0, value=0.7)
18 |
19 |
20 | model_prompt = """
21 | Role: You are a helpful chatbot assistant. Respond appropriately to the user's message/question.
22 | User:{text}
23 | """
24 |
25 | def get_response(message, model):
26 |     if 'TITAN' in model:
27 |         stop_sequences = ["User:"]
28 |     else:
29 |         stop_sequences = ["\\n"]
30 |     response = _model.invoke_model(model_name=model,
31 |                                    prompt=model_prompt.format(text=message),
32 |                                    max_tokens=max_tokens,
33 |                                    temperature=temperature,
34 |                                    top_p=0.9,
35 |                                    top_k=50,
36 |                                    stop_sequences=stop_sequences)
37 |     return response['output_text']
38 |
39 |
40 | if "messages" not in st.session_state:
41 |     st.session_state.messages = [{"role": "assistant", "content": "Hello! I am a chatbot built on the GenAI Foundational Platform. I can help you with your queries. Ask me anything."}]
42 |
43 | for message in st.session_state.messages:
44 |     with st.chat_message(message["role"]):
45 |         st.markdown(message["content"])
46 |
47 | if prompt := st.chat_input("What is up?"):
48 |     st.chat_message("user").markdown(prompt)
49 |     st.session_state.messages.append({"role": "user", "content": prompt})
50 |     response = get_response(prompt, model_selected)
51 |     with st.chat_message("assistant"):
52 |         st.markdown(response)
53 |     st.session_state.messages.append({"role": "assistant", "content": response})
-------------------------------------------------------------------------------- /cookbook/sample-apps/simple-chat/reqs.txt: --------------------------------------------------------------------------------
1 | altair==5.3.0
2 | anyio==4.4.0
3 | attrs==23.2.0
4 | blinker==1.8.2
5 | boto3==1.34.119
6 | botocore==1.34.119
7 | cachetools==5.3.3
8 | certifi==2024.6.2
9 | charset-normalizer==3.3.2
10 | click==8.1.7
11 | exceptiongroup==1.2.1
12 | gitdb==4.0.11
13 | GitPython==3.1.43
14 | h11==0.14.0
15 | httpcore==1.0.5
16 | httpx==0.27.0
17 | idna==3.7
18 | Jinja2==3.1.4
19 | jmespath==1.0.1
20 | jsonschema==4.22.0
21 | jsonschema-specifications==2023.12.1
22 | markdown-it-py==3.0.0
23 | MarkupSafe==2.1.5
24 | mdurl==0.1.2
25 | numpy==1.26.4
26 | packaging==24.0
27 | pandas==2.2.2
28 | pillow==10.3.0
29 | protobuf==4.25.3
30 | pyarrow==16.1.0
31 | pydeck==0.9.1
32 | Pygments==2.18.0
33 | python-dateutil==2.9.0.post0
34 | pytz==2024.1
35 | referencing==0.35.1
36 | requests==2.32.3
37 | rich==13.7.1
38 | rpds-py==0.18.1
39 | s3transfer==0.10.1
40 | six==1.16.0
41 | smmap==5.0.1
42 | sniffio==1.3.1
43 | streamlit==1.35.0
44 | tenacity==8.3.0
45 | toml==0.10.2
46 | toolz==0.12.1
47 | tornado==6.4
48 | typing_extensions==4.12.1
49 | tzdata==2024.1
50 | urllib3==1.26.19
51 |
-------------------------------------------------------------------------------- /cookbook/sample-apps/simple-chat/sdk/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/generative-ai-applications-foundational-architecture/b237eeff1f4bbb6526a08941f9b6fa53d90c8c64/cookbook/sample-apps/simple-chat/sdk/__init__.py
-------------------------------------------------------------------------------- /docs/adminportal.md: --------------------------------------------------------------------------------
1 | # Admin Portal Guide
2 | The Admin Portal is deployed as part of this Gen AI Foundational Architecture to enable easy status monitoring of the deployed microservices and onboarding of new applications; it also provides an API playground for trying out the API endpoints.
3 | After successful installation of the platform, you will be able to access the Admin Portal using the CloudFront Distribution URL.
4 |
5 | #### Login to Admin Portal
6 |
7 | Before you begin, ensure you have the following:
8 |
9 | 1. **Admin Email Address**: This is the email address you provided while deploying the CDK stack as part of the implementation steps.
10 | 2. **Password**: As part of the implementation, you should have received an email with a temporary password. Use it for your initial login; you will then be prompted to change it.
11 |
12 | Using these credentials, you will be able to log in to the Admin Portal.
13 |
14 | ![adminlogin](../image/adminlogin.gif)
15 |
16 | #### Onboard Applications
17 |
18 | Applications leveraging the API endpoints need to be onboarded to enable secure communication between the application and the platform's endpoints.
19 | Endpoints are served via Amazon API Gateway, which is secured by an Amazon Cognito authorizer (JWT authorizer) to restrict unauthorized requests from applications.
20 |
21 | ![onboardapp](../image/adminonboardapp.gif)
22 |
23 | > Note: This implementation uses Amazon Cognito to authorize API requests from applications. It can be swapped for any IdP that supports JWT-based authorization for applications.
24 |
25 | #### Monitor Status of Microservices
26 | The Admin Portal provides an easy and convenient way to keep tabs on the deployed microservices and check their health status, as shown below.
27 |
28 | ![monitorstatus](../image/adminservices.png)
29 |
30 | #### Metrics Dashboard
31 | The Admin Portal has a dedicated tab that displays metrics for the various microservices, tracked at the application level. For instance, you can view metrics like `# of invocations` and `Total Input Tokens` for the Model Invocation Service, per application.
32 |
33 | ![metrics](../image/adminmetrics.gif)
34 |
35 |
36 | #### API Playground
37 | The Admin Portal gives developers an easy way to try and test the available endpoints before integrating them into an application.
38 |
39 | ![apiplayground](../image/adminapiplayground.png)
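40 |
41 | As a quick illustration, once an application is onboarded it can call the platform endpoints directly with its app credentials. The sketch below mirrors the token flow used in the cookbook's `utils.py`; every angle-bracket value is a placeholder supplied during onboarding, and the endpoint path is hypothetical — take real paths from the [API docs](./api_docs).
42 |
43 | ```python
44 | import base64, requests
45 |
46 | # Exchange app credentials for an access token (client-credentials flow)
47 | auth = base64.b64encode(b"<CLIENT_ID>:<CLIENT_SECRET>").decode()
48 | token = requests.post(
49 |     "https://<COGNITO_DOMAIN>/oauth2/token",
50 |     headers={"Content-Type": "application/x-www-form-urlencoded",
51 |              "Authorization": f"Basic {auth}"},
52 |     data={"grant_type": "client_credentials", "scope": "genaifoundations/read"},
53 |     timeout=60,
54 | ).json()["access_token"]
55 |
56 | # Call a platform endpoint with the bearer token
57 | resp = requests.get("https://<PLATFORM_API_URL>/<endpoint-path>",
58 |                     headers={"Authorization": f"Bearer {token}"}, timeout=60)
59 | ```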
-------------------------------------------------------------------------------- /docs/api_docs/README.md: --------------------------------------------------------------------------------
1 | > Clone the repo and open api_docs.html in your browser for the API docs. The API docs are rendered using Swagger UI
-------------------------------------------------------------------------------- /docs/api_docs/api_docs.html: --------------------------------------------------------------------------------
1 | 2 | 3 | 4 | 5 | 6 | Swagger UI 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 33 | 34 | 35 |
-------------------------------------------------------------------------------- /docs/api_docs/assets/swagger-initializer.js: --------------------------------------------------------------------------------
1 | window.onload = function() {
2 |   //
3 |
4 |   // the following lines will be replaced by docker/configurator, when it runs in a docker-container
5 |   window.ui = SwaggerUIBundle({
6 |     url: "https://petstore.swagger.io/v2/swagger.json",
7 |     dom_id: '#swagger-ui',
8 |     deepLinking: true,
9 |     presets: [
10 |       SwaggerUIBundle.presets.apis,
11 |       SwaggerUIStandalonePreset
12 |     ],
13 |     plugins: [
14 |       SwaggerUIBundle.plugins.DownloadUrl
15 |     ],
16 |     layout: "StandaloneLayout"
17 |   });
18 |
19 |   //
20 | };
21 |
-------------------------------------------------------------------------------- /docs/implementation.md: --------------------------------------------------------------------------------
1 | # Implementation Guide
2 |
3 | #### Pre-requisites
4 |
5 | Before you begin, ensure you have the following:
6 |
7 | 1. **AWS Account**: You must have an AWS account with the necessary permissions to create resources.
8 | 2. **AWS CLI**: Installed and configured on your machine. Follow [this guide](https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2.html) to install the AWS CLI.
9 | 3. **IAM User**: A user with admin privileges configured in the AWS CLI using ```aws configure``` in the "default" profile.
10 | 4. **Node.js and npm**: Installed on your machine. Follow [this guide](https://nodejs.org/en/download/package-manager) for installation. If you are using an EC2 instance, you can follow [this guide](https://docs.aws.amazon.com/sdk-for-javascript/v2/developer-guide/setting-up-node-on-ec2-instance.html).
11 | 5. **Docker**: Ensure Docker is installed and running on your machine. Follow the official Docker documentation for installation.
12 | 6. **Git**: Installed on your machine. Follow [this guide](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git) for installation.
13 | 7. **Email**: An email address to be used as the admin user of the platform. The Admin Portal password will be sent to this email.
14 | 8. **Install CDK CLI**: Run ``` npm install -g aws-cdk ``` to install the CDK.
15 | 9. **Manage access to Amazon Bedrock foundation models**: You must have enabled foundation models in Amazon Bedrock. Follow [this guide](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access.html) to manage access.
16 |
17 |
18 | #### Implementation Steps
19 | 1. #### Clone repository
20 | ```
21 | git clone https://github.com/aws-samples/generative-ai-applications-foundational-architecture.git
22 | ```
23 | This will clone the repository to your local workstation.
24 | It will have the following folder structure.
25 | ```
26 | generative-ai-applications-foundational-architecture
27 | ├── admin-ui              # The Admin portal, frontend and backend
28 | │   ├── backend
29 | │   └── frontend
30 | ├── cdk                   # CDK application to deploy infrastructure
31 | │   └── GenAIFoundations
32 | ├── cookbook              # Usage samples, notebooks and sample apps
33 | │   ├── notebooks
34 | │   ├── sample-apps
35 | │   └── sdk               # SDK
36 | ├── docs                  # All documentation
37 | │   ├── adminportal.md
38 | │   ├── api_docs
39 | │   ├── implementation.md
40 | │   └── microservices.md
41 | ├── image
42 | │   ├── HighLevelArchitecture.png
43 | │   ├── adminapiplayground.png
44 | │   ├── adminlogin.gif
45 | │   ├── adminmetrics.gif
46 | │   ├── adminonboardapp.gif
47 | │   ├── adminservices.png
48 | │   ├── architecture.png
49 | │   ├── authentication_flow.png
50 | │   ├── chunkingprocess.png
51 | │   ├── extractionprocess.png
52 | │   └── how-it-works.png
53 | ├── services              # Microservices with Docker files
54 | │   ├── foundations_chunking
55 | │   ├── foundations_document_processing
56 | │   ├── foundations_extraction
57 | │   ├── foundations_model_invocation
58 | │   ├── foundations_prompt_management
59 | │   ├── foundations_vector_job_process
60 | │   └── foundations_vectorization
61 | └── testing               # Test scripts
62 |     ├── auth
63 |     └── models
64 | ├── build-images.sh       # Builds docker images and pushes them to ECR
65 | ├── build-ui.sh           # Builds UI locally and pushes the assets to S3
66 | ├── config.txt            # List of microservices to build. Modify this if you want to push only specific images.
67 | ```
68 | 2. #### Check the Docker daemon status and ensure it is up and running. If Docker is not running, start the daemon and rerun the command to confirm the status.
69 |
70 | ```
71 | docker info > /dev/null 2>&1 && echo "Docker is up and running" || echo "Docker is not running"
72 | ```
73 |
74 | 3. #### Create IAM Service Linked Role for ECS
75 | ```
76 | aws iam create-service-linked-role --aws-service-name ecs.amazonaws.com
77 | ```
78 | > Note: Execute this command only if this is a new account in which no ECS cluster has been created. You can check whether a service-linked role for ECS already exists in the IAM console.
79 | 4. #### Build docker container images and push them to the ECR repository
80 | ```
81 | sh ./build-images.sh
82 | ```
83 | > ⚠️ **Important: After successful completion of the script, log in to the AWS account, navigate to the ECR console, and confirm that the repositories below and their corresponding images are available.**
84 |
85 | > If some of the images were not pushed to ECR, please modify the config.txt file and re-run the script.
86 | - foundations_model_invocation
87 | - foundations_document_processing
88 | - foundations_extraction
89 | - foundations_chunking
90 | - foundations_vectorization
91 | - foundations_vector_process
92 | - foundations_prompt_management
93 | - admin_backend_service
94 |
95 |
96 | 5. #### Navigate to the CDK project
97 | ```
98 | cd cdk/GenAIFoundations
99 | ```
100 | 6. #### Bootstrap your CDK project
101 | Run the following commands
102 | ```
103 | npm install
104 | cdk bootstrap -c VPC_CIDR='<vpc-cidr>'
105 | ```
106 | > The VPC CIDR will be used to create a new VPC where the ECS cluster will be hosted
107 | 7. #### Synthesize the CloudFormation template
108 | ```
109 | cdk synth -c VPC_CIDR='<vpc-cidr>' --parameters userEmail="<admin-email>"
110 | ```
111 | > Execute this step if you want to use a static CloudFormation template to deploy the stack. If you want to use cdk deploy to deploy the stack automatically, skip to the next step.
112 | > userEmail is the email that will be used to provide access to the Admin Portal. The temporary password will be sent to this email address.
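113 | > For example, with a hypothetical CIDR range and admin email (illustrative placeholders, not values from this guide):
114 | ```
115 | cdk synth -c VPC_CIDR='10.0.0.0/16' --parameters userEmail="admin@example.com"
116 | ```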
117 | 8. #### Deploy the stack using ```cdk deploy```
118 | ```
119 | cdk deploy -c VPC_CIDR='<vpc-cidr>' --parameters userEmail="<admin-email>" --all
120 | ```
121 | > Skip this step if you are deploying using the static CloudFormation template (step 7)
122 | 9. #### Navigate back to the root of the project
123 | ```
124 | cd ../..
125 | ```
126 | 10. #### Make a note of the CloudFormation stack outputs
127 | > The following parameters will be output after the stack deployment completes. We will use these values as inputs in the next step.
128 |
129 |
130 | | Parameter Name | Details |
131 | |----------------------------|---------------------------------|
132 | | AdminCognitoClientID | Admin Cognito Client ID |
133 | | AdminCognitoUserPoolDomain | Admin Cognito User Pool Domain |
134 | | AdminCognitoUserPoolID | Admin Cognito User Pool ID |
135 | | CloudFrontDistributionURL | CloudFront Distribution URL |
136 | | PlatformAPIGatewayURL | Platform API Gateway URL |
137 | | UIS3BucketName | UI S3 Bucket Name |
138 |
139 | 11. #### Build and deploy the UI
140 | > Note: Run the script below with sudo privileges.
141 |
142 | ```
143 | sudo sh ./build-ui.sh
144 | ```
145 | > Please refer to the table in step 10 to enter the inputs to this script.
146 |
147 | > After you complete these steps, you can log in to the Admin Portal using the CloudFront Distribution URL. For steps on using the Admin Portal and onboarding new applications, follow [this guide](./docs/adminportal.md)
148 |
149 | > Please make sure you have enabled models on Bedrock. Follow [this guide](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access.html) to manage access.
-------------------------------------------------------------------------------- /docs/microservices.md: --------------------------------------------------------------------------------
1 | # Microservices Overview
2 |
3 | The Generative AI Foundational Platform hosts the following microservices, deployed as Fargate tasks. Together they enable standardized invocation of LLMs on Bedrock, facilitate prompt management, and streamline document processing through extraction, chunking, and vectorization workflows. These services offer a scalable and efficient way to enable generative AI capabilities for applications.
4 |
5 | Each microservice exposes a set of API endpoints accessible via the API gateway. For information on API endpoints, please refer to the [API Docs](./api_docs).
6 |
7 | ### Model Invocation Service
8 | ***
9 | The Model Invocation Service standardizes LLM invocation calls by auto-parsing inputs and outputs. Developers can call any LLM with a set of standard parameters. This service currently supports text-to-text and text-to-embed models on Bedrock.
10 | Developers can use the Model Invocation endpoints to:
11 | - Invoke a model on Bedrock using a text prompt or a series of messages.
12 | - Invoke a model on Bedrock asynchronously using a text prompt or a series of messages, returning an invocation ID to retrieve the result later (temporarily stored in ElastiCache Redis).
13 | - Invoke a model on Bedrock with raw input (refer to the Bedrock documentation for JSON formats).
14 | - Invoke embed models.
15 |
16 | The service logs all function calls by app and model, tracks token usage, and provides access to this data through an admin portal.
17 |
18 | ### Document Processing Service
19 | ***
20 |
21 | This service executes extraction and chunking workflows.
22 |
23 | **Extraction Workflow**
24 |
25 | Extracts text from documents while preserving layout information.
Here's a high-level overview: 26 | 27 | ![extractionprocess](../image/extractionprocess.png) 28 | 29 | Process flow: 30 | 1. Create an extraction job and receive a Job ID. 31 | 2. Register files for extraction and receive a pre-signed URL for file upload. 32 | 3. Start an extraction job 33 | 4. Check the extraction job status. 34 | 5. Once the extraction job completes, obtain the results, including extracted text and tables, using S3 pre-signed URLs. 35 | 36 | *** 37 | 38 | **Chunking Workflow** 39 | 40 | 41 | The Chunking Service provides custom chunking strategies (by page, paragraph, character count) tailored to specific use cases, ensuring adaptability to varying requirements across the organization. 42 | This service can be enhanced or expanded to include new chunking strategies, promoting standardization. 43 | 44 | ![chunkingprocess](../image/chunkingprocess.png) 45 | 46 | The chunking process takes a completed extraction job's ID as input, reads the extracted text, and creates chunks using the specified chunking strategy. 47 | 48 | Process flow: 49 | 1. Create a chunking job for an extracted job and receive a Chunking Job ID. 50 | 2. Check the Chunking Job status. 51 | 3. Once the chunking job completes, obtain the results, including the pre-signed URL for the chunked file. 52 | 53 | 54 | 55 | ### Vectorization Service 56 | *** 57 | The Vectorization Service facilitates the creation, management, and retrieval of vector representations of textual data, enabling efficient semantic search. 58 | 59 | Process Flow: 60 | 1. Create a vector store and obtain the store ID for creating an index. 61 | 2. Check the vector store status and wait until it's active. 62 | 3. Create an index for the vector store and wait until it's active. 63 | 4. Trigger a vectorization job by passing the completed chunking job ID. This will vectorize each chunk and store it in the OpenSearch Serverless vector index. 64 | 5. Query the vector store for semantic search by passing the index ID and natural language query. 65 | 66 | While the current implementation supports only OpenSearch Serverless, the solution is designed to be extensible, and additional vector databases can be onboarded in the future. 67 | 68 | 69 | 70 | ### Prompt Management Service 71 | *** 72 | The Prompt Management Service facilitates management of Prompt Templates at scale. It supports versioning, easy retrieval, and dynamic prompt insertion. 73 | 74 | 1. Save and version prompt templates. 75 | 2. Get the latest version of a prompt template by name. 76 | 3. Get all versions of a prompt template by name. 77 | 4. Get a specific version of a prompt template by name and version. 78 | 79 | 80 | 81 | ### Admin Backend Service 82 | *** 83 | The Admin Backend Service supports the [admin dashboard](../docs/adminportal.md), providing a centralized interface for monitoring and managing microservice status, invocation metrics, extraction jobs, token consumption, and errors. 
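84 |
85 | ### Example: Invoking a Model via the SDK
86 | ***
87 | As a concrete illustration, the sketch below invokes a text model through the Model Invocation Service using the cookbook SDK. The model name and parameters mirror the sample apps in `cookbook/sample-apps`; it assumes the Cognito and platform environment variables described in the sample-app READMEs are set.
88 |
89 | ```python
90 | from sdk.accelerator import GenerativeAIAccelerator
91 |
92 | accelerator = GenerativeAIAccelerator()
93 | model_service = accelerator.model_service
94 |
95 | # List the available text models, then invoke one with standard parameters
96 | models = [m["model_name"] for m in model_service.list_models()['text_models']]
97 | print("Available text models:", models)
98 |
99 | response = model_service.invoke_model(
100 |     model_name="ANTHROPIC_CLAUDE_3_SONNET_V1",
101 |     prompt="Summarize the benefits of a shared GenAI platform in two sentences.",
102 |     max_tokens=500,
103 |     temperature=0.7,
104 |     top_p=0.9,
105 |     top_k=50,
106 |     stop_sequences=["\\n"]
107 | )
108 | print(response['output_text'])
109 | ```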
-------------------------------------------------------------------------------- /image/HighLevelArchitecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-applications-foundational-architecture/b237eeff1f4bbb6526a08941f9b6fa53d90c8c64/image/HighLevelArchitecture.png -------------------------------------------------------------------------------- /image/adminapiplayground.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-applications-foundational-architecture/b237eeff1f4bbb6526a08941f9b6fa53d90c8c64/image/adminapiplayground.png -------------------------------------------------------------------------------- /image/adminlogin.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-applications-foundational-architecture/b237eeff1f4bbb6526a08941f9b6fa53d90c8c64/image/adminlogin.gif -------------------------------------------------------------------------------- /image/adminmetrics.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-applications-foundational-architecture/b237eeff1f4bbb6526a08941f9b6fa53d90c8c64/image/adminmetrics.gif -------------------------------------------------------------------------------- /image/adminonboardapp.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-applications-foundational-architecture/b237eeff1f4bbb6526a08941f9b6fa53d90c8c64/image/adminonboardapp.gif -------------------------------------------------------------------------------- /image/adminservices.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-applications-foundational-architecture/b237eeff1f4bbb6526a08941f9b6fa53d90c8c64/image/adminservices.png -------------------------------------------------------------------------------- /image/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-applications-foundational-architecture/b237eeff1f4bbb6526a08941f9b6fa53d90c8c64/image/architecture.png -------------------------------------------------------------------------------- /image/authentication_flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-applications-foundational-architecture/b237eeff1f4bbb6526a08941f9b6fa53d90c8c64/image/authentication_flow.png -------------------------------------------------------------------------------- /image/chunkingprocess.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-applications-foundational-architecture/b237eeff1f4bbb6526a08941f9b6fa53d90c8c64/image/chunkingprocess.png -------------------------------------------------------------------------------- /image/deployment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-applications-foundational-architecture/b237eeff1f4bbb6526a08941f9b6fa53d90c8c64/image/deployment.png 
-------------------------------------------------------------------------------- /image/extractionprocess.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-applications-foundational-architecture/b237eeff1f4bbb6526a08941f9b6fa53d90c8c64/image/extractionprocess.png -------------------------------------------------------------------------------- /image/how-it-works.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/generative-ai-applications-foundational-architecture/b237eeff1f4bbb6526a08941f9b6fa53d90c8c64/image/how-it-works.png -------------------------------------------------------------------------------- /sdk/reqs.txt: -------------------------------------------------------------------------------- 1 | altair==5.3.0 2 | anyio==4.4.0 3 | attrs==23.2.0 4 | blinker==1.8.2 5 | boto3==1.34.119 6 | botocore==1.34.119 7 | cachetools==5.3.3 8 | certifi==2024.6.2 9 | charset-normalizer==3.3.2 10 | click==8.1.7 11 | exceptiongroup==1.2.1 12 | gitdb==4.0.11 13 | GitPython==3.1.43 14 | h11==0.14.0 15 | httpcore==1.0.5 16 | httpx==0.27.0 17 | idna==3.7 18 | Jinja2==3.1.4 19 | jmespath==1.0.1 20 | jsonschema==4.22.0 21 | jsonschema-specifications==2023.12.1 22 | markdown-it-py==3.0.0 23 | MarkupSafe==2.1.5 24 | mdurl==0.1.2 25 | numpy==1.26.4 26 | packaging==24.0 27 | pandas==2.2.2 28 | pillow==10.3.0 29 | protobuf==4.25.3 30 | pyarrow==16.1.0 31 | pydeck==0.9.1 32 | Pygments==2.18.0 33 | python-dateutil==2.9.0.post0 34 | pytz==2024.1 35 | python-dotenv 36 | referencing==0.35.1 37 | requests==2.32.3 38 | rich==13.7.1 39 | rpds-py==0.18.1 40 | s3transfer==0.10.1 41 | six==1.16.0 42 | smmap==5.0.1 43 | sniffio==1.3.1 44 | streamlit==1.35.0 45 | tenacity==8.3.0 46 | toml==0.10.2 47 | toolz==0.12.1 48 | tornado==6.4 49 | typing_extensions==4.12.1 50 | tzdata==2024.1 51 | urllib3==1.26.19 52 | -------------------------------------------------------------------------------- /services/foundations_chunking/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM --platform=linux/amd64 python:3.9-alpine 2 | 3 | WORKDIR /app 4 | 5 | COPY . 
/app 6 | 7 | RUN pip install --no-cache-dir -r requirements.txt 8 | 9 | RUN apk --no-cache add curl 10 | 11 | EXPOSE 80 12 | 13 | CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "80"] 14 | 15 | -------------------------------------------------------------------------------- /services/foundations_chunking/app.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | import asyncio 5 | from fastapi import FastAPI, BackgroundTasks, HTTPException, Depends 6 | from pydantic import BaseModel 7 | import boto3 8 | from botocore.config import Config 9 | from typing import Dict, Any 10 | import requests 11 | from utils.fixed_size_chunking import FixedSizeChunker 12 | from utils.recursive_chunking import RecursiveChunker 13 | from utils.page_wise_chunking import PagewiseChunker 14 | from utils.json_chunking import JSONChunker 15 | from typing import List 16 | from models import ChunkingJobs, ChunkingJobFiles 17 | 18 | # Configure structured logging 19 | logging.basicConfig(level=logging.INFO) 20 | logger = logging.getLogger("chunking_processor") 21 | logger.setLevel(logging.INFO) 22 | 23 | # Environment variables for local testing and ECS task 24 | QUEUE_URL = os.getenv('QUEUE_URL') 25 | RESULTS_S3_BUCKET = os.getenv('RESULTS_S3_BUCKET') 26 | MAX_RETRIES = int(os.getenv('MAX_RETRIES', '10')) 27 | REGION_NAME = '' 28 | MAX_CONCURRENT_TASKS = int(os.getenv('MAX_CONCURRENT_TASKS', '10')) 29 | VISIBILITY_TIMEOUT = int(os.getenv('VISIBILITY_TIMEOUT', '600')) # in seconds (10 minutes) 30 | ECS_METADATA_URL = os.getenv("ECS_CONTAINER_METADATA_URI_V4", "") 31 | CHUNKING_JOBS_TABLE = os.getenv('CHUNKING_JOBS_TABLE') 32 | CHUNKING_JOBS_FILES_TABLE = os.getenv('CHUNKING_JOBS_FILES_TABLE') 33 | 34 | 35 | 36 | # Global variables 37 | session = None 38 | s3_client = None 39 | sqs_client = None 40 | dynamodb = None 41 | 42 | retry_config = Config(retries={"max_attempts": MAX_RETRIES, "mode": "standard"}) 43 | 44 | # Background task checking interval (in seconds) 45 | CHECK_INTERVAL = 60 46 | poll_task = None 47 | 48 | app = FastAPI() 49 | 50 | 51 | def get_boto3_clients(region_name): 52 | session = boto3.Session(region_name=region_name) 53 | s3_client = session.client('s3', config=retry_config) 54 | sqs_client = session.client('sqs', config=retry_config) 55 | dynamodb_client = session.client('dynamodb', config=retry_config) 56 | return s3_client, sqs_client, dynamodb_client 57 | 58 | def save_chunks_to_s3(bucket: str, file_path: str, chunks: List[dict]): 59 | try: 60 | s3 = boto3.client('s3') 61 | s3.put_object(Bucket=bucket, Key=file_path, Body=json.dumps(chunks)) 62 | logger.info(f"Chunks saved to S3: {file_path}") 63 | except Exception as e: 64 | raise e 65 | 66 | def read_file_from_s3(file_path: str) -> str: 67 | try: 68 | s3 = boto3.client('s3') 69 | response = s3.get_object(Bucket=RESULTS_S3_BUCKET, Key=file_path) 70 | data = json.loads(response['Body'].read()) 71 | return data 72 | except Exception as e: 73 | raise e 74 | 75 | async def poll_sqs( sqs_client, dynamodb, s3_client, semaphore): 76 | logger.info("Polling SQS queue") 77 | 78 | while True: 79 | print("Polling SQS queue") 80 | try: 81 | response = sqs_client.receive_message( 82 | QueueUrl=QUEUE_URL, 83 | MaxNumberOfMessages=3, 84 | WaitTimeSeconds=5, 85 | VisibilityTimeout=VISIBILITY_TIMEOUT # initial visibility timeout 86 | ) 87 | messages = response.get('Messages', []) 88 | logger.info(f"Received {len(messages)} messages") 89 | for message in messages: 90 | 
await semaphore.acquire() 91 | asyncio.create_task( 92 | handle_chunking( 93 | semaphore, 94 | message, 95 | dynamodb, 96 | s3_client, 97 | sqs_client 98 | ) 99 | ) 100 | await asyncio.sleep(1) 101 | except Exception as e: 102 | logger.error(f"Error occurred: {e}") 103 | await asyncio.sleep(5) 104 | 105 | async def handle_chunking(semaphore, message, dynamodb, s3_client, sqs_client): 106 | try: 107 | 108 | message_body = json.loads(message['Body']) 109 | file_path = message_body.get('file_path') 110 | file_name = message_body.get('file_name') 111 | extraction_job_id = message_body.get('extraction_job_id') 112 | chunk_job_id = message_body.get('chunking_job_id') 113 | chunk_job_file_id = message_body.get('chunk_job_file_id') 114 | chunking_strategy = message_body.get('chunking_strategy') 115 | chunking_params = message_body.get('chunking_params') 116 | app_id = message_body.get('app_id') 117 | receipt_handle = message['ReceiptHandle'] 118 | 119 | # Infer File Type 120 | file_extension = file_name.split(".")[-1] 121 | 122 | await extend_visibility_timeout(sqs_client, receipt_handle) 123 | # Perform Chunking 124 | chunk_size = chunking_params.get('chunk_size', 1000) 125 | chunk_overlap = chunking_params.get('chunk_overlap', 0) 126 | content = read_file_from_s3(file_path) 127 | if file_extension == "json": 128 | json_chunker = JSONChunker() 129 | chunks = json_chunker.chunk_json(content) 130 | logger.info(f"Processed file: {file_name}, chunks: {json.dumps(chunks, indent=2)}") 131 | elif file_extension == "jsonl": 132 | json_chunker = JSONChunker() 133 | chunks = json_chunker.chunk_jsonl(content) 134 | logger.info(f"Processed file: {file_name}, chunks: {json.dumps(chunks, indent=2)}") 135 | else: 136 | if chunking_strategy == "fixed_size": 137 | fixed_size_chunker = FixedSizeChunker(chunk_size=chunk_size, chunk_overlap=chunk_overlap) 138 | chunks = fixed_size_chunker.chunk(content) 139 | logger.info(f"Processed file: {file_name}, chunks: {json.dumps(chunks, indent=2)}") 140 | elif chunking_strategy == "recursive": 141 | recursive_chunker = RecursiveChunker(chunk_size=chunk_size, chunk_overlap=chunk_overlap) 142 | chunks = recursive_chunker.chunk(content) 143 | logger.info(f"Processed file: {file_name}, chunks: {json.dumps(chunks, indent=2)}") 144 | elif chunking_strategy == "page": 145 | pagewise_chunker = PagewiseChunker() 146 | chunks = pagewise_chunker.chunk(content) 147 | logger.info(f"Processed file: {file_name}, chunks: {json.dumps(chunks, indent=2)}") 148 | else: 149 | raise ValueError(f"Invalid chunking strategy: {chunking_strategy}") 150 | 151 | # Save chunks to S3 152 | 153 | created_chunk_key = f"{app_id}/{extraction_job_id}/{file_name}/chunk_{chunk_job_id}.json" 154 | save_chunks_to_s3(RESULTS_S3_BUCKET, created_chunk_key, chunks) 155 | 156 | # delete the message from the queue 157 | sqs_client.delete_message( 158 | QueueUrl=QUEUE_URL, 159 | ReceiptHandle=message['ReceiptHandle'] 160 | ) 161 | logger.info(f"Deleted message from SQS: {message}") 162 | 163 | chunking_job_file = ChunkingJobFiles.safe_get(chunk_job_file_id) 164 | if chunking_job_file: 165 | chunking_job_file.status = "COMPLETED" 166 | chunking_job_file.save() 167 | logger.info(f"Updated chunking job file record: {chunk_job_file_id}") 168 | chunking_job = ChunkingJobs.safe_get(chunk_job_id) 169 | if chunking_job: 170 | chunking_job.status = "COMPLETED" 171 | chunking_job.queued_files -= 1 172 | chunking_job.completed_files += 1 173 | chunking_job.save() 174 | else: 175 | logger.error(f"Chunking job record not found: 
{chunk_job_id}") 176 | 177 | else: 178 | logger.error(f"Chunking job file record not found: {chunk_job_file_id}") 179 | 180 | logger.info(f"Updated chunking job record: {chunk_job_id}") 181 | except Exception as e: 182 | logger.error(f"Error processing message: {e}") 183 | finally: 184 | semaphore.release() 185 | 186 | async def extend_visibility_timeout(sqs_client, receipt_handle): 187 | try: 188 | sqs_client.change_message_visibility( 189 | QueueUrl=QUEUE_URL, 190 | ReceiptHandle=receipt_handle, 191 | VisibilityTimeout=VISIBILITY_TIMEOUT # extend the visibility timeout 192 | ) 193 | except Exception as e: 194 | logger.error(f"Failed to extend visibility timeout: {e}") 195 | 196 | async def ensure_task_running(background_tasks: BackgroundTasks, sqs_client, dynamodb, s3_client): 197 | global poll_task 198 | logger.info("Starting background task") 199 | semaphore = asyncio.Semaphore(MAX_CONCURRENT_TASKS) 200 | while True: 201 | if poll_task is None or poll_task.done(): 202 | logger.info("Polling task not running or done, starting new task") 203 | poll_task = asyncio.create_task(poll_sqs(sqs_client, dynamodb, s3_client, semaphore)) 204 | background_tasks.add_task(lambda: poll_task) 205 | logger.info("Started new polling task") 206 | await asyncio.sleep(CHECK_INTERVAL) 207 | 208 | @app.get("/chunking/service/health") 209 | async def health_check(): 210 | return {"status": "UP"} 211 | 212 | @app.on_event("startup") 213 | async def startup_event(background_tasks: BackgroundTasks = BackgroundTasks()): 214 | 215 | global REGION_NAME 216 | 217 | if not ECS_METADATA_URL: 218 | raise HTTPException(status_code=500, detail="ECS_CONTAINER_METADATA_URI_V4 environment variable not set.") 219 | 220 | try: 221 | response = requests.get(ECS_METADATA_URL, timeout=10) 222 | response.raise_for_status() 223 | metadata = response.json() 224 | REGION_NAME = metadata.get("Labels", {}).get("com.amazonaws.ecs.task-arn", "").split(":")[3] 225 | 226 | s3_client, sqs_client, dynamodb_client = get_boto3_clients(REGION_NAME) 227 | # extraction = Extraction(region_name=REGION_NAME) 228 | 229 | logger.info("Chunking Processing Service started successfully.") 230 | logger.info(f"Region: {REGION_NAME}") 231 | logger.info(f"Results S3 Bucket: {RESULTS_S3_BUCKET}") 232 | logger.info(f"Chunking Job Results Table: {CHUNKING_JOBS_TABLE}") 233 | logger.info(f"Chunking Job Files Results Table: {CHUNKING_JOBS_FILES_TABLE}") 234 | logger.info(f"Chunking Queue URL: {QUEUE_URL}") 235 | except requests.exceptions.RequestException as e: 236 | raise HTTPException(status_code=500, detail=f"Error retrieving ECS metadata: {str(e)}") 237 | 238 | 239 | asyncio.create_task(ensure_task_running(background_tasks, sqs_client, dynamodb_client, s3_client)) 240 | -------------------------------------------------------------------------------- /services/foundations_chunking/models.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | from datetime import datetime 3 | from typing import Optional, List, Dict, Any 4 | from dyntastic import Dyntastic 5 | from pydantic import Field, model_validator 6 | import os 7 | from pydantic import BaseModel 8 | from enum import Enum 9 | 10 | 11 | class ChunkingJobs(Dyntastic): 12 | __table_name__ = lambda: os.environ.get("CHUNKING_JOBS_TABLE") 13 | __hash_key__ = "chunking_job_id" 14 | 15 | chunking_job_id: str = Field(default_factory=lambda: str(uuid.uuid4())) 16 | extraction_job_id: str 17 | app_id: str 18 | status: str 19 | chunking_strategy: str 20 | 
    chunking_params: str
21 |     total_file_count: int
22 |     queued_files: int
23 |     completed_files: int
24 |     failed_files: int
25 |     timestamp: datetime = Field(default_factory=datetime.now)
26 |     updated_at: datetime = Field(default_factory=datetime.now)
27 |
28 |     @model_validator(mode="before")
29 |     def set_updated_at(cls, values):
30 |         values["updated_at"] = datetime.now()
31 |         return values
32 |
33 | class ChunkingJobFiles(Dyntastic):
34 |     __table_name__ = lambda: os.environ.get("CHUNKING_JOBS_FILES_TABLE")
35 |     __hash_key__ = "chunk_job_file_id"
36 |
37 |     chunk_job_file_id: str
38 |     chunking_job_id: str
39 |     app_id: str
40 |     file_name: str
41 |     file_path: str
42 |     file_id: str
43 |     status: str
44 |     timestamp: datetime = Field(default_factory=datetime.now)
-------------------------------------------------------------------------------- /services/foundations_chunking/requirements.txt: --------------------------------------------------------------------------------
1 | annotated-types==0.7.0
2 | anyio==4.3.0
3 | boto3==1.34.102
4 | botocore==1.34.102
5 | certifi==2024.07.04
6 | cffi==1.16.0
7 | charset-normalizer==3.3.2
8 | click==8.1.7
9 | cryptography==42.0.7
10 | dnspython==2.6.1
11 | dyntastic==0.15.0
12 | email_validator==2.1.1
13 | exceptiongroup==1.2.1
14 | fastapi==0.111.0
15 | fastapi-cli==0.0.3
16 | h11==0.14.0
17 | httpcore==1.0.5
18 | httptools==0.6.1
19 | httpx==0.27.0
20 | idna==3.7
21 | Jinja2==3.1.4
22 | jmespath==1.0.1
23 | jsonpatch==1.33
24 | jsonpointer==2.4
25 | langchain-core==0.2.3
26 | langchain-text-splitters==0.2.0
27 | langsmith==0.1.67
28 | markdown-it-py==3.0.0
29 | MarkupSafe==2.1.5
30 | mdurl==0.1.2
31 | orjson==3.10.3
32 | packaging==23.2
33 | pycparser==2.22
34 | pydantic==2.7.2
35 | pydantic_core==2.18.3
36 | Pygments==2.18.0
37 | PyJWT==2.8.0
38 | python-dateutil==2.9.0.post0
39 | python-dotenv==1.0.1
40 | python-multipart==0.0.9
41 | PyYAML==6.0.1
42 | requests==2.32.3
43 | rich==13.7.1
44 | s3transfer==0.10.1
45 | shellingham==1.5.4
46 | six==1.16.0
47 | sniffio==1.3.1
48 | starlette==0.37.2
49 | tenacity==8.3.0
50 | typer==0.12.3
51 | typing_extensions==4.12.1
52 | ujson==5.9.0
53 | urllib3==1.26.19
54 | uvicorn==0.29.0
55 | uvloop==0.19.0
56 | watchfiles==0.21.0
57 | websockets==12.0
58 |
-------------------------------------------------------------------------------- /services/foundations_chunking/utils/fixed_size_chunking.py: --------------------------------------------------------------------------------
1 | from typing import List, Dict
2 | from langchain_text_splitters.character import CharacterTextSplitter
3 |
4 | class FixedSizeChunker:
5 |     def __init__(self, chunk_size: int = 4000, chunk_overlap: int = 200):
6 |         self.chunk_size = chunk_size
7 |         self.chunk_overlap = chunk_overlap
8 |         self.text_splitter = CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
9 |
10 |     # `content` is the parsed extraction result: a dict with a "pages" list,
11 |     # where each page carries its text under "page_text".
12 |     def chunk(self, content: Dict) -> List[Dict[str, str]]:
13 |         pages = content.get('pages', [])
14 |         chunks = []
15 |         for page in pages:
16 |             page_text = page.get('page_text', '')
17 |             page_chunks = self.text_splitter.split_text(page_text)
18 |             for chunk in page_chunks:
19 |                 chunks.append({"chunk": chunk})
20 |
21 |         return chunks
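22 |
23 | # --- Usage sketch (illustrative only) -----------------------------------------
24 | # The input shape mirrors what the extraction service writes to S3; the sample
25 | # document below is made up.
26 | #
27 | # chunker = FixedSizeChunker(chunk_size=1000, chunk_overlap=100)
28 | # doc = {"pages": [{"page_text": "First page text ..."},
29 | #                  {"page_text": "Second page text ..."}]}
30 | # chunks = chunker.chunk(doc)  # -> [{"chunk": "..."}, ...]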
-------------------------------------------------------------------------------- /services/foundations_chunking/utils/json_chunking.py: --------------------------------------------------------------------------------
1 | import json
2 | from typing import List, Dict
3 | from langchain_text_splitters import RecursiveJsonSplitter
4 |
5 | class JSONChunker:
6 |     def __init__(self):
7 |         self.splitter = RecursiveJsonSplitter(max_chunk_size=300)
8 |
9 |     def chunk_json(self, content: Dict) -> List[Dict[str, str]]:
10 |         try:
11 |             pages = content.get('pages', [])
12 |             json_content = json.loads(pages[0].get('page_text', ''))
13 |             chunks = []
14 |             json_chunks = self.splitter.split_text(json_data=json_content)
15 |             for chunk in json_chunks:
16 |                 chunks.append({"chunk": chunk})
17 |             return chunks
18 |         except Exception as e:
19 |             # Surface the failure to the caller rather than silently returning None
20 |             print(e)
21 |             raise
22 |
23 |     def chunk_jsonl(self, content: Dict) -> List[Dict[str, str]]:
24 |         pages = content.get('pages', [])
25 |         json_content = str(pages[0].get('page_text', ''))
26 |         chunks = []
27 |         for line in json_content.split("\n"):
28 |             chunks.append({"chunk": line})
29 |         return chunks
-------------------------------------------------------------------------------- /services/foundations_chunking/utils/page_wise_chunking.py: --------------------------------------------------------------------------------
1 | from typing import List, Dict
2 |
3 | class PagewiseChunker:
4 |
5 |     def chunk(self, content: Dict) -> List[Dict[str, str]]:
6 |         pages = content.get('pages', [])
7 |         chunks = []
8 |         for page in pages:
9 |             page_text = page.get('page_text', '')
10 |             chunks.append({"chunk": page_text})
11 |         return chunks
12 |
-------------------------------------------------------------------------------- /services/foundations_chunking/utils/recursive_chunking.py: --------------------------------------------------------------------------------
1 | from typing import List, Dict
2 | from langchain_text_splitters import RecursiveCharacterTextSplitter
3 |
4 | class RecursiveChunker:
5 |     def __init__(self, chunk_size: int = 4000, chunk_overlap: int = 200):
6 |         self.chunk_size = chunk_size
7 |         self.chunk_overlap = chunk_overlap
8 |         self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
9 |
10 |     def chunk(self, content: Dict) -> List[Dict[str, str]]:
11 |         pages = content.get('pages', [])
12 |         chunks = []
13 |         for page in pages:
14 |             page_text = page.get('page_text', '')
15 |             page_chunks = self.text_splitter.split_text(page_text)
16 |             for chunk in page_chunks:
17 |                 chunks.append({"chunk": chunk})
18 |
19 |         return chunks
20 |
-------------------------------------------------------------------------------- /services/foundations_document_processing/Dockerfile: --------------------------------------------------------------------------------
1 | FROM --platform=linux/amd64 python:3.9-alpine
2 |
3 | WORKDIR /app
4 |
5 | COPY . 
/app 6 | 7 | RUN pip install --no-cache-dir -r requirements.txt 8 | 9 | RUN apk --no-cache add curl 10 | 11 | EXPOSE 80 12 | 13 | CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "80"] 14 | 15 | -------------------------------------------------------------------------------- /services/foundations_document_processing/models.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | from datetime import datetime 3 | from typing import Optional, List, Dict, Any 4 | 5 | from dyntastic import Dyntastic 6 | from pydantic import Field, model_validator 7 | import os 8 | from pydantic import BaseModel 9 | from enum import Enum 10 | 11 | # Extraction job status enum 12 | class ExtractionJobStatus(str, Enum): 13 | CREATED = "CREATED" 14 | STARTED = "STARTED" 15 | COMPLETED = "COMPLETED" 16 | COMPLETED_WITH_ERRORS = "COMPLETED_WITH_ERRORS" 17 | FAILED = "FAILED" 18 | 19 | 20 | 21 | class ExtractionJobs(Dyntastic): 22 | __table_name__ = lambda: os.environ.get("EXTRACTION_JOBS_TABLE") 23 | __hash_key__ = "job_id" 24 | 25 | job_id: str = Field(default_factory=lambda: str(uuid.uuid4())) 26 | app_id: str 27 | completed_file_count: int = 0 28 | total_file_count: int 29 | failed_file_count: int = 0 30 | status: str = "CREATED" 31 | queued_files: int = 0 32 | timestamp: datetime = Field(default_factory=datetime.now) 33 | updated_at: datetime = Field(default_factory=datetime.now) 34 | 35 | @model_validator(mode="before") 36 | def set_updated_at(cls, values): 37 | values["updated_at"] = datetime.now() 38 | return values 39 | 40 | class ExtractionJobFiles(Dyntastic): 41 | 42 | __table_name__ = lambda: os.environ.get("EXTRACTION_JOB_FILES_TABLE") 43 | __hash_key__ = "job_id" 44 | __range_key__ = "file_name" 45 | 46 | job_id: str 47 | file_name: str 48 | file_path: str 49 | file_id: str 50 | status: str = "PENDING" 51 | timestamp: datetime = Field(default_factory=datetime.now) 52 | 53 | 54 | class ChunkingJobs(Dyntastic): 55 | __table_name__ = lambda: os.environ.get("CHUNKING_JOBS_TABLE") 56 | __hash_key__ = "chunking_job_id" 57 | 58 | chunking_job_id: str = Field(default_factory=lambda: str(uuid.uuid4())) 59 | extraction_job_id: str 60 | app_id: str 61 | status: str 62 | chunking_strategy: str 63 | chunking_params: str 64 | total_file_count: int 65 | queued_files: int 66 | completed_files: int 67 | failed_files: int 68 | timestamp: datetime = Field(default_factory=datetime.now) 69 | updated_at: datetime = Field(default_factory=datetime.now) 70 | 71 | @model_validator(mode="before") 72 | def set_updated_at(cls, values): 73 | values["updated_at"] = datetime.now() 74 | return values 75 | 76 | class ChunkingJobFiles(Dyntastic): 77 | __table_name__ = lambda: os.environ.get("CHUNKING_JOBS_FILES_TABLE") 78 | __hash_key__ = "chunk_job_file_id" 79 | 80 | chunk_job_file_id: str 81 | chunking_job_id: str 82 | app_id: str 83 | file_name: str 84 | file_path: str 85 | file_id: str 86 | status: str 87 | timestamp: datetime = Field(default_factory=datetime.now) 88 | 89 | 90 | ## Input / Output Models 91 | 92 | class Doc(BaseModel): 93 | file_name: str 94 | 95 | class CreateExtractionResponse(BaseModel): 96 | extraction_job_id: str 97 | status: ExtractionJobStatus 98 | 99 | class RegisterFileRequest(BaseModel): 100 | extraction_job_id: str 101 | file_name: str 102 | 103 | class RegisterFileResponse(BaseModel): 104 | extraction_job_id: str 105 | file_name: str 106 | file_id: str 107 | upload_url: str 108 | 109 | class StartExtractionJobRequest(BaseModel): 110 | 
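# Note on the Dyntastic job models above: the `mode="before"` validators
# run on every model validation, which includes items parsed back out of
# DynamoDB, so `updated_at` is effectively refreshed on reads as well as
# writes. A minimal sketch of the intended write path (values below are
# made-up placeholders):
#
#   job = ExtractionJobs(app_id="demo-app", total_file_count=2)  # validator sets updated_at
#   job.save()                                                   # persists the record
#
# (Because the validator only fires at construction/parse time, mutating a
# field in place and calling save() keeps the old updated_at unless the
# model is re-validated.)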
extraction_job_id: str 111 | 112 | class GetExtractionJobFilesRequest(BaseModel): 113 | extraction_job_id: str 114 | 115 | class ExtractionJobFileResponse(BaseModel): 116 | status: str 117 | result_url: Optional[str] = None 118 | extraction_job_id: str 119 | 120 | class ExtractionJobFileRequest(BaseModel): 121 | extraction_job_id: str 122 | file_name: str 123 | 124 | # {"job_id": job_id, "total_files": file_count, "status": "STARTED"} 125 | class StartExtractionJobResponse(BaseModel): 126 | extraction_job_id: str 127 | total_files: int 128 | status: ExtractionJobStatus = Field(default=ExtractionJobStatus.STARTED) 129 | 130 | class ChunkingParams(BaseModel): 131 | chunk_size: Optional[int] = None 132 | chunk_overlap: Optional[int] = None 133 | 134 | class ChunkingStrategy(str,Enum): 135 | FIXED_SIZE = "fixed_size" 136 | RECURSIVE = "recursive" 137 | PAGE = "page" 138 | 139 | class CreateChunkingJobRequest(BaseModel): 140 | extraction_job_id: str 141 | chunking_strategy: ChunkingStrategy 142 | chunking_params: Optional[ChunkingParams] = None 143 | 144 | class CreateChunkingJobResponse(BaseModel): 145 | chunking_job_id: str 146 | extraction_job_id: str 147 | status: str 148 | total_file_count: int 149 | 150 | class GetFileChunksRequest(BaseModel): 151 | chunking_job_id: str 152 | file_name: str 153 | 154 | class GetExtractionJobFilesResponse(BaseModel): 155 | job_id: str 156 | file_name: str 157 | status: str 158 | 159 | class ExtractionJobStatusResponse(BaseModel): 160 | job_id: str 161 | completed_file_count: int 162 | total_file_count: int 163 | failed_file_count: int 164 | status: str 165 | 166 | 167 | avoid_chars = ["&", "$", "@", "=", ";", "/", ":", "+", " ", ",", "?", "\\", "{", "}", "^", "]", "\"", ">", "[", "~", "<", "#", "|", "%"] -------------------------------------------------------------------------------- /services/foundations_document_processing/requirements.txt: -------------------------------------------------------------------------------- 1 | annotated-types==0.6.0 2 | anyio==4.3.0 3 | boto3==1.34.102 4 | botocore==1.34.102 5 | certifi==2024.07.04 6 | cffi==1.16.0 7 | charset-normalizer==3.3.2 8 | click==8.1.7 9 | cryptography==42.0.7 10 | dnspython==2.6.1 11 | dyntastic==0.15.0 12 | email_validator==2.1.1 13 | exceptiongroup==1.2.1 14 | fastapi==0.111.0 15 | fastapi-cli==0.0.3 16 | h11==0.14.0 17 | httpcore==1.0.5 18 | httptools==0.6.1 19 | httpx==0.27.0 20 | idna==3.7 21 | Jinja2==3.1.4 22 | jmespath==1.0.1 23 | markdown-it-py==3.0.0 24 | MarkupSafe==2.1.5 25 | mdurl==0.1.2 26 | orjson==3.10.3 27 | pycparser==2.22 28 | pydantic==2.7.1 29 | pydantic_core==2.18.2 30 | Pygments==2.18.0 31 | PyJWT==2.8.0 32 | python-dateutil==2.9.0.post0 33 | python-dotenv==1.0.1 34 | python-multipart==0.0.9 35 | PyYAML==6.0.1 36 | requests==2.32.3 37 | rich==13.7.1 38 | s3transfer==0.10.1 39 | shellingham==1.5.4 40 | six==1.16.0 41 | sniffio==1.3.1 42 | starlette==0.37.2 43 | typer==0.12.3 44 | typing_extensions==4.11.0 45 | ujson==5.9.0 46 | urllib3==1.26.19 47 | uvicorn==0.29.0 48 | uvloop==0.19.0 49 | watchfiles==0.21.0 50 | websockets==12.0 51 | -------------------------------------------------------------------------------- /services/foundations_extraction/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM --platform=linux/amd64 python:3.9-alpine 2 | 3 | WORKDIR /app 4 | 5 | COPY . 
/app 6 | 7 | RUN pip install --no-cache-dir -r requirements.txt 8 | 9 | RUN apk --no-cache add curl 10 | 11 | EXPOSE 80 12 | 13 | CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "80"] 14 | 15 | -------------------------------------------------------------------------------- /services/foundations_extraction/app.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | import asyncio 5 | from fastapi import FastAPI, BackgroundTasks, HTTPException 6 | from pydantic import BaseModel 7 | import boto3 8 | from botocore.config import Config 9 | from utils.extractor import Extraction, ExtractedDocument 10 | import requests 11 | from models import * 12 | from dyntastic import A, transaction 13 | from concurrent.futures import ThreadPoolExecutor 14 | import threading 15 | 16 | 17 | # Configure structured logging 18 | logging.basicConfig(level=logging.INFO) 19 | logger = logging.getLogger("document_processor") 20 | logger.setLevel(logging.INFO) 21 | 22 | QUEUE_URL = os.getenv('QUEUE_URL') 23 | JOB_RESULTS_TABLE = os.getenv('JOB_RESULTS_TABLE') 24 | JOB_FILES_TABLE = os.getenv('JOB_FILES_TABLE') 25 | RESULTS_S3_BUCKET = os.getenv('RESULTS_S3_BUCKET') 26 | SOURCE_S3_BUCKET = os.getenv('SOURCE_S3_BUCKET') 27 | MAX_RETRIES = int(os.getenv('MAX_RETRIES', '10')) 28 | REGION_NAME = '' 29 | MAX_CONCURRENT_TASKS = int(os.getenv('MAX_CONCURRENT_TASKS', '10')) 30 | VISIBILITY_TIMEOUT = int(os.getenv('VISIBILITY_TIMEOUT', '600')) # in seconds (10 minutes) 31 | ECS_METADATA_URL = os.getenv("ECS_CONTAINER_METADATA_URI_V4", "") 32 | 33 | # Global variables 34 | retry_config = Config(retries={"max_attempts": MAX_RETRIES, "mode": "standard"}) 35 | CHECK_INTERVAL = 60 36 | poll_task = None 37 | 38 | app = FastAPI() 39 | 40 | def get_boto3_clients(region_name): 41 | session = boto3.Session(region_name=region_name) 42 | s3_client = session.client('s3', config=retry_config) 43 | sqs_client = session.client('sqs', config=retry_config) 44 | dynamodb_client = session.client('dynamodb', config=retry_config) 45 | return s3_client, sqs_client, dynamodb_client 46 | 47 | 48 | 49 | def update_job_entry(job_id: str, textract_job_id: str, status: str, dynamodb, app_id=None, extraction_obj=None): 50 | try: 51 | # Get job files with job_id-index 52 | job_files = ExtractionJobFiles.query(A.job_id == job_id, index='job_id-index') 53 | 54 | job_files_list = [file for file in job_files] 55 | 56 | total_file_count = len(job_files_list) 57 | 58 | # Get COMPLETED and FAILED job files 59 | completed_files = [file.file_name for file in job_files_list if file.status == 'COMPLETED'] 60 | failed_files = [file.file_name for file in job_files_list if file.status == 'FAILED'] 61 | extraction_job = ExtractionJobs.get(job_id) 62 | 63 | # Update job status based on COMPLETED and FAILED files 64 | if len(completed_files) + len(failed_files) == total_file_count: 65 | 66 | if len(failed_files) > 0 and len(completed_files) > 0: 67 | status = 'COMPLETED_WITH_ERRORS' 68 | elif len(failed_files) > 0: 69 | status = 'FAILED' 70 | else: 71 | status = 'COMPLETED' 72 | 73 | if extraction_job: 74 | extraction_job.status = status 75 | 76 | # Update job status 77 | 78 | if extraction_job: 79 | extraction_job.completed_file_count = len(completed_files) 80 | extraction_job.failed_file_count = len(failed_files) 81 | extraction_job.save() 82 | 83 | 84 | 85 | logger.info(f"Results saved for job {job_id}") 86 | except Exception as e: 87 | logger.error(f"Error occurred while saving 
job entry: {e}") 88 | 89 | def update_job_file_entry(job_id: str, file_name: str, status: str, dynamodb): 90 | try: 91 | extraction_job_file = ExtractionJobFiles.get(job_id, file_name) 92 | if extraction_job_file: 93 | extraction_job_file.status = status 94 | extraction_job_file.save() 95 | else: 96 | raise Exception(f"Job file {file_name} not found in the database") 97 | logger.info(f"Results saved for job file {file_name}") 98 | except Exception as e: 99 | logger.error(f"Error occurred while saving file entry: {e}") 100 | 101 | async def poll_sqs(extraction: Extraction, sqs_client, dynamodb, s3_client, semaphore): 102 | logger.info("Polling SQS queue") 103 | while True: 104 | executor = ThreadPoolExecutor(max_workers=5) 105 | try: 106 | response = sqs_client.receive_message( 107 | QueueUrl=QUEUE_URL, 108 | MaxNumberOfMessages=5, 109 | WaitTimeSeconds=0, 110 | VisibilityTimeout=VISIBILITY_TIMEOUT 111 | ) 112 | messages = response.get('Messages', []) 113 | logger.info(f"Received {len(messages)} messages") 114 | tasks = [] 115 | for message in messages: 116 | await semaphore.acquire() 117 | message_body = json.loads(message['Body']) 118 | logger.info(f"Acquired semaphore for message {message_body}") 119 | loop = asyncio.get_event_loop() 120 | task = loop.run_in_executor(executor, handle_extraction, semaphore, message, extraction, dynamodb, s3_client, sqs_client) 121 | logger.info(f"Task created for message {message_body}") 122 | tasks.append(task) 123 | 124 | await asyncio.gather(*tasks) 125 | await asyncio.sleep(5) 126 | except Exception as e: 127 | logger.error(f"Error occurred: {e}") 128 | await asyncio.sleep(5) 129 | 130 | def handle_extraction(semaphore, message, extraction, dynamodb, s3_client, sqs_client): 131 | try: 132 | message_body = json.loads(message['Body']) 133 | file_path = message_body.get('file_path') 134 | job_id = message_body.get('job_id') 135 | app_id = message_body.get('app_id') 136 | s3_path = f's3://{SOURCE_S3_BUCKET}/{file_path}' 137 | file_name = file_path.split('/')[-1] 138 | receipt_handle = message['ReceiptHandle'] 139 | logger.info(f"Handle extraction for file {file_path}") 140 | 141 | ## Get file type 142 | file_type = file_path.split('.')[-1].lower() 143 | textract_file_types = ['pdf', 'png', 'jpg', 'jpeg', 'tiff'] 144 | other_file_types = ['txt', 'md', 'html', 'json', 'jsonl'] 145 | if file_type in textract_file_types: 146 | textract_job_id = extraction.extract(s3_path) 147 | try: 148 | logger.info(f"Performing extraction for job {textract_job_id}") 149 | extend_visibility_timeout(sqs_client, receipt_handle) 150 | logger.info(f"Extended visibility timeout for message {message_body}") 151 | extracted_document = extraction.get_document(textract_job_id, file_name) 152 | logger.info(f"Extracted document for job {textract_job_id}") 153 | extracted_document.s3_save(app_id, job_id, file_path, RESULTS_S3_BUCKET, s3_client) 154 | logger.info(f"Saved results for job {job_id}") 155 | file_name = file_path.split('/')[-1] 156 | 157 | update_job_file_entry(job_id, file_name, 'COMPLETED', dynamodb) 158 | update_job_entry(job_id, textract_job_id, 'COMPLETED', dynamodb, app_id, extraction) 159 | 160 | # update_job_entry(job_id, textract_job_id, 'COMPLETED', dynamodb) 161 | 162 | logger.info(f"Extraction completed for job {job_id}") 163 | sqs_client.delete_message(QueueUrl=QUEUE_URL, ReceiptHandle=receipt_handle) 164 | except Exception as e: 165 | file_name = file_path.split('/')[-1] 166 | 167 | update_job_file_entry(job_id, file_name, 'FAILED', dynamodb) 168 | 
update_job_entry(job_id, textract_job_id, 'FAILED', dynamodb, app_id, extraction) 169 | # update_jobs_map(job_id, app_id, 'FAILED', dynamodb, file_name, None, extraction) 170 | logger.error(f"Error occurred during extraction for job {job_id}: {e}") 171 | elif file_type in other_file_types: 172 | logger.info(f"Performing extraction") 173 | extend_visibility_timeout(sqs_client, receipt_handle) 174 | extracted_document = extraction.extract_nonpdf(SOURCE_S3_BUCKET, file_path) 175 | extracted_document.s3_save(app_id, job_id, file_path, RESULTS_S3_BUCKET, s3_client) 176 | file_name = file_path.split('/')[-1] 177 | 178 | update_job_file_entry(job_id, file_name, 'COMPLETED', dynamodb) 179 | # update_jobs_map(job_id,app_id, 'COMPLETED', dynamodb, file_name, extracted_document, extraction) 180 | update_job_entry(job_id, file_name, 'COMPLETED', dynamodb, app_id, extraction) 181 | 182 | sqs_client.delete_message(QueueUrl=QUEUE_URL, ReceiptHandle=receipt_handle) 183 | else: 184 | logger.error(f"Unsupported file type: {file_type}") 185 | sqs_client.delete_message(QueueUrl=QUEUE_URL, ReceiptHandle=receipt_handle) 186 | update_job_file_entry(job_id, file_name, 'FAILED', dynamodb) 187 | update_job_entry(job_id, file_name, 'FAILED', dynamodb, app_id, extraction) 188 | 189 | # update_jobs_map(job_id, app_id, 'FAILED', dynamodb, file_name, None, extraction) 190 | finally: 191 | semaphore.release() 192 | 193 | def extend_visibility_timeout(sqs_client, receipt_handle): 194 | try: 195 | sqs_client.change_message_visibility( 196 | QueueUrl=QUEUE_URL, 197 | ReceiptHandle=receipt_handle, 198 | VisibilityTimeout=VISIBILITY_TIMEOUT 199 | ) 200 | except Exception as e: 201 | logger.error(f"FAILED to extend visibility timeout: {e}") 202 | 203 | async def ensure_task_running(extraction: Extraction, sqs_client, dynamodb, s3_client): 204 | global poll_task 205 | logger.info("Starting background task") 206 | semaphore = asyncio.Semaphore(MAX_CONCURRENT_TASKS) 207 | while True: 208 | if poll_task is None or poll_task.done(): 209 | logger.info("Polling task not running or done, starting new task") 210 | poll_task = asyncio.create_task(poll_sqs(extraction, sqs_client, dynamodb, s3_client, semaphore)) 211 | await asyncio.sleep(CHECK_INTERVAL) 212 | 213 | @app.on_event("startup") 214 | async def startup_event(): 215 | global REGION_NAME 216 | 217 | if not ECS_METADATA_URL: 218 | raise HTTPException(status_code=500, detail="ECS_CONTAINER_METADATA_URI_V4 environment variable not set.") 219 | 220 | try: 221 | response = requests.get(ECS_METADATA_URL, timeout=10) 222 | response.raise_for_status() 223 | metadata = response.json() 224 | REGION_NAME = metadata.get("Labels", {}).get("com.amazonaws.ecs.task-arn", "").split(":")[3] 225 | 226 | s3_client, sqs_client, dynamodb_client = get_boto3_clients(REGION_NAME) 227 | extraction = Extraction(region_name=REGION_NAME) 228 | 229 | logger.info("Document Processing Service started successfully.") 230 | logger.info(f"Region: {REGION_NAME}") 231 | logger.info(f"Results S3 Bucket: {RESULTS_S3_BUCKET}") 232 | logger.info(f"Job Results Table: {JOB_RESULTS_TABLE}") 233 | logger.info(f"Queue URL: {QUEUE_URL}") 234 | 235 | asyncio.create_task(ensure_task_running(extraction, sqs_client, dynamodb_client, s3_client)) 236 | 237 | except requests.exceptions.RequestException as e: 238 | raise HTTPException(status_code=500, detail=f"Error retrieving ECS metadata: {str(e)}") 239 | -------------------------------------------------------------------------------- /services/foundations_extraction/models.py: 
-------------------------------------------------------------------------------- 1 | import uuid 2 | from datetime import datetime 3 | from typing import Optional, List, Dict, Any 4 | 5 | from dyntastic import Dyntastic 6 | from pydantic import Field, model_validator 7 | import os 8 | from pydantic import BaseModel 9 | from enum import Enum 10 | 11 | 12 | class ExtractionJobs(Dyntastic): 13 | __table_name__ = lambda: os.environ.get("JOB_RESULTS_TABLE") 14 | __hash_key__ = "job_id" 15 | 16 | job_id: str = Field(default_factory=lambda: str(uuid.uuid4())) 17 | app_id: str 18 | completed_file_count: int = 0 19 | total_file_count: int 20 | failed_file_count: int = 0 21 | status: str = "CREATED" 22 | queued_files: int = 0 23 | timestamp: datetime = Field(default_factory=datetime.now) 24 | updated_at: datetime = Field(default_factory=datetime.now) 25 | 26 | @model_validator(mode="before") 27 | def set_updated_at(cls, values): 28 | values["updated_at"] = datetime.now() 29 | return values 30 | 31 | class ExtractionJobFiles(Dyntastic): 32 | 33 | __table_name__ = lambda: os.environ.get("JOB_FILES_TABLE") 34 | __hash_key__ = "job_id" 35 | __range_key__ = "file_name" 36 | 37 | job_id: str 38 | file_name: str 39 | file_path: str 40 | file_id: str 41 | status: str = "PENDING" 42 | timestamp: datetime = Field(default_factory=datetime.now) 43 | 44 | 45 | ## Input / Output Models 46 | 47 | class Doc(BaseModel): 48 | file_name: str 49 | 50 | class CreateExtractionResponse(BaseModel): 51 | extraction_job_id: str 52 | status: str 53 | 54 | class RegisterFileRequest(BaseModel): 55 | extraction_job_id: str 56 | file_name: str 57 | 58 | class RegisterFileResponse(BaseModel): 59 | extraction_job_id: str 60 | file_name: str 61 | file_id: str 62 | upload_url: str 63 | 64 | class StartExtractionJobRequest(BaseModel): 65 | extraction_job_id: str 66 | 67 | class GetExtractionJobFilesRequest(BaseModel): 68 | extraction_job_id: str 69 | 70 | class ExtractionJobFileResponse(BaseModel): 71 | status: str 72 | result_url: Optional[str] = None 73 | extraction_job_id: str 74 | 75 | class ExtractionJobFileRequest(BaseModel): 76 | extraction_job_id: str 77 | file_name: str 78 | 79 | class StartExtractionJobResponse(BaseModel): 80 | extraction_job_id: str 81 | total_files: int 82 | status: str 83 | 84 | class ChunkingParams(BaseModel): 85 | chunk_size: Optional[int] = None 86 | chunk_overlap: Optional[int] = None 87 | 88 | 89 | class ChunkingStrategy(str,Enum): 90 | FIXED_SIZE = "fixed_size" 91 | RECURSIVE = "recursive" 92 | 93 | class CreateChunkingJobRequest(BaseModel): 94 | extraction_job_id: str 95 | chunking_strategy: ChunkingStrategy 96 | chunking_params: Optional[ChunkingParams] = None 97 | 98 | 99 | avoid_chars = ["&", "$", "@", "=", ";", "/", ":", "+", " ", ",", "?", "\\", "{", "}", "^", "]", "\"", ">", "[", "~", "<", "#", "|", "%"] -------------------------------------------------------------------------------- /services/foundations_extraction/requirements.txt: -------------------------------------------------------------------------------- 1 | amazon-textract-caller==0.2.3 2 | amazon-textract-response-parser==1.0.2 3 | amazon-textract-textractor==1.7.11 4 | annotated-types==0.6.0 5 | anyio==4.3.0 6 | boto3==1.34.108 7 | botocore==1.34.108 8 | certifi==2024.07.04 9 | click==8.1.7 10 | dnspython==2.6.1 11 | dyntastic==0.15.0 12 | editdistance==0.8.1 13 | email_validator==2.1.1 14 | exceptiongroup==1.2.1 15 | fastapi==0.111.0 16 | fastapi-cli==0.0.3 17 | h11==0.14.0 18 | httpcore==1.0.5 19 | httptools==0.6.1 20 | 
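# NOTE: urllib3 is pinned to 1.26.x below; botocore requires urllib3<2 on
# the Python 3.9 base image these services build from, so the pin should
# move in step with the base image.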
httpx==0.27.0 21 | idna==3.7 22 | Jinja2==3.1.4 23 | jmespath==1.0.1 24 | markdown-it-py==3.0.0 25 | MarkupSafe==2.1.5 26 | marshmallow==3.21.2 27 | mdurl==0.1.2 28 | orjson==3.10.3 29 | packaging==24.0 30 | pillow==10.3.0 31 | pydantic==2.7.1 32 | pydantic_core==2.18.2 33 | Pygments==2.18.0 34 | python-dateutil==2.9.0.post0 35 | python-dotenv==1.0.1 36 | python-multipart==0.0.9 37 | PyYAML==6.0.1 38 | requests==2.32.3 39 | rich==13.7.1 40 | s3transfer==0.10.1 41 | shellingham==1.5.4 42 | six==1.16.0 43 | sniffio==1.3.1 44 | starlette==0.37.2 45 | tabulate==0.9.0 46 | typer==0.12.3 47 | typing_extensions==4.11.0 48 | ujson==5.10.0 49 | urllib3==1.26.19 50 | uvicorn==0.29.0 51 | uvloop==0.19.0 52 | watchfiles==0.21.0 53 | websockets==12.0 54 | XlsxWriter==3.2.0 55 | -------------------------------------------------------------------------------- /services/foundations_extraction/utils/extractor.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import re 3 | import uuid 4 | import json 5 | from textractor import Textractor 6 | from textractor.data.constants import TextractFeatures, TextractAPI 7 | from textractor.entities.lazy_document import LazyDocument 8 | from textractor.data.text_linearization_config import TextLinearizationConfig 9 | 10 | class ExtractedDocument: 11 | def __init__(self, pages=None, tables=None, all_text=None, input_path=None): 12 | self.pages = pages or [] 13 | self.tables = tables or {} 14 | self.all_text = all_text 15 | self.input_path = input_path 16 | 17 | def s3_save(self, app_id, job_id, file_name, bucket, s3_client): 18 | file_name = file_name.split('/')[-1] 19 | 20 | extracted_text_content = { 21 | "job_id": job_id, 22 | "file_name": file_name, 23 | "pages": [{"page_number": i + 1, "page_text": page} for i, page in enumerate(self.pages)] 24 | } 25 | 26 | extracted_tables_content = { 27 | "job_id": job_id, 28 | "file_name": file_name, 29 | "pages": [{"page_number": i + 1, "tables": self.tables.get(i + 1, [])} for i in range(len(self.pages))] 30 | } 31 | 32 | extracted_text_key = f"{app_id}/{job_id}/{file_name}/extracted_text.json" 33 | s3_client.put_object( 34 | Bucket=bucket, 35 | Key=extracted_text_key, 36 | Body=json.dumps(extracted_text_content), 37 | ContentType="application/json", 38 | ) 39 | 40 | extracted_tables_key = f"{app_id}/{job_id}/{file_name}/extracted_tables.json" 41 | s3_client.put_object( 42 | Bucket=bucket, 43 | Key=extracted_tables_key, 44 | Body=json.dumps(extracted_tables_content), 45 | ContentType="application/json", 46 | ) 47 | 48 | metadata_key = f"{app_id}/{job_id}/{file_name}/metadata.json" 49 | try: 50 | metadata_obj = s3_client.get_object(Bucket=bucket, Key=metadata_key) 51 | metadata = json.loads(metadata_obj['Body'].read().decode('utf-8')) 52 | except s3_client.exceptions.NoSuchKey: 53 | metadata = {"job_id": job_id, "files": []} 54 | 55 | metadata["files"].append({ 56 | "file_name": file_name, 57 | "extracted_text_key": extracted_text_key, 58 | "extracted_tables_key": extracted_tables_key 59 | }) 60 | 61 | s3_client.put_object( 62 | Bucket=bucket, 63 | Key=metadata_key, 64 | Body=json.dumps(metadata), 65 | ContentType="application/json", 66 | ) 67 | 68 | class Extraction: 69 | def __init__(self, region_name): 70 | self.region_name = region_name 71 | 72 | def extract(self, document_path): 73 | extractor = Textractor(region_name=self.region_name) 74 | client_request_token = str(uuid.uuid4()) 75 | document = extractor.start_document_analysis( 76 | file_source=document_path, 77 | 
features=[TextractFeatures.LAYOUT, TextractFeatures.TABLES], 78 | client_request_token=client_request_token, 79 | save_image=False, 80 | ) 81 | return document.job_id 82 | 83 | def extract_nonpdf(self, s3_bucket, s3_key): 84 | # read s3 object 85 | s3_client = boto3.client("s3", region_name=self.region_name) 86 | s3_obj = s3_client.get_object(Bucket=s3_bucket, Key=s3_key) 87 | file_content = s3_obj["Body"].read() 88 | file_name = s3_key.split("/")[-1] 89 | if isinstance(file_content, bytes): 90 | file_content = file_content.decode("utf-8") 91 | pages = [file_content] 92 | return ExtractedDocument(pages=pages, tables={}, all_text=file_content, input_path=file_name) 93 | 94 | def extract_tables_from_page(self, page_text): 95 | tables = re.findall(r"<table>.*?</table>", page_text, re.DOTALL) 96 | page_no_tables_text = re.sub(r"<table>.*?</table>", "", page_text, flags=re.DOTALL) 97 | tables_text = [table.strip() for table in tables] 98 | return page_no_tables_text, tables_text 99 | 100 | def get_document(self, job_id, file_name): 101 | textract_client = boto3.client("textract", region_name=self.region_name) 102 | lazy_doc = LazyDocument( 103 | job_id=job_id, textract_client=textract_client, api=TextractAPI.ANALYZE 104 | ) 105 | 106 | config = TextLinearizationConfig( 107 | hide_figure_layout=True, 108 | title_prefix="", 109 | title_suffix="", 110 | text_prefix="", 111 | text_suffix="", 112 | section_header_prefix="<h2>", 113 | section_header_suffix="</h2>", 114 | table_prefix="<table>", 115 | table_suffix="</table>
", 116 | # table_linearization_format="HTML", 117 | list_element_prefix="", 118 | list_element_suffix="", 119 | key_value_layout_prefix="", 120 | key_value_layout_suffix="", 121 | key_prefix="", 122 | key_suffix="", 123 | value_prefix="", 124 | value_suffix="", 125 | hide_footer_layout=True, 126 | hide_page_num_layout=True 127 | # table_row_prefix = "", 128 | # table_row_suffix = "", 129 | # table_cell_prefix = "", 130 | # table_cell_suffix = "" 131 | ) 132 | 133 | e_pages = [] 134 | all_text = "" 135 | all_tables = {} 136 | for page_number, page in enumerate(lazy_doc.pages, start=1): 137 | page_text = page.get_text(config=config) 138 | page_no_tables_text, tables = self.extract_tables_from_page(page_text) 139 | all_text += "" + page_text + "" 140 | all_tables[page_number] = tables 141 | e_pages.append(page_text) 142 | 143 | return ExtractedDocument(pages=e_pages, tables=all_tables, all_text=all_text, input_path=file_name) 144 | -------------------------------------------------------------------------------- /services/foundations_model_invocation/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM --platform=linux/amd64 python:3.9-alpine 2 | 3 | WORKDIR /app 4 | 5 | COPY . /app 6 | 7 | RUN pip install --no-cache-dir -r requirements.txt 8 | 9 | RUN apk --no-cache add curl 10 | 11 | EXPOSE 80 12 | 13 | CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "80"] 14 | 15 | -------------------------------------------------------------------------------- /services/foundations_model_invocation/models.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | from datetime import datetime 3 | from typing import Optional, List, Dict, Any, Union, Tuple 4 | 5 | from dyntastic import Dyntastic 6 | from pydantic import Field, validator 7 | import os 8 | from pydantic import BaseModel 9 | from enum import Enum 10 | 11 | 12 | 13 | class ModelInvocationLogs(Dyntastic): 14 | __table_name__ = lambda: os.environ.get("LOGGING_TABLE") 15 | __hash_key__ = "invocation_id" 16 | 17 | invocation_id: str = Field(default_factory=lambda: str(uuid.uuid4())) 18 | timestamp: datetime = Field(default_factory=datetime.now) 19 | model_name: str 20 | model_id: str 21 | input_tokens: Optional[int] = None 22 | output_tokens: Optional[int] = None 23 | app_id: str 24 | status: str 25 | error_message: Optional[str] = None 26 | 27 | 28 | class InvokeModelRequest(BaseModel): 29 | model_name: str 30 | prompt: Union[str, List[Dict[str, Union[str, List[Dict[str, str]]]]]] 31 | max_tokens: Optional[int] = None 32 | temperature: Optional[float] = None 33 | top_p: Optional[float] = None 34 | top_k: Optional[int] = None 35 | stop_sequences: Optional[List[str]] = None 36 | system_prompts: Optional[List[Dict[str, Union[str, List[Dict[str, str]]]]]] = Field(None, 37 | example=[{"text":"You are a helpful assistant."}]) 38 | 39 | @validator('prompt', pre=True, always=True) 40 | def check_prompt(cls, v): 41 | if isinstance(v, str): 42 | return v 43 | elif isinstance(v, list): 44 | for message in v: 45 | if not isinstance(message, dict) or 'role' not in message or 'content' not in message: 46 | raise ValueError("Each message must be a dict with 'role' and 'content'") 47 | return v 48 | else: 49 | raise ValueError("prompt must be either a string or a list of messages") 50 | 51 | class InvokeModelWithRawInputRequest(BaseModel): 52 | model_id: str 53 | raw_input: Dict 54 | 55 | class InvokeEmbedModelRequest(BaseModel): 56 | model_name: str 57 | 
input_text: Optional[str] = None -------------------------------------------------------------------------------- /services/foundations_model_invocation/requirements.txt: -------------------------------------------------------------------------------- 1 | annotated-types==0.6.0 2 | anyio==4.3.0 3 | boto3==1.34.122 4 | botocore==1.34.122 5 | certifi==2024.07.04 6 | cffi==1.16.0 7 | charset-normalizer==3.3.2 8 | click==8.1.7 9 | cryptography==42.0.7 10 | dnspython==2.6.1 11 | dyntastic==0.15.0 12 | email_validator==2.1.1 13 | exceptiongroup==1.2.1 14 | fastapi==0.111.0 15 | fastapi-cli==0.0.3 16 | h11==0.14.0 17 | httpcore==1.0.5 18 | httptools==0.6.1 19 | httpx==0.27.0 20 | idna==3.7 21 | Jinja2==3.1.4 22 | jmespath==1.0.1 23 | markdown-it-py==3.0.0 24 | MarkupSafe==2.1.5 25 | mdurl==0.1.2 26 | orjson==3.10.3 27 | pycparser==2.22 28 | pydantic==2.7.1 29 | pydantic_core==2.18.2 30 | Pygments==2.18.0 31 | PyJWT==2.8.0 32 | python-dateutil==2.9.0.post0 33 | python-dotenv==1.0.1 34 | python-multipart==0.0.9 35 | PyYAML==6.0.1 36 | requests==2.32.3 37 | rich==13.7.1 38 | redis 39 | s3transfer==0.10.1 40 | shellingham==1.5.4 41 | six==1.16.0 42 | sniffio==1.3.1 43 | starlette==0.37.2 44 | typer==0.12.3 45 | typing_extensions==4.11.0 46 | ujson==5.9.0 47 | urllib3==1.26.19 48 | uvicorn==0.29.0 49 | uvloop==0.19.0 50 | watchfiles==0.21.0 51 | websockets==12.0 52 | -------------------------------------------------------------------------------- /services/foundations_prompt_management/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM --platform=linux/amd64 python:3.9-alpine 2 | 3 | WORKDIR /app 4 | 5 | COPY . /app 6 | 7 | RUN pip install --no-cache-dir -r requirements.txt 8 | 9 | RUN apk --no-cache add curl 10 | 11 | EXPOSE 80 12 | 13 | CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "80"] 14 | 15 | -------------------------------------------------------------------------------- /services/foundations_prompt_management/models.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | from datetime import datetime 3 | from typing import Optional, List, Dict, Any 4 | 5 | from dyntastic import Dyntastic 6 | from pydantic import Field 7 | import os 8 | from pydantic import BaseModel 9 | from enum import Enum 10 | 11 | 12 | class PromptTemplate(Dyntastic): 13 | __table_name__ = lambda: os.environ.get("PROMPT_TEMPLATE_TABLE") 14 | __hash_key__ = "name" 15 | 16 | id: str = Field(default_factory=lambda: str(uuid.uuid4())) 17 | app_id: str 18 | name: str 19 | prompt_template: str 20 | version: int 21 | timestamp: datetime = Field(default_factory=datetime.now) 22 | 23 | ## Input / Output Models 24 | 25 | class CreatePromptTemplateRequest(BaseModel): 26 | name: str 27 | prompt_template: str 28 | 29 | class GetPromptTemplateRequest(BaseModel): 30 | name: str 31 | 32 | class GetPromptTemplateRequestByVersion(BaseModel): 33 | name: str 34 | vnum: int 35 | 36 | class TemplateResponse(BaseModel): 37 | id: str 38 | name: str 39 | prompt_template: str 40 | version: int 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /services/foundations_prompt_management/requirements.txt: -------------------------------------------------------------------------------- 1 | annotated-types==0.6.0 2 | anyio==4.3.0 3 | boto3==1.34.102 4 | botocore==1.34.102 5 | certifi==2024.07.04 6 | cffi==1.16.0 7 | charset-normalizer==3.3.2 8 | 
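# NOTE: unlike the rest of this file, `redis` below is left unpinned;
# pinning it to a known-good version would keep builds reproducible.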
click==8.1.7 9 | cryptography==42.0.7 10 | dnspython==2.6.1 11 | dyntastic==0.15.0 12 | email_validator==2.1.1 13 | exceptiongroup==1.2.1 14 | fastapi==0.111.0 15 | fastapi-cli==0.0.3 16 | h11==0.14.0 17 | httpcore==1.0.5 18 | httptools==0.6.1 19 | httpx==0.27.0 20 | idna==3.7 21 | Jinja2==3.1.4 22 | jmespath==1.0.1 23 | markdown-it-py==3.0.0 24 | MarkupSafe==2.1.5 25 | mdurl==0.1.2 26 | orjson==3.10.3 27 | pycparser==2.22 28 | pydantic==2.7.1 29 | pydantic_core==2.18.2 30 | Pygments==2.18.0 31 | PyJWT==2.8.0 32 | python-dateutil==2.9.0.post0 33 | python-dotenv==1.0.1 34 | python-multipart==0.0.9 35 | PyYAML==6.0.1 36 | requests==2.32.3 37 | rich==13.7.1 38 | s3transfer==0.10.1 39 | shellingham==1.5.4 40 | six==1.16.0 41 | sniffio==1.3.1 42 | starlette==0.37.2 43 | typer==0.12.3 44 | typing_extensions==4.11.0 45 | ujson==5.9.0 46 | urllib3==1.26.19 47 | uvicorn==0.29.0 48 | uvloop==0.19.0 49 | watchfiles==0.21.0 50 | websockets==12.0 51 | -------------------------------------------------------------------------------- /services/foundations_vector_job_process/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM --platform=linux/amd64 python:3.9-alpine 2 | 3 | WORKDIR /app 4 | 5 | COPY . /app 6 | 7 | RUN pip install --no-cache-dir -r requirements.txt 8 | 9 | RUN apk --no-cache add curl 10 | 11 | EXPOSE 80 12 | 13 | CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "80"] 14 | 15 | -------------------------------------------------------------------------------- /services/foundations_vector_job_process/app.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | import asyncio 5 | from fastapi import FastAPI, BackgroundTasks, HTTPException, Depends 6 | from pydantic import BaseModel 7 | import boto3 8 | from botocore.config import Config 9 | from typing import Dict, Any 10 | import requests 11 | from models import VectorizationJobs, VectorizationJobFiles 12 | 13 | from utils.vectorize import OpenSearchVectorDB 14 | 15 | 16 | # Configure structured logging 17 | logging.basicConfig(level=logging.INFO) 18 | logger = logging.getLogger("document_processor") 19 | logger.setLevel(logging.INFO) 20 | 21 | # Environment variables for local testing and ECS task 22 | VECTORIZATION_QUEUE_URL = os.getenv('VECTORIZATION_QUEUE_URL') 23 | VECTORIZE_JOBS_TABLE = os.getenv('VECTORIZE_JOBS_TABLE') 24 | VECTORIZE_JOB_FILES_TABLE = os.getenv('VECTORIZE_JOB_FILES_TABLE') 25 | RESULTS_S3_BUCKET = os.getenv('RESULTS_S3_BUCKET') 26 | MAX_RETRIES = int(os.getenv('MAX_RETRIES', '10')) 27 | REGION_NAME = '' 28 | MAX_CONCURRENT_TASKS = int(os.getenv('MAX_CONCURRENT_TASKS', '10')) 29 | VISIBILITY_TIMEOUT = int(os.getenv('VISIBILITY_TIMEOUT', '600')) # in seconds (10 minutes) 30 | ECS_METADATA_URL = os.getenv("ECS_CONTAINER_METADATA_URI_V4", "") 31 | 32 | 33 | # Global variables 34 | session = None 35 | s3_client = None 36 | sqs_client = None 37 | dynamodb = None 38 | 39 | retry_config = Config(retries={"max_attempts": MAX_RETRIES, "mode": "standard"}) 40 | 41 | # Background task checking interval (in seconds) 42 | CHECK_INTERVAL = 60 43 | poll_task = None 44 | 45 | app = FastAPI() 46 | 47 | class Doc(BaseModel): 48 | file_path: str 49 | 50 | # Dependency injection for AWS clients 51 | def get_boto3_clients(region_name): 52 | session = boto3.Session(region_name=region_name) 53 | s3_client = session.client('s3', config=retry_config) 54 | sqs_client = session.client('sqs', 
config=retry_config) 55 | dynamodb_client = session.client('dynamodb', config=retry_config) 56 | return s3_client, sqs_client, dynamodb_client 57 | 58 | def get_vector_db(host: str, index_name: str) -> OpenSearchVectorDB: 59 | return OpenSearchVectorDB(host=host, index_name=index_name, region_name=REGION_NAME) 60 | 61 | def update_job_entry(job_id: str, status:str, dynamodb): 62 | try: 63 | 64 | vectorize_job = VectorizationJobs.get(job_id) 65 | 66 | if vectorize_job: 67 | if status == "FAILED": 68 | vectorize_job.failed_file_count += 1 69 | if status == "COMPLETED": 70 | vectorize_job.completed_file_count += 1 71 | 72 | if vectorize_job.failed_file_count + vectorize_job.completed_file_count == vectorize_job.total_file_count: 73 | # Update job status to 'Completed' 74 | if vectorize_job.failed_file_count > 0 and vectorize_job.completed_file_count > 0: 75 | vectorize_job.status = "COMPLETED_WITH_ERRORS" 76 | elif vectorize_job.failed_file_count > 0: 77 | vectorize_job.status = "FAILED" 78 | else: 79 | vectorize_job.status = "COMPLETED" 80 | else: 81 | vectorize_job.status = "IN_PROGRESS" 82 | 83 | vectorize_job.save() 84 | 85 | else: 86 | logger.error(f"Job {job_id} not found in the database") 87 | 88 | logger.info(f"Results saved for job {job_id}") 89 | except Exception as e: 90 | logger.error(f"Error occurred while saving results: {e}") 91 | 92 | 93 | def update_job_file_entry(file_id:str, status:str, dynamodb): 94 | try: 95 | vectorize_job_file = VectorizationJobFiles.get(file_id) 96 | if vectorize_job_file: 97 | vectorize_job_file.status = status 98 | vectorize_job_file.save() 99 | logger.info(f"Results saved for job file {file_id}") 100 | except Exception as e: 101 | logger.error(f"Error occurred while saving results: {e}") 102 | 103 | 104 | async def perform_vectorization(file_path: str, file_id: str, app_id: str, vectorize_job_id: str, index_id: str, host:str, dynamodb, s3_client, sqs_client, receipt_handle: str): 105 | try: 106 | 107 | await extend_visibility_timeout(sqs_client, receipt_handle) 108 | 109 | # Read the text from the S3 file 110 | vector_db = get_vector_db(host, index_id) 111 | txt = vector_db.read_s3_txt(file_path, RESULTS_S3_BUCKET, s3_client) 112 | 113 | # Vectorize the text and store in OpenSearch 114 | vector_db.vectorize_and_store(txt) 115 | 116 | # Sample similiarity search 117 | # sim_docs = vector_db.docsearch.similarity_search("intrafusal fibers") 118 | 119 | sqs_client.delete_message( 120 | QueueUrl=VECTORIZATION_QUEUE_URL, 121 | ReceiptHandle=receipt_handle 122 | ) 123 | 124 | print(f"Deleted message from queue: {receipt_handle}") 125 | 126 | # Update the job status in the database 127 | update_job_file_entry(file_id, 'COMPLETED', dynamodb) 128 | update_job_entry(vectorize_job_id, 'COMPLETED', dynamodb) 129 | 130 | 131 | except Exception as e: 132 | job_results = { 133 | 'status': "Failed", 134 | 'error': str(e) 135 | } 136 | update_job_file_entry(file_id, 'FAILED', dynamodb) 137 | update_job_entry(app_id, 'FAILED', dynamodb) 138 | logger.error(f"Error occurred during extraction for job {job_id}: {e}") 139 | 140 | async def poll_sqs(sqs_client, dynamodb, s3_client, semaphore): 141 | logger.info("Polling SQS queue") 142 | 143 | while True: 144 | print("Polling SQS queue") 145 | try: 146 | response = sqs_client.receive_message( 147 | QueueUrl=VECTORIZATION_QUEUE_URL, 148 | MaxNumberOfMessages=3, 149 | WaitTimeSeconds=5, 150 | VisibilityTimeout=VISIBILITY_TIMEOUT # initial visibility timeout 151 | ) 152 | messages = response.get('Messages', []) 153 | 
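# Bounded-concurrency pattern used in this loop: the semaphore caps
# in-flight messages at MAX_CONCURRENT_TASKS, and each message is handed
# to a worker thread because the vectorization path makes blocking boto3
# and embedding calls. Roughly:
#
#   await semaphore.acquire()                              # wait for capacity
#   loop.run_in_executor(None, handle_vectorization, ...)  # off-loop work
#   # handle_vectorization releases the semaphore in its finally block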
logger.info(f"Received {len(messages)} messages") 154 | for message in messages: 155 | await semaphore.acquire() 156 | loop = asyncio.get_event_loop() 157 | loop.run_in_executor(None, handle_vectorization, semaphore, message, dynamodb, s3_client, sqs_client, message['ReceiptHandle']) 158 | await asyncio.sleep(1) 159 | except Exception as e: 160 | logger.error(f"Error occurred: {e}") 161 | await asyncio.sleep(5) 162 | 163 | def handle_vectorization(semaphore, message, dynamodb, s3_client, sqs_client, receipt_handle): 164 | try: 165 | message_body = json.loads(message['Body']) 166 | file_path = message_body['file_path'] 167 | app_id = message_body['app_id'] 168 | index_id = message_body['index_id'] 169 | index_name = message_body['index_name'] 170 | host = message_body['host'] 171 | file_id = message_body['file_id'] 172 | vectorize_job_id = message_body['vectorize_job_id'] 173 | 174 | # await perform_vectorization(file_path, file_id, app_id, vectorize_job_id, index_name, host, dynamodb, s3_client, sqs_client, receipt_handle) 175 | try: 176 | 177 | extend_visibility_timeout(sqs_client, receipt_handle) 178 | 179 | # Read the text from the S3 file 180 | # vector_db = get_vector_db(host, index_id) 181 | vector_db = get_vector_db(host, index_name) 182 | txt = vector_db.read_s3_txt(file_path, RESULTS_S3_BUCKET, s3_client) 183 | 184 | # Vectorize the text and store in OpenSearch 185 | vector_db.vectorize_and_store(txt) 186 | 187 | # Sample similiarity search 188 | # sim_docs = vector_db.docsearch.similarity_search("intrafusal fibers") 189 | 190 | sqs_client.delete_message( 191 | QueueUrl=VECTORIZATION_QUEUE_URL, 192 | ReceiptHandle=receipt_handle 193 | ) 194 | 195 | print(f"Deleted message from queue: {receipt_handle}") 196 | 197 | # Update the job status in the database 198 | update_job_file_entry(file_id, 'COMPLETED', dynamodb) 199 | update_job_entry(vectorize_job_id, 'COMPLETED', dynamodb) 200 | 201 | 202 | except Exception as e: 203 | job_results = { 204 | 'status': "Failed", 205 | 'error': str(e) 206 | } 207 | update_job_file_entry(file_id, 'FAILED', dynamodb) 208 | update_job_entry(app_id, 'FAILED', dynamodb) 209 | logger.error(f"Error occurred during vectorization: {e}") 210 | finally: 211 | semaphore.release() 212 | 213 | def extend_visibility_timeout(sqs_client, receipt_handle): 214 | try: 215 | sqs_client.change_message_visibility( 216 | QueueUrl=VECTORIZATION_QUEUE_URL, 217 | ReceiptHandle=receipt_handle, 218 | VisibilityTimeout=VISIBILITY_TIMEOUT # extend the visibility timeout 219 | ) 220 | except Exception as e: 221 | logger.error(f"Failed to extend visibility timeout: {e}") 222 | 223 | async def ensure_task_running(background_tasks: BackgroundTasks, sqs_client, dynamodb, s3_client): 224 | global poll_task 225 | logger.info("Starting background task") 226 | semaphore = asyncio.Semaphore(MAX_CONCURRENT_TASKS) 227 | while True: 228 | if poll_task is None or poll_task.done(): 229 | logger.info("Polling task not running or done, starting new task") 230 | poll_task = asyncio.create_task(poll_sqs(sqs_client, dynamodb, s3_client, semaphore)) 231 | background_tasks.add_task(lambda: poll_task) 232 | logger.info("Started new polling task") 233 | await asyncio.sleep(CHECK_INTERVAL) 234 | 235 | 236 | @app.get("/vectorization/service/health") 237 | async def health_check(): 238 | return {"status": "UP"} 239 | 240 | @app.on_event("startup") 241 | async def startup_event(background_tasks: BackgroundTasks = BackgroundTasks()): 242 | 243 | global REGION_NAME 244 | 245 | if not ECS_METADATA_URL: 246 | 
raise HTTPException(status_code=500, detail="ECS_CONTAINER_METADATA_URI_V4 environment variable not set.") 247 | 248 | try: 249 | response = requests.get(ECS_METADATA_URL, timeout=10) 250 | response.raise_for_status() 251 | metadata = response.json() 252 | REGION_NAME = metadata.get("Labels", {}).get("com.amazonaws.ecs.task-arn", "").split(":")[3] 253 | 254 | s3_client, sqs_client, dynamodb_client = get_boto3_clients(REGION_NAME) 255 | 256 | 257 | except requests.exceptions.RequestException as e: 258 | raise HTTPException(status_code=500, detail=f"Error retrieving ECS metadata: {str(e)}") 259 | 260 | 261 | asyncio.create_task(ensure_task_running(background_tasks, sqs_client, dynamodb_client, s3_client)) 262 | -------------------------------------------------------------------------------- /services/foundations_vector_job_process/models.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | from datetime import datetime 3 | from typing import Optional, List, Dict, Any 4 | 5 | from dyntastic import Dyntastic 6 | from pydantic import Field, model_validator 7 | import os 8 | from pydantic import BaseModel 9 | from enum import Enum 10 | 11 | 12 | class VectorizationJobs(Dyntastic): 13 | 14 | __table_name__ = lambda: os.environ.get("VECTORIZE_JOBS_TABLE") 15 | __hash_key__ = "vectorize_job_id" 16 | 17 | vectorize_job_id: str = Field(default_factory=lambda: str(uuid.uuid4())) 18 | vector_store_id: str 19 | index_id: str 20 | chunking_job_id: str 21 | created_at: datetime = Field(default_factory=datetime.now) 22 | status: str 23 | total_file_count: int 24 | queued_files: int 25 | completed_file_count: int 26 | failed_file_count: int 27 | app_id: str 28 | updated_at: datetime = Field(default_factory=datetime.now) 29 | 30 | @model_validator(mode="before") 31 | def set_updated_at(cls, values): 32 | values["updated_at"] = datetime.now() 33 | return values 34 | 35 | class VectorizationJobFiles(Dyntastic): 36 | 37 | __table_name__ = lambda: os.environ.get("VECTORIZE_JOB_FILES_TABLE") 38 | __hash_key__ = "vectorize_job_file_id" 39 | 40 | vectorize_job_file_id: str = Field(default_factory=lambda: str(uuid.uuid4())) 41 | vectorize_job_id: str 42 | file_path: str 43 | status: str 44 | created_at: datetime = Field(default_factory=datetime.now) -------------------------------------------------------------------------------- /services/foundations_vector_job_process/requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp==3.9.5 2 | aiosignal==1.3.1 3 | annotated-types==0.7.0 4 | anyio==4.4.0 5 | async-timeout==4.0.3 6 | attrs==23.2.0 7 | boto3==1.34.117 8 | botocore==1.34.117 9 | certifi==2024.07.04 10 | charset-normalizer==3.3.2 11 | click==8.1.7 12 | dataclasses-json==0.6.6 13 | dnspython==2.6.1 14 | dyntastic==0.15.0 15 | email_validator==2.1.1 16 | Events==0.5 17 | exceptiongroup==1.2.1 18 | fastapi==0.111.0 19 | fastapi-cli==0.0.4 20 | frozenlist==1.4.1 21 | h11==0.14.0 22 | httpcore==1.0.5 23 | httptools==0.6.1 24 | httpx==0.27.0 25 | idna==3.7 26 | Jinja2==3.1.4 27 | jmespath==1.0.1 28 | jsonpatch==1.33 29 | jsonpointer==2.4 30 | langchain==0.2.7 31 | langchain-community==0.2.7 32 | langchain-core==0.2.18 33 | langchain-text-splitters==0.2.2 34 | langsmith==0.1.85 35 | markdown-it-py==3.0.0 36 | MarkupSafe==2.1.5 37 | marshmallow==3.21.2 38 | mdurl==0.1.2 39 | multidict==6.0.5 40 | mypy-extensions==1.0.0 41 | numpy==1.26.4 42 | opensearch-py==2.6.0 43 | orjson==3.10.3 44 | packaging==23.2 45 | 
pydantic==2.7.2 46 | pydantic_core==2.18.3 47 | Pygments==2.18.0 48 | python-dateutil==2.9.0.post0 49 | python-dotenv==1.0.1 50 | python-multipart==0.0.9 51 | PyYAML==6.0.1 52 | requests==2.32.3 53 | requests-aws4auth==1.2.3 54 | rich==13.7.1 55 | s3transfer==0.10.1 56 | shellingham==1.5.4 57 | six==1.16.0 58 | sniffio==1.3.1 59 | SQLAlchemy==2.0.30 60 | starlette==0.37.2 61 | tenacity==8.3.0 62 | typer==0.12.3 63 | typing-inspect==0.9.0 64 | typing_extensions==4.12.1 65 | ujson==5.10.0 66 | urllib3==1.26.19 67 | uvicorn==0.30.1 68 | uvloop==0.19.0 69 | watchfiles==0.22.0 70 | websockets==12.0 71 | yarl==1.9.4 72 | PyJWT==2.8.0 73 | -------------------------------------------------------------------------------- /services/foundations_vector_job_process/utils/vectorize.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import json 3 | import uuid 4 | from langchain_community.vectorstores import OpenSearchVectorSearch 5 | from langchain_community.embeddings.bedrock import BedrockEmbeddings 6 | from langchain_community.docstore.document import Document 7 | from requests_aws4auth import AWS4Auth 8 | from opensearchpy import RequestsHttpConnection, AWSV4SignerAuth 9 | from opensearchpy import OpenSearch, helpers 10 | from urllib.parse import urlparse 11 | import re 12 | import uuid 13 | import concurrent.futures 14 | 15 | import logging 16 | 17 | logging.basicConfig(level=logging.INFO) 18 | logger = logging.getLogger("document_processor") 19 | logger.setLevel(logging.INFO) 20 | 21 | 22 | def embed_document(embeddings, doc, index): 23 | return index, embeddings.embed_documents([doc.page_content])[0] 24 | 25 | class OpenSearchVectorDB: 26 | """A class to represent and interface with an OpenSearch Vector database.""" 27 | 28 | AOSS_SVC_NAME = "aoss" 29 | DEFAULT_TIMEOUT = 100 30 | 31 | def __init__(self, host=None, index_name=None, use_ssl=True, verify_certs=True, timeout=DEFAULT_TIMEOUT, region_name=None): 32 | """Initializes the OpenSearch Vector DB.""" 33 | self.host = host 34 | self.index_name = index_name 35 | self.use_ssl = use_ssl 36 | self.verify_certs = verify_certs 37 | self.timeout = timeout 38 | self.embeddings = BedrockEmbeddings() 39 | self.region_name = region_name 40 | self.opensearch_auth = AWSV4SignerAuth( 41 | boto3.Session().get_credentials(), self.region_name, self.AOSS_SVC_NAME) 42 | 43 | # Initialize vector search object 44 | self.docsearch = OpenSearchVectorSearch( 45 | opensearch_url=self.host, 46 | index_name=self.index_name, 47 | embedding_function=self.embeddings, 48 | http_auth=self.opensearch_auth, 49 | timeout=self.timeout, 50 | use_ssl=self.use_ssl, 51 | verify_certs=self.verify_certs, 52 | connection_class=RequestsHttpConnection, 53 | ) 54 | 55 | def read_s3_txt(self, s3_txt_path, bucket_name, s3_client): 56 | """Reads the text from an S3 file.""" 57 | response = s3_client.get_object(Bucket=bucket_name, Key=s3_txt_path) 58 | txt = response['Body'].read().decode('utf-8') 59 | # print(txt) 60 | return txt 61 | 62 | def vectorize_and_store(self, data=None): 63 | """Converts JSON data into chunks and vectors and stores them in OpenSearch.""" 64 | 65 | try: 66 | if data is not None: 67 | chunks = json.loads(data) 68 | 69 | # Create langchain documents 70 | docs = [Document(page_content=chunk['chunk']) for chunk in chunks] 71 | 72 | # Use ThreadPoolExecutor to parallelize the embedding process 73 | with concurrent.futures.ThreadPoolExecutor() as executor: 74 | futures = [executor.submit(embed_document, 
self.embeddings, doc, i) for i, doc in enumerate(docs)] 75 | results = [future.result() for future in concurrent.futures.as_completed(futures)] 76 | 77 | # Sort results by the original order 78 | results.sort(key=lambda x: x[0]) 79 | vectors = [result[1] for result in results] 80 | 81 | mapping_tuples = [(doc.page_content, vectors[i]) 82 | for i, doc in enumerate(docs)] 83 | 84 | # Add embeddings to OpenSearch 85 | self.docsearch.add_embeddings( 86 | text_embeddings=mapping_tuples, text_field="text", vector_field="vector_field") 87 | 88 | return "" 89 | 90 | except Exception as e: 91 | raise Exception(f"Error occurred during vectorization: {e}") 92 | 93 | 94 | def similarity_search(self, query, text_field="text", vector_field="vector_field"): 95 | """Searches the OpenSearch index for documents similar to the provided query.""" 96 | sim_docs = self.docsearch.similarity_search( 97 | query, text_field=text_field, vector_field=vector_field) 98 | 99 | sim_docs = [{"text": doc.page_content} for doc in sim_docs] 100 | 101 | return sim_docs -------------------------------------------------------------------------------- /services/foundations_vectorization/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM --platform=linux/amd64 python:3.9-alpine 2 | 3 | WORKDIR /app 4 | 5 | COPY . /app 6 | 7 | RUN pip install --no-cache-dir -r requirements.txt 8 | 9 | RUN apk --no-cache add curl 10 | 11 | EXPOSE 80 12 | 13 | CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "80"] 14 | 15 | -------------------------------------------------------------------------------- /services/foundations_vectorization/models.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | from datetime import datetime 3 | from typing import Optional, List, Dict, Any 4 | 5 | from dyntastic import Dyntastic 6 | from pydantic import Field, model_validator 7 | import os 8 | from pydantic import BaseModel 9 | from enum import Enum 10 | 11 | 12 | 13 | class ChunkingJobs(Dyntastic): 14 | __table_name__ = lambda: os.environ.get("CHUNK_JOBS_TABLE") 15 | __hash_key__ = "chunking_job_id" 16 | 17 | chunking_job_id: str = Field(default_factory=lambda: str(uuid.uuid4())) 18 | extraction_job_id: str 19 | app_id: str 20 | status: str 21 | chunking_strategy: str 22 | chunking_params: str 23 | total_file_count: int 24 | queued_files: int 25 | completed_files: int 26 | failed_files: int 27 | timestamp: datetime = Field(default_factory=datetime.now) 28 | updated_at: datetime = Field(default_factory=datetime.now) 29 | 30 | @model_validator(mode="before") 31 | def set_updated_at(cls, values): 32 | values["updated_at"] = datetime.now() 33 | return values 34 | 35 | class ChunkingJobFiles(Dyntastic): 36 | __table_name__ = lambda: os.environ.get("CHUNK_JOB_FILES_TABLE") 37 | __hash_key__ = "chunk_job_file_id" 38 | 39 | chunk_job_file_id: str 40 | chunking_job_id: str 41 | app_id: str 42 | file_name: str 43 | file_path: str 44 | file_id: str 45 | status: str 46 | timestamp: datetime = Field(default_factory=datetime.now) 47 | 48 | 49 | class VectorStore(Dyntastic): 50 | __table_name__ = lambda: os.environ.get("VECTOR_STORES_TABLE") 51 | __hash_key__ = "vector_store_id" 52 | __range_key__ = "app_id" 53 | 54 | vector_store_id: str = Field(default_factory=lambda: str(uuid.uuid4())) 55 | store_name: str 56 | app_id: str 57 | created_at: datetime = Field(default_factory=datetime.now) 58 | host: str 59 | store_type: str 60 | 61 | class VectorIndex(Dyntastic): 62 | 
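# Table names for these Dyntastic models resolve lazily from environment
# variables, so one build serves any deployment. A minimal sketch (the
# table name is a made-up example):
#
#   os.environ["VECTOR_STORES_INDEX_TABLE"] = "foundations-vector-indexes"
#   VectorIndex.__table_name__()  # -> "foundations-vector-indexes"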
__table_name__ = lambda: os.environ.get("VECTOR_STORES_INDEX_TABLE") 63 | __hash_key__ = "index_id" 64 | 65 | index_id: str = Field(default_factory=lambda: str(uuid.uuid4())) 66 | vector_store_id: str 67 | index_name: str 68 | created_at: datetime = Field(default_factory=datetime.now) 69 | 70 | class VectorizationJobs(Dyntastic): 71 | 72 | __table_name__ = lambda: os.environ.get("VECTORIZE_JOBS_TABLE") 73 | __hash_key__ = "vectorize_job_id" 74 | 75 | vectorize_job_id: str = Field(default_factory=lambda: str(uuid.uuid4())) 76 | vector_store_id: str 77 | index_id: str 78 | chunking_job_id: str 79 | created_at: datetime = Field(default_factory=datetime.now) 80 | status: str 81 | total_file_count: int 82 | queued_files: int 83 | completed_file_count: int 84 | failed_file_count: int 85 | app_id: str 86 | updated_at: datetime = Field(default_factory=datetime.now) 87 | 88 | @model_validator(mode="before") 89 | def set_updated_at(cls, values): 90 | values["updated_at"] = datetime.now() 91 | return values 92 | 93 | class VectorizationJobFiles(Dyntastic): 94 | 95 | __table_name__ = lambda: os.environ.get("VECTORIZE_JOB_FILES_TABLE") 96 | __hash_key__ = "vectorize_job_file_id" 97 | 98 | vectorize_job_file_id: str = Field(default_factory=lambda: str(uuid.uuid4())) 99 | vectorize_job_id: str 100 | file_path: str 101 | status: str 102 | created_at: datetime = Field(default_factory=datetime.now) 103 | 104 | 105 | ## Input / Output Models 106 | 107 | # Pydantic models 108 | class CreateVectorStoreRequest(BaseModel): 109 | store_name: str 110 | store_type: str 111 | description: Optional[str] = "Collection for storing vectorized documents" 112 | tags: List[Dict[str, str]] = [{"key": "project", "value": "GenerativeAI"}] 113 | 114 | class CreateVectorStoreResponse(BaseModel): 115 | store_name: str 116 | store_type: str 117 | store_id: str 118 | message: str 119 | 120 | class VectorStoreStatusRequest(BaseModel): 121 | store_id: str 122 | 123 | class VectorStoreStatusResponse(BaseModel): 124 | store_id: str 125 | status: str 126 | 127 | class VectorIndexStatusRequest(BaseModel): 128 | index_id: str 129 | 130 | class CreateIndexRequest(BaseModel): 131 | store_id: str 132 | index_name: str 133 | 134 | class CreateIndexResponse(BaseModel): 135 | index_name: str 136 | index_id: str 137 | store_id: str 138 | store_type: str 139 | message: str 140 | 141 | class VectorizeRequest(BaseModel): 142 | data: Optional[List[Dict[str, str]]] = None 143 | s3_txt_path: Optional[str] = None 144 | host: Optional[str] = None 145 | collection_name: Optional[str] = None 146 | 147 | class VectorizationJobStatusResponse(BaseModel): 148 | vectorize_job_id: str 149 | vector_store_id: str 150 | index_id: str 151 | chunking_job_id: str 152 | total_file_count: int 153 | completed_file_count: int 154 | failed_file_count: int 155 | status: str 156 | 157 | class SemanticSearchRequest(BaseModel): 158 | query: str 159 | index_id: str 160 | 161 | class VectorizeRequestChunkJobInput(BaseModel): 162 | chunking_job_id: str 163 | index_id: str 164 | 165 | class VectorizeResponse(BaseModel): 166 | vectorize_job_id: str 167 | status: str -------------------------------------------------------------------------------- /services/foundations_vectorization/requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp==3.9.5 2 | aiosignal==1.3.1 3 | annotated-types==0.7.0 4 | anyio==4.4.0 5 | async-timeout==4.0.3 6 | attrs==23.2.0 7 | boto3==1.34.117 8 | botocore==1.34.117 9 | certifi==2024.07.04 10 | 
--------------------------------------------------------------------------------
/services/foundations_vectorization/requirements.txt:
--------------------------------------------------------------------------------
1 | aiohttp==3.9.5
2 | aiosignal==1.3.1
3 | annotated-types==0.7.0
4 | anyio==4.4.0
5 | async-timeout==4.0.3
6 | attrs==23.2.0
7 | boto3==1.34.117
8 | botocore==1.34.117
9 | certifi==2024.07.04
10 | charset-normalizer==3.3.2
11 | click==8.1.7
12 | dataclasses-json==0.6.6
13 | dnspython==2.6.1
14 | dyntastic==0.15.0
15 | email_validator==2.1.1
16 | Events==0.5
17 | exceptiongroup==1.2.1
18 | fastapi==0.111.0
19 | fastapi-cli==0.0.4
20 | frozenlist==1.4.1
21 | h11==0.14.0
22 | httpcore==1.0.5
23 | httptools==0.6.1
24 | httpx==0.27.0
25 | idna==3.7
26 | Jinja2==3.1.4
27 | jmespath==1.0.1
28 | jsonpatch==1.33
29 | jsonpointer==2.4
30 | langchain==0.2.7
31 | langchain-community==0.2.7
32 | langchain-core==0.2.18
33 | langchain-text-splitters==0.2.2
34 | langsmith==0.1.85
35 | markdown-it-py==3.0.0
36 | MarkupSafe==2.1.5
37 | marshmallow==3.21.2
38 | mdurl==0.1.2
39 | multidict==6.0.5
40 | mypy-extensions==1.0.0
41 | numpy==1.26.4
42 | opensearch-py==2.6.0
43 | orjson==3.10.3
44 | packaging==23.2
45 | pydantic==2.7.2
46 | pydantic_core==2.18.3
47 | Pygments==2.18.0
48 | python-dateutil==2.9.0.post0
49 | python-dotenv==1.0.1
50 | python-multipart==0.0.9
51 | PyYAML==6.0.1
52 | requests==2.32.3
53 | requests-aws4auth==1.2.3
54 | rich==13.7.1
55 | s3transfer==0.10.1
56 | shellingham==1.5.4
57 | six==1.16.0
58 | sniffio==1.3.1
59 | SQLAlchemy==2.0.30
60 | starlette==0.37.2
61 | tenacity==8.3.0
62 | typer==0.12.3
63 | typing-inspect==0.9.0
64 | typing_extensions==4.12.1
65 | ujson==5.10.0
66 | urllib3==1.26.19
67 | uvicorn==0.30.1
68 | uvloop==0.19.0
69 | watchfiles==0.22.0
70 | websockets==12.0
71 | yarl==1.9.4
72 | PyJWT==2.8.0
73 | 
--------------------------------------------------------------------------------
/services/foundations_vectorization/utils/opensearchutil.py:
--------------------------------------------------------------------------------
1 | import boto3
2 | import json
3 | import uuid
4 | from langchain_community.vectorstores import OpenSearchVectorSearch
5 | from langchain_community.embeddings.bedrock import BedrockEmbeddings
6 | from langchain_community.docstore.document import Document
7 | from requests_aws4auth import AWS4Auth
8 | from opensearchpy import RequestsHttpConnection, AWSV4SignerAuth
9 | from opensearchpy import OpenSearch, helpers
10 | from urllib.parse import urlparse
11 | import re
12 | 
13 | 
14 | 
15 | class OpenSearchServerlessManager:
16 |     """Manages OpenSearch Serverless resources such as security policies and collections."""
17 | 
18 |     def __init__(self, region_name):
19 |         """Initialize the OpenSearch Serverless client."""
20 |         self.client = boto3.client(
21 |             "opensearchserverless", region_name=region_name)
22 | 
23 |     def create_security_policy(self, name, policy_type, description, policy_json):
24 |         """Creates a security policy in OpenSearch Serverless."""
25 |         client_token = str(uuid.uuid4())  # Generate a unique client token
26 | 
27 |         response = self.client.create_security_policy(
28 |             clientToken=client_token,
29 |             description=description,
30 |             name=name,
31 |             policy=json.dumps(policy_json),  # Convert dict to JSON string
32 |             type=policy_type
33 |         )
34 | 
35 |         return response['securityPolicyDetail']
36 | 
37 |     def create_encryption_policy(self, name, description, collection_pattern):
38 |         """Creates an encryption policy with a resource pattern that matches the collection name pattern."""
39 |         policy_json = {
40 |             "Rules": [
41 |                 {
42 |                     "ResourceType": "collection",
43 |                     "Resource": [f"collection/{collection_pattern}"]
44 |                 }
45 |             ],
46 |             "AWSOwnedKey": True  # Use AWS-managed encryption key
47 |         }
48 | 
49 |         return self.create_security_policy(
50 |             name=name,
51 |             policy_type="encryption",
52 |             description=description,
53 |             policy_json=policy_json
54 |         )
55 | 
56 |     def create_network_policy(self, name, description, collection_pattern, allow_public=False, vpce_id=None):
57 |         """Creates a network policy with access rules for a specific collection pattern."""
58 |         policy_json = [
59 |             {
60 |                 "Description": description,
61 |                 # Pin access to a VPC endpoint only when one is supplied;
62 |                 # a SourceVPCEs list containing None would be rejected.
63 |                 **({"SourceVPCEs": [vpce_id]} if vpce_id else {}),
64 |                 "Rules": [
65 |                     {
66 |                         "ResourceType": "dashboard",
67 |                         "Resource": [f"collection/{collection_pattern}"]
68 |                     },
69 |                     {
70 |                         "ResourceType": "collection",
71 |                         "Resource": [f"collection/{collection_pattern}"]
72 |                     }
73 |                 ],
74 |                 "AllowFromPublic": allow_public
75 |             }
76 |         ]
77 | 
78 |         return self.create_security_policy(
79 |             name=name,
80 |             policy_type="network",
81 |             description=description,
82 |             policy_json=policy_json
83 |         )
84 | 
85 |     def create_data_access_policy(self, name, description, collection_pattern, index_name, role_arn, allow_public=True):
86 |         """Creates a data access policy with access rules for a specific collection pattern."""
87 |         policy_json = """[
88 |             {
89 |                 "Rules": [
90 |                     {
91 |                         "ResourceType": "collection",
92 |                         "Resource": ["collection/{collection_pattern}"],
93 |                         "Permission": ["aoss:UpdateCollectionItems"]
94 |                     },
95 |                     {
96 |                         "ResourceType": "index",
97 |                         "Resource": ["index/{collection_pattern}/*"],
98 |                         "Permission": ["aoss:CreateIndex","aoss:DeleteIndex","aoss:UpdateIndex","aoss:DescribeIndex","aoss:ReadDocument","aoss:WriteDocument"]
99 |                     }
100 |                 ],
101 |                 "Principal": ["{role_arn}"]
102 |             }
103 |         ]"""
104 |         policy_json = policy_json.replace("{collection_pattern}", collection_pattern).replace("{role_arn}", role_arn)
105 |         print(policy_json)
106 | 
107 |         return self.client.create_access_policy(
108 |             name=name,
109 |             type="data",
110 |             clientToken=str(uuid.uuid4()),
111 |             description=description,
112 |             policy=policy_json
113 |         )
114 | 
115 |     def create_collection(self, collection_name, description="", standby_replicas="DISABLED", tags=None):
116 |         """Creates an OpenSearch Serverless collection."""
117 |         client_token = str(uuid.uuid4())  # Generate a unique client token
118 |         tags = tags or []
119 | 
120 |         response = self.client.create_collection(
121 |             clientToken=client_token,
122 |             description=description,
123 |             name=collection_name,
124 |             standbyReplicas=standby_replicas,
125 |             tags=tags,
126 |             type='VECTORSEARCH'
127 |         )
128 | 
129 |         return response['createCollectionDetail']
130 | 
131 | 
132 | 
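# --- Editor's note: illustrative addition, not part of the original module ---
# OpenSearch Serverless requires a matching encryption policy before a
# collection can be created, and network/data-access policies before it can be
# used, so a typical setup with the manager above looks roughly like this
# (names, pattern, and role ARN are placeholders):
#
#     mgr = OpenSearchServerlessManager(region_name="us-east-1")
#     mgr.create_encryption_policy("demo-enc", "demo", "demo-*")
#     mgr.create_network_policy("demo-net", "demo", "demo-*", allow_public=True)
#     mgr.create_data_access_policy("demo-data", "demo", "demo-*",
#                                   index_name="demo-index",
#                                   role_arn="arn:aws:iam::123456789012:role/DemoRole")
#     detail = mgr.create_collection("demo-collection")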
133 | class OpenSearchVectorDB:
134 |     """A class to represent and interface with an OpenSearch Vector database."""
135 | 
136 |     AOSS_SVC_NAME = "aoss"
137 |     DEFAULT_TIMEOUT = 100
138 | 
139 | 
140 | 
141 |     def get_auth(self):
142 |         credentials = self.session.get_credentials()
143 | 
144 |         access_key = credentials.access_key
145 |         secret_key = credentials.secret_key
146 |         token = credentials.token
147 | 
148 |         auth = AWS4Auth(access_key, secret_key,
149 |                         self.region, self.AOSS_SVC_NAME, session_token=token)
150 | 
151 |         return auth
152 | 
153 | 
154 | 
155 |     def __init__(self, host=None, index_name=None, region=None, use_ssl=True, verify_certs=True, timeout=DEFAULT_TIMEOUT):
156 |         """Initializes the OpenSearch Vector DB."""
157 |         self.host = host
158 |         self.index_name = index_name
159 |         self.use_ssl = use_ssl
160 |         self.verify_certs = verify_certs
161 |         self.timeout = timeout
162 |         self.embeddings = BedrockEmbeddings()
163 |         self.region = region
164 |         self.session = boto3.Session(region_name=region)
165 |         self.credentials = self.session.get_credentials()
166 |         self.opensearch_auth = self.get_auth()
167 |         # Initialize vector search object
168 |         self.docsearch = OpenSearchVectorSearch(
169 |             opensearch_url=self.host,
170 |             index_name=self.index_name,
171 |             embedding_function=self.embeddings,
172 |             http_auth=self.opensearch_auth,
173 |             timeout=self.timeout,
174 |             use_ssl=self.use_ssl,
175 |             verify_certs=self.verify_certs,
176 |             connection_class=RequestsHttpConnection,
177 |         )
178 | 
179 |     def create_index(self, index_name=None):
180 |         """Creates the OpenSearch index if it doesn't exist."""
181 |         index_body = {
182 |             "settings": {
183 |                 "index.knn": True
184 |             },
185 |             "mappings": {
186 |                 "properties": {
187 |                     "vector_field": {
188 |                         "type": "knn_vector",
189 |                         "dimension": 1536
190 |                     },
191 |                     "text": {
192 |                         "type": "text"
193 |                     }
194 |                 }
195 |             }
196 |         }
197 | 
198 |         client = OpenSearch(
199 |             hosts=[self.host],
200 |             http_auth=self.opensearch_auth,
201 |             use_ssl=self.use_ssl,
202 |             verify_certs=self.verify_certs,
203 |             connection_class=RequestsHttpConnection,
204 |         )
205 | 
206 |         if not client.indices.exists(index_name):
207 |             client.indices.create(index=index_name, body=index_body)
208 | 
209 |     def get_index_status(self, index_name=None):
210 |         """Returns the status of the OpenSearch index."""
211 |         client = OpenSearch(
212 |             hosts=[self.host],
213 |             http_auth=self.opensearch_auth,
214 |             use_ssl=self.use_ssl,
215 |             verify_certs=self.verify_certs,
216 |             connection_class=RequestsHttpConnection,
217 |         )
218 | 
219 |         return client.indices.get(index=index_name)
220 | 
221 |     def similarity_search(self, query, text_field="text", vector_field="vector_field"):
222 |         """Searches the OpenSearch index for documents similar to the provided query."""
223 |         sim_docs = self.docsearch.similarity_search(
224 |             query, text_field=text_field, vector_field=vector_field)
225 |         print(sim_docs)
226 | 
227 |         sim_docs = [{"text": doc.page_content} for doc in sim_docs]
228 | 
229 |         return sim_docs
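# --- Editor's note: illustrative addition, not part of the original module ---
# A short sketch of the intended flow for OpenSearchVectorDB, assuming an
# existing serverless collection (host, index name, and region are
# placeholders; 1536 matches the knn_vector mapping declared in create_index):
#
#     db = OpenSearchVectorDB(
#         host="https://<collection-id>.us-east-1.aoss.amazonaws.com",
#         index_name="demo-index", region="us-east-1")
#     db.create_index("demo-index")
#     hits = db.similarity_search("what is aws?")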
--------------------------------------------------------------------------------
/testing/auth/test-service.py:
--------------------------------------------------------------------------------
1 | ###############################################
2 | # This script tests the GenAI Foundational Architecture's API endpoints for authentication.
3 | # It uses the requests library to send HTTP requests to the API and pytest to run the tests.
4 | # We pass valid and invalid JWT tokens in the Authorization header to test the authentication mechanism.
5 | # It does not test the functionality of the endpoints, only the authentication.
6 | # Replace the placeholders with your actual API URL, valid token, and invalid token.
7 | ###############################################
8 | 
9 | import json
10 | import pytest
11 | import requests
12 | 
13 | # Replace these with your actual API URL and tokens
14 | API_URL = ""
15 | VALID_TOKEN = ""
16 | INVALID_TOKEN = ""
17 | 
18 | # Sample data for testing the invoke_model endpoint
19 | invoke_model_data = {
20 |     "model_name": "ANTHROPIC_CLAUDE_3_SONNET_V1",
21 |     "prompt": "Translate the following text to French: 'Hello, how are you?'",
22 |     "max_tokens": 100,
23 |     "temperature": 0.7,
24 |     "top_p": 0.9,
25 |     "top_k": 50,
26 |     "stop_sequences": ["\\n"]
27 | }
28 | 
29 | # Sample data for other endpoints
30 | raw_input_data = {
31 |     "model_id": "anthropic.claude-3-sonnet-20240229-v1:0",
32 |     "raw_input": {
33 |         "text": "Hello, how are you?"
34 |     }
35 | }
36 | 
37 | embed_model_data = {
38 |     "model_name": "example_model",
39 |     "input_text": "Hello, how are you?"
40 | }
41 | 
42 | chunking_job_data = {
43 |     "extraction_job_id": "example_extraction_job_id",
44 |     "chunking_strategy": "fixed_size",
45 |     "chunking_params": {
46 |         "chunk_size": 1000,
47 |         "chunk_overlap": 100
48 |     }
49 | }
50 | 
51 | register_file_data = {
52 |     "extraction_job_id": "example_extraction_job_id",
53 |     "file_name": "example_file.txt"
54 | }
55 | 
56 | start_job_data = {
57 |     "extraction_job_id": "example_extraction_job_id"
58 | }
59 | 
60 | file_status_data = {
61 |     "extraction_job_id": "example_extraction_job_id",
62 |     "file_name": "example_file.txt"
63 | }
64 | 
65 | vector_store_data = {
66 |     "store_name": "example_store",
67 |     "store_type": "opensearchserverless",
68 |     "description": "A vector store for semantic search.",
69 |     # shaped to match CreateVectorStoreRequest.tags: List[Dict[str, str]]
70 |     "tags": [{"key": "environment", "value": "production"},
71 |              {"key": "team", "value": "data"}]
72 | }
73 | 
74 | vector_store_status_data = {
75 |     "store_id": "example_store_id"
76 | }
77 | 
78 | create_index_data = {
79 |     "store_id": "example_store_id",
80 |     "index_name": "my_index"
81 | }
82 | 
83 | index_status_data = {
84 |     "index_id": "example_index_id"
85 | }
86 | 
87 | vectorize_data = {
88 |     "chunking_job_id": "example_chunking_job_id",
89 |     "index_id": "example_index_id"
90 | }
91 | 
92 | semantic_search_data = {
93 |     "query": "what is AWS?",
94 |     "index_id": "example_index_id"
95 | }
96 | 
97 | prompt_template_data = {
98 |     "name": "CHATBOT_PROMPT",
99 |     "prompt_template": "Given the following information, answer the question. Context {context}. Question {question}"
100 | }
101 | 
102 | get_prompt_template_data = {
103 |     "name": "CHATBOT_PROMPT"
104 | }
105 | 
106 | prompt_template_version_data = {
107 |     "name": "CHATBOT_PROMPT",
108 |     "vnum": 1
109 | }
110 | 
111 | @pytest.fixture
112 | def valid_headers():
113 |     return {"Authorization": f"Bearer {VALID_TOKEN}"}
114 | 
115 | @pytest.fixture
116 | def invalid_headers():
117 |     return {"Authorization": f"Bearer {INVALID_TOKEN}"}
118 | 
119 | def test_health_check(valid_headers):
120 |     response = requests.get(f"{API_URL}/model/service/health", headers=valid_headers)
121 |     assert response.status_code == 200
122 |     assert response.json() == {"status": "UP"}
123 | 
124 | def test_invoke_model_with_valid_token(valid_headers):
125 |     response = requests.post(f"{API_URL}/model/invoke", json=invoke_model_data, headers=valid_headers)
126 |     assert response.status_code == 200
127 |     assert "output_text" in response.json()
128 |     assert "input_tokens" in response.json()
129 |     assert "output_tokens" in response.json()
130 | 
131 | def test_invoke_model_with_invalid_token(invalid_headers):
132 |     response = requests.post(f"{API_URL}/model/invoke", json=invoke_model_data, headers=invalid_headers)
133 |     assert response.status_code == 401
134 | 
135 | def test_invoke_model_without_token():
136 |     response = requests.post(f"{API_URL}/model/invoke", json=invoke_model_data)
137 |     assert response.status_code == 401
138 | 
139 | def test_async_invoke_model_with_valid_token(valid_headers):
140 |     response = requests.post(f"{API_URL}/model/async_invoke", json=invoke_model_data, headers=valid_headers)
141 |     assert response.status_code == 200
142 |     assert "invocation_id" in response.json()
143 | 
144 | def test_async_invoke_model_with_invalid_token(invalid_headers):
145 |     response = requests.post(f"{API_URL}/model/async_invoke", json=invoke_model_data, headers=invalid_headers)
146 |     assert response.status_code == 401
147 | 
148 | def test_async_invoke_model_without_token():
149 |     response = requests.post(f"{API_URL}/model/async_invoke", json=invoke_model_data)
150 |     assert response.status_code == 401
151 | 
152 | def test_invoke_with_raw_input(valid_headers):
153 |     response = requests.post(f"{API_URL}/model/invoke_with_raw_input", json=raw_input_data, headers=valid_headers)
154 |     assert response.status_code in [200, 400, 500]
155 | 
156 | def test_invoke_embed_with_valid_token(valid_headers):
157 |     response = requests.post(f"{API_URL}/model/embed", json=embed_model_data, headers=valid_headers)
158 |     assert response.status_code in [200, 400, 500]
159 | 
160 | def test_create_extraction_job_with_invalid_token(invalid_headers):
161 |     response = requests.get(f"{API_URL}/document/extraction/create_job", headers=invalid_headers)
162 |     assert response.status_code == 401
163 | 
164 | def test_create_extraction_job_without_token():
165 |     response = requests.get(f"{API_URL}/document/extraction/create_job")
166 |     assert response.status_code == 401
167 | 
168 | def test_register_file_with_invalid_token(invalid_headers):
169 |     response = requests.post(f"{API_URL}/document/extraction/register_file", json=register_file_data, headers=invalid_headers)
170 |     assert response.status_code == 401
171 | 
172 | def test_register_file_without_token():
173 |     response = requests.post(f"{API_URL}/document/extraction/register_file", json=register_file_data)
174 |     assert response.status_code == 401
175 | 
176 | def test_start_extraction_job_with_invalid_token(invalid_headers):
177 |     response = requests.post(f"{API_URL}/document/extraction/start_job", json=start_job_data, headers=invalid_headers)
178 |     assert response.status_code == 401
179 | 
180 | def test_start_extraction_job_without_token():
181 |     response = requests.post(f"{API_URL}/document/extraction/start_job", json=start_job_data)
182 |     assert response.status_code == 401
183 | 
184 | def test_get_job_files_with_invalid_token(invalid_headers):
185 |     response = requests.get(f"{API_URL}/document/extraction/job_files/example_extraction_job_id", headers=invalid_headers)
186 |     assert response.status_code == 401
187 | 
188 | def test_get_job_files_without_token():
189 |     response = requests.get(f"{API_URL}/document/extraction/job_files/example_extraction_job_id")
190 |     assert response.status_code == 401
191 | 
192 | def test_get_job_status_with_invalid_token(invalid_headers):
193 |     response = requests.get(f"{API_URL}/document/extraction/job_status/example_extraction_job_id", headers=invalid_headers)
194 |     assert response.status_code == 401
195 | 
196 | def test_get_job_status_without_token():
197 |     response = requests.get(f"{API_URL}/document/extraction/job_status/example_extraction_job_id")
198 |     assert response.status_code == 401
199 | 
200 | def test_get_file_status_with_invalid_token(invalid_headers):
201 |     response = requests.post(f"{API_URL}/document/extraction/file_status", json=file_status_data, headers=invalid_headers)
202 |     assert response.status_code == 401
203 | 
204 | def test_get_file_status_without_token():
205 |     response = requests.post(f"{API_URL}/document/extraction/file_status", json=file_status_data)
206 |     assert response.status_code == 401
207 | 
208 | def test_vector_store_with_invalid_token(invalid_headers):
209 |     response = requests.post(f"{API_URL}/vector/store/create", json=vector_store_data, headers=invalid_headers)
210 |     assert response.status_code == 401
211 | 
212 | def test_vector_store_without_token():
213 |     response = requests.post(f"{API_URL}/vector/store/create", json=vector_store_data)
214 |     assert response.status_code == 401
215 | 
216 | def test_prompt_template_with_invalid_token(invalid_headers):
217 |     response = requests.post(f"{API_URL}/prompt/template/save", json=prompt_template_data, headers=invalid_headers)
218 |     assert response.status_code == 401
219 | 
220 | def test_prompt_template_without_token():
221 |     response = requests.post(f"{API_URL}/prompt/template/save", json=prompt_template_data)
222 |     assert response.status_code == 401
223 | 
224 | if __name__ == "__main__":
225 |     pytest.main(["-v", "test-service.py"])
226 | 
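# --- Editor's note: illustrative addition, not part of the original script ---
# One way to obtain a bearer token for VALID_TOKEN, assuming the deployment's
# Cognito user pool app client has USER_PASSWORD_AUTH enabled (all identifiers
# below are placeholders):
#
#     import boto3
#     idp = boto3.client("cognito-idp", region_name="us-east-1")
#     resp = idp.initiate_auth(
#         ClientId="<app-client-id>",
#         AuthFlow="USER_PASSWORD_AUTH",
#         AuthParameters={"USERNAME": "<user>", "PASSWORD": "<password>"},
#     )
#     token = resp["AuthenticationResult"]["AccessToken"]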
--------------------------------------------------------------------------------
/testing/models/test_model_invoke.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import requests
3 | import time
4 | 
5 | # Set the base URL for the FastAPI app
6 | BASE_URL = ""
7 | 
8 | # Set the Authorization token
9 | AUTH_TOKEN = ""
10 | 
11 | # Define headers with the token
12 | headers = {
13 |     "Authorization": f"Bearer {AUTH_TOKEN}",
14 |     "Content-Type": "application/json"
15 | }
16 | 
17 | # Call /list_models at collection time to build the parametrize lists below (BASE_URL and AUTH_TOKEN must be set first)
18 | response = requests.get(f"{BASE_URL}/list_models", headers=headers)
19 | model_names = [model["model_name"] for model in response.json()['text_models']]
20 | embed_model_names = [model["model_name"] for model in response.json()['embed_models']]
21 | 
22 | # Standard request data for invoke and async_invoke endpoints
23 | standard_request_data_string = {
24 |     "model_name": "example_model",
25 |     "prompt": "Translate the following text to French: 'Hello, how are you?'",
26 |     "max_tokens": 100,
27 |     "temperature": 0.7,
28 |     "top_p": 0.9,
29 |     "top_k": 50
30 | }
31 | 
32 | standard_request_data_json = {
33 |     "model_name": "example_model",
34 |     "prompt": [
35 |         {
36 |             "role": "user",
37 |             "content": [{"text": "What is the weather like today?"}]
38 |         },
39 |         {
40 |             "role": "assistant",
41 |             "content": [{"text": "The weather is sunny with a high of 25°C."}]
42 |         },
43 |         {
44 |             "role": "user",
45 |             "content": [{"text": "Fahrenheit or Celsius?"}]
46 |         }
47 |     ],
48 |     "max_tokens": 100,
49 |     "temperature": 0.7,
50 |     "top_p": 0.9,
51 |     "top_k": 50,
52 |     "system": [
53 |         {
54 |             "text": "You are a helpful assistant that answers the user's questions."
55 |         }
56 |     ]
57 | }
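# --- Editor's note: not part of the original script ---
# The structured prompt above mirrors the Amazon Bedrock Converse-style
# message shape (role + content blocks, plus a top-level "system" list), so
# the same /invoke endpoint is exercised with both a plain-string prompt and
# a multi-turn message list.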
58 | 
59 | standard_request_embed_data = {
60 |     "model_name": "example_model",
61 |     "input_text": "Translate the following text to French: 'Hello, how are you?'"
62 | }
63 | 
64 | @pytest.mark.parametrize("model_name", model_names)
65 | @pytest.mark.parametrize("request_data", [standard_request_data_string, standard_request_data_json])
66 | def test_invoke(model_name, request_data):
67 |     data = request_data.copy()
68 |     data["model_name"] = model_name
69 | 
70 |     if model_name.startswith("TITAN"):
71 |         data["stop_sequences"] = ["User:"]
72 |     else:
73 |         data["stop_sequences"] = ["\\n"]
74 | 
75 |     response = requests.post(f"{BASE_URL}/invoke", headers=headers, json=data)
76 |     print("*"*50)
77 |     print(response.text)
78 |     print("*"*50)
79 |     assert response.status_code == 200
80 |     print(f"Response for /invoke with model {model_name}: {response.json()}")
81 | 
82 | # Test Embed models
83 | @pytest.mark.parametrize("model_name", embed_model_names)
84 | @pytest.mark.parametrize("request_data", [standard_request_embed_data])
85 | def test_invoke_embed(model_name, request_data):
86 |     print(f"Testing /embed with model {model_name}")
87 |     data = request_data.copy()
88 |     data["model_name"] = model_name
89 | 
90 |     response = requests.post(f"{BASE_URL}/embed", headers=headers, json=data)
91 |     print("*"*50)
92 |     print(response.text)
93 |     print("*"*50)
94 |     assert response.status_code == 200
95 |     print(f"Response for /embed with model {model_name}: {response.json()}")
96 | 
97 | @pytest.mark.parametrize("model_name", model_names)
98 | @pytest.mark.parametrize("request_data", [standard_request_data_string])
99 | def test_async_invoke(model_name, request_data):
100 |     data = request_data.copy()
101 |     data["model_name"] = model_name
102 | 
103 |     if model_name.startswith("TITAN"):
104 |         data["stop_sequences"] = ["User:"]
105 |     else:
106 |         data["stop_sequences"] = ["\\n"]
107 | 
108 |     response = requests.post(f"{BASE_URL}/async_invoke", headers=headers, json=data)
109 |     assert response.status_code == 200
110 |     invocation_id = response.json().get("invocation_id")
111 |     print(f"Invocation ID for /async_invoke with model {model_name}: {invocation_id}")
112 | 
113 |     # Wait briefly, then fetch the async result once
114 |     time.sleep(10)
115 |     result_response = requests.get(f"{BASE_URL}/async_output/{invocation_id}", headers=headers)
116 |     assert result_response.status_code == 200
117 |     print(f"Result for async_invoke with model {model_name}: {result_response.json()}")
118 | 
119 | 
120 | if __name__ == "__main__":
121 |     print("Running tests")
122 |     pytest.main(["-s", "test_model_invoke.py"])
123 | 
--------------------------------------------------------------------------------