├── .codecatalyst └── workflows │ ├── Build_Deploy_Dev.yaml │ └── Build_Deploy_QA.yaml ├── .github └── ISSUE_TEMPLATE │ ├── bug_report.yml │ └── config.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── CONTRIBUTORS.md ├── Dockerfile ├── FloTorch Colour Logo.png ├── Help_Links.MD ├── LICENSE ├── README.md ├── app ├── Dockerfile ├── __init__.py ├── common │ ├── __init__.py │ └── logger.py ├── configuration_validation.py ├── dependencies │ ├── __init__.py │ ├── database.py │ └── s3.py ├── main.py ├── marketplace_app.Dockerfile ├── models.py ├── nginx │ ├── auth.conf │ ├── nginx.conf │ ├── nginxconfig.io │ │ ├── general.conf │ │ ├── proxy.conf │ │ └── security.conf │ └── sites-enabled │ │ └── app.conf ├── orchestrator.py ├── price_calculator.py ├── requirements.txt ├── routes │ ├── __init__.py │ ├── bedrock_config.py │ ├── config.py │ ├── cost_and_duration_calculation.py │ ├── execution.py │ ├── experiment.py │ ├── expert_eval.py │ ├── health.py │ └── uploads.py └── seed_data.py ├── baseclasses └── base_classes.py ├── cfn ├── app-runner-template.yaml ├── dynamodb-template.yaml ├── ecr-repository-template.yaml ├── ecs-template.yaml ├── lambda-template.yaml ├── master-template.yaml ├── opensearch-template.yaml ├── state-machine-template.yaml ├── vpc-endpoint-template.yaml └── vpc-template.yaml ├── config ├── __init__.py ├── config.py └── experimental_config.py ├── constants ├── __init__.py ├── app_constants.py ├── sagemaker_constants.py └── validation_status.py ├── core ├── __init__.py ├── chunking │ ├── __init__.py │ ├── fixed_chunker.py │ └── hierarchical_chunker.py ├── dynamodb.py ├── embedding │ ├── __init__.py │ ├── bedrock │ │ ├── __init__.py │ │ ├── bedrock_embedder.py │ │ ├── cohere_embedder.py │ │ ├── titanv1_embedder.py │ │ └── titanv2_embedder.py │ ├── embedding_factory.py │ └── sagemaker │ │ ├── __init__.py │ │ └── sagemaker_embedder.py ├── eval │ ├── __init__.py │ ├── eval_factory.py │ └── ragas │ │ ├── ragas_eval.py │ │ ├── ragas_llm_eval.py 
│ │ └── ragas_non_llm_eval.py ├── guardrails │ └── bedrock_guardrails.py ├── inference │ ├── __init__.py │ ├── bedrock │ │ └── bedrock_inferencer.py │ ├── inference_factory.py │ └── sagemaker │ │ ├── llama_inferencer.py │ │ └── sagemaker_inferencer.py ├── knowledgebase_vectorstore.py ├── opensearch_vectorstore.py ├── processors │ ├── __init__.py │ ├── chunking_processor.py │ ├── embed_processor.py │ ├── eval_processor.py │ └── inference_processor.py ├── rerank │ └── rerank.py └── service │ └── experimental_config_service.py ├── cover-image.png ├── dataset ├── amazon_bedrock_userguide │ ├── Amazon_Bedrock_Dataset.pdf │ ├── Amazon_Bedrock_Prompt.json │ └── Amazon_bedrock_gt.json └── medical_abstracts │ ├── medical_abstracts.pdf │ ├── medical_prompt.json │ └── medical_qa_50.json ├── docs └── pricing-calculation.md ├── evaluation ├── Dockerfile ├── eval.py ├── fargate_evaluation.Dockerfile └── requirements.txt ├── faq.md ├── flotorch-arch.png ├── handlers ├── fargate_eval_handler.py ├── fargate_indexing_handler.py ├── fargate_retriever_handler.py └── task_processor.py ├── indexing ├── Dockerfile ├── __init__.py ├── fargate_indexing.Dockerfile ├── indexing.py └── requirements.txt ├── install.md ├── lambda_handlers ├── cost_handler │ ├── Dockerfile │ ├── cost_compute_handler.py │ ├── pricing.py │ ├── requirements.txt │ └── utils.py ├── evaluation_handler.py ├── indexing_handler.py ├── opensearch_handler.py └── retriever_handler.py ├── opensearch ├── opensearch.Dockerfile ├── opensearch_index_manager.py └── opensearch_requirements.txt ├── provision.sh ├── release_notes ├── Release v1.0.2.md ├── Release v2.0.0.md └── images │ ├── Hierarchical_Chunking.png │ ├── Indexing_Algorthm_HNSW_BQ_SQ.png │ └── ReRanking.png ├── retriever ├── Dockerfile ├── fargate_retriever.Dockerfile ├── requirements.txt └── retriever.py ├── test ├── FloTorch.ai TestReport.pdf ├── Integration Test Report.pdf └── Test Plan for FloTorch.pdf ├── ui ├── .env.example ├── .gitignore ├── .ncurc.json ├── 
.npmrc ├── .vscode │ └── settings.json ├── README.md ├── app │ ├── app.config.ts │ ├── app.vue │ ├── assets │ │ └── css │ │ │ └── main.css │ ├── components │ │ ├── Breadcumb.vue │ │ ├── DownloadResultsButton.vue │ │ ├── FetchKbModels.vue │ │ ├── FieldTooltip.vue │ │ ├── File │ │ │ ├── Upload.vue │ │ │ ├── UploadKb.vue │ │ │ └── UploadModal.vue │ │ ├── ModelSelect.vue │ │ ├── Page.vue │ │ ├── Project │ │ │ ├── Create │ │ │ │ ├── DataStrategyStep.vue │ │ │ │ ├── EvalStrategyStep.vue │ │ │ │ ├── Form.vue │ │ │ │ ├── IndexingStrategyStep.vue │ │ │ │ └── RetrievalStrategyStep.vue │ │ │ ├── DownloadConfigButton.vue │ │ │ ├── Experiment │ │ │ │ ├── Assessments.vue │ │ │ │ ├── DetailsButton.vue │ │ │ │ ├── DirectionalPricing.vue │ │ │ │ ├── HumanEvaluation.vue │ │ │ │ ├── List.vue │ │ │ │ └── ValidList.vue │ │ │ ├── List.vue │ │ │ └── UploadConfigButton.vue │ │ ├── PromptGuideHelp.vue │ │ ├── PromptGuideSelect.vue │ │ ├── RegionSelect.vue │ │ └── VectorDimensionSelect.vue │ ├── composables │ │ ├── api.ts │ │ ├── fileoperation.ts │ │ ├── projects.ts │ │ ├── shared.ts │ │ ├── tooltip.ts │ │ └── useShareData.ts │ ├── layouts │ │ └── default.vue │ ├── pages │ │ ├── index.vue │ │ └── projects │ │ │ ├── [id].vue │ │ │ ├── [id] │ │ │ ├── execute.vue │ │ │ ├── experiments │ │ │ │ ├── [experimentId].vue │ │ │ │ └── index.vue │ │ │ ├── humanevaluation.vue │ │ │ ├── index.vue │ │ │ └── validexperiments.vue │ │ │ ├── create.vue │ │ │ └── index.vue │ ├── plugins │ │ ├── 03.vuequery.ts │ │ └── mitt.ts │ └── utils │ │ └── json-csv.ts ├── eslint.config.mjs ├── nuxt.config.ts ├── package.json ├── pnpm-lock.yaml ├── public │ ├── android-chrome-192x192.png │ ├── android-chrome-512x512.png │ ├── apple-touch-icon.png │ ├── favicon-16x16.png │ ├── favicon-32x32.png │ ├── favicon.ico │ ├── favicon.png │ ├── flotorch-hero.gif │ ├── logo.png │ ├── prompt-guide.json │ ├── robots.txt │ └── site.webmanifest ├── server │ └── tsconfig.json ├── shared │ └── types │ │ ├── breadcumb.type.ts │ │ ├── 
experiments.type.ts │ │ ├── humaneval.type.ts │ │ └── projects.type.ts └── tsconfig.json ├── usage_guide.md └── util ├── bedrock_utils.py ├── boto3_utils.py ├── date_time_utils.py ├── dynamo_utils.py ├── error_handling.py ├── guard_rails_utils.py ├── open_search_config_utils.py ├── pdf_utils.py └── s3util.py /.codecatalyst/workflows/Build_Deploy_Dev.yaml: -------------------------------------------------------------------------------- 1 | Name: Build_Deploy_Dev 2 | SchemaVersion: "1.0" 3 | 4 | # Only trigger when started manually. 5 | Triggers: 6 | - Type: Manual 7 | 8 | Actions: 9 | Build_a8: 10 | Identifier: aws/build@v1.0.0 11 | 12 | Inputs: 13 | Sources: 14 | - WorkflowSource 15 | Variables: 16 | # Dev environment details 17 | - Name: DEV_ENV 18 | Value: "dampen" 19 | - Name: DEV_URL 20 | Value: "https://radd4urst2.us-east-1.awsapprunner.com" 21 | - Name: DEV_USERNAME 22 | Value: "admin" 23 | - Name: DEV_PASSWORD 24 | Value: "jkhg78BKUYBKGUY7*nhiH" 25 | 26 | Outputs: 27 | AutoDiscoverReports: 28 | Enabled: true 29 | ReportNamePrefix: rpt 30 | 31 | Configuration: 32 | Steps: 33 | - Run: | 34 | echo "Deploying to Dev environment: $DEV_ENV" 35 | 36 | # 1) Log in to Amazon ECR 37 | aws ecr get-login-password --region us-east-1 \ 38 | | docker login --username AWS --password-stdin 677276078734.dkr.ecr.us-east-1.amazonaws.com 39 | 40 | # 2) Build & push Docker images for Dev (paimon) 41 | docker build -t 677276078734.dkr.ecr.us-east-1.amazonaws.com/flotorch-app-$DEV_ENV:latest \ 42 | -f app/Dockerfile --push . 43 | docker build -t 677276078734.dkr.ecr.us-east-1.amazonaws.com/flotorch-indexing-$DEV_ENV:latest \ 44 | -f indexing/fargate_indexing.Dockerfile --push . 45 | docker build -t 677276078734.dkr.ecr.us-east-1.amazonaws.com/flotorch-retriever-$DEV_ENV:latest \ 46 | -f retriever/fargate_retriever.Dockerfile --push . 
47 | docker build -t 677276078734.dkr.ecr.us-east-1.amazonaws.com/flotorch-evaluation-$DEV_ENV:latest \ 48 | -f evaluation/fargate_evaluation.Dockerfile --push . 49 | docker build -t 677276078734.dkr.ecr.us-east-1.amazonaws.com/flotorch-runtime-$DEV_ENV:latest \ 50 | -f opensearch/opensearch.Dockerfile --push . 51 | cd lambda_handlers 52 | docker build -t 677276078734.dkr.ecr.us-east-1.amazonaws.com/flotorch-costcompute-$DEV_ENV:latest \ 53 | -f cost_handler/Dockerfile --push . 54 | cd .. 55 | 56 | echo "Docker images for Dev ($DEV_ENV) have been pushed." 57 | 58 | Container: 59 | Registry: CODECATALYST 60 | Image: CodeCatalystLinux_x86_64:2024_03 61 | 62 | Compute: 63 | Type: EC2 64 | 65 | Environment: 66 | Name: Dev 67 | -------------------------------------------------------------------------------- /.codecatalyst/workflows/Build_Deploy_QA.yaml: -------------------------------------------------------------------------------- 1 | Name: Build_Deploy_QA 2 | SchemaVersion: "1.0" 3 | 4 | # Only trigger when started manually. 
5 | Triggers: 6 | - Type: Manual 7 | 8 | Actions: 9 | Build_a8: 10 | Identifier: aws/build@v1.0.0 11 | 12 | Inputs: 13 | Sources: 14 | - WorkflowSource 15 | Variables: 16 | # QA environment details 17 | - Name: QA_ENV 18 | Value: "iamdqa" 19 | - Name: QA_URL 20 | Value: "https://tmmqcidccg.us-east-1.awsapprunner.com" 21 | - Name: QA_USERNAME 22 | Value: "admin" 23 | - Name: QA_PASSWORD 24 | Value: "jkhrg&*ObliubUH*(8" 25 | 26 | Outputs: 27 | AutoDiscoverReports: 28 | Enabled: true 29 | ReportNamePrefix: rpt 30 | 31 | Configuration: 32 | Steps: 33 | - Run: | 34 | echo "Deploying to QA environment: $QA_ENV" 35 | 36 | # 1) Log in to Amazon ECR 37 | aws ecr get-login-password --region us-east-1 \ 38 | | docker login --username AWS --password-stdin 677276078734.dkr.ecr.us-east-1.amazonaws.com 39 | 40 | # 2) Build & push Docker images for QA (paimon) 41 | docker build -t 677276078734.dkr.ecr.us-east-1.amazonaws.com/flotorch-app-$QA_ENV:latest \ 42 | -f app/Dockerfile --push . 43 | docker build -t 677276078734.dkr.ecr.us-east-1.amazonaws.com/flotorch-indexing-$QA_ENV:latest \ 44 | -f indexing/fargate_indexing.Dockerfile --push . 45 | docker build -t 677276078734.dkr.ecr.us-east-1.amazonaws.com/flotorch-retriever-$QA_ENV:latest \ 46 | -f retriever/fargate_retriever.Dockerfile --push . 47 | docker build -t 677276078734.dkr.ecr.us-east-1.amazonaws.com/flotorch-evaluation-$QA_ENV:latest \ 48 | -f evaluation/fargate_evaluation.Dockerfile --push . 49 | docker build -t 677276078734.dkr.ecr.us-east-1.amazonaws.com/flotorch-runtime-$QA_ENV:latest \ 50 | -f opensearch/opensearch.Dockerfile --push . 51 | cd lambda_handlers 52 | docker build -t 677276078734.dkr.ecr.us-east-1.amazonaws.com/flotorch-costcompute-$QA_ENV:latest \ 53 | -f cost_handler/Dockerfile --push . 54 | cd .. 55 | 56 | echo "Docker images for QA ($QA_ENV) have been pushed." 
57 | 58 | Container: 59 | Registry: CODECATALYST 60 | Image: CodeCatalystLinux_x86_64:2024_03 61 | 62 | Compute: 63 | Type: EC2 64 | 65 | Environment: 66 | Name: QA 67 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- 1 | name: Bug Report 2 | description: Report errors or unexpected behavior 3 | title: "[Bug]: " 4 | labels: ["bug"] 5 | assignees: [] 6 | body: 7 | - type: markdown 8 | attributes: 9 | value: | 10 | Thanks for taking the time to fill out this bug report! Please ensure you've checked existing issues first. 11 | 12 | - type: input 13 | id: version 14 | attributes: 15 | label: FloTorch Version 16 | description: What version of our project are you running? 17 | placeholder: e.g., v2.0.1 18 | validations: 19 | required: true 20 | 21 | - type: textarea 22 | id: bug-description 23 | attributes: 24 | label: Bug Description 25 | description: What happened and what did you expect to happen? 26 | placeholder: Describe the bug clearly and concisely 27 | validations: 28 | required: true 29 | 30 | - type: textarea 31 | id: reproduction 32 | attributes: 33 | label: Steps to Reproduce 34 | description: How can we reproduce this issue? 35 | placeholder: | 36 | 1. Go to '...' 37 | 2. Click on '....' 38 | 3. Scroll down to '....' 39 | 4. See error 40 | validations: 41 | required: true 42 | 43 | - type: input 44 | id: browser 45 | attributes: 46 | label: Browser and Version 47 | description: Which browser are you using (if applicable)? 48 | placeholder: e.g., Chrome 120.0.6099.129 49 | 50 | - type: input 51 | id: runtime 52 | attributes: 53 | label: Runtime Version 54 | description: Which runtime version are you using? 
55 | placeholder: e.g., Python 3.9.7 56 | 57 | - type: textarea 58 | id: logs 59 | attributes: 60 | label: Error Messages 61 | description: Please copy and paste any relevant error messages or stack traces 62 | render: shell 63 | 64 | - type: textarea 65 | id: additional 66 | attributes: 67 | label: Additional Context 68 | description: Add any other context about the problem here (screenshots, related issues, suggestions for fixing) -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_STORE 2 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to FloTorch 2 | 3 | Thank you for your interest in contributing to FloTorch. Whether you're fixing bugs, adding features, or improving documentation, your efforts are greatly appreciated. Please follow the guidelines below to ensure a smooth collaboration. 4 | 5 | # Code of Conduct 6 | 7 | By participating in this project, you agree to abide by our [Code of Conduct](CODE_OF_CONDUCT.md). 8 | 9 | # Getting Started 10 | 11 | 1. Fork the repository: Create a fork of the repository to your own GitHub account. 12 | 13 | 2. Clone your fork: Clone the forked repository to your local machine: 14 | 15 | ```git clone https://github.com/FissionAI/FloTorch.git``` 16 | 17 | 3. Set up the project: 18 | 19 | - Follow the instructions in the README.md file to set up the development environment. 20 | 21 | - Install any required dependencies. 22 | 23 | 24 | ## Ways to Contribute 25 | 26 | To improve and grow the project, we need your help! 
Here are some ways to get involved: 27 | 28 | | Activity | Ideas | 29 | | -------- | ----- | 30 | | 👋 Discussions | Start a discussion by asking a question or making a suggestion. | 31 | | 🐛 Open an Issue | Find unhandled exceptions and bugs in the codebase. | 32 | | 📄 Documentation | Write documentation for the project. | 33 | | 🧪 Testing | Write unit tests to increase code coverage. | 34 | | 🧩 Feature Requests | Brainstorm new ideas. | 35 | | 🛠️ Code Contributions | Contribute to the codebase and submit a pull request. | 36 | | 🔢 Code Readability | Find ways to make code more readable and easier to understand. | 37 | | 🤔 Other | Anything else you can think of! | 38 | 39 | These are just a few examples, and we welcome any other ideas you may have! 40 | 41 | ## Submitting Changes 42 | 43 | 1. Fork the repository and clone it locally. 44 | 2. Create a new branch with a descriptive name. Please follow the below naming convention: 45 | - Feature branches : feature/new-feature-name 46 | - Bug fix branches : bugfix/bugfix-issue-123 47 | - Documentation branches: docs/improve-installation-guide 48 | - Refactoring branches: refactor/improve-code-structure 49 | - Test branches: test/add-unit-tests 50 | 3. Make focused changes with clear and concise commit messages. 51 | 4. Open a pull request documenting the changes you've made and why they're necessary. 52 | 5. Respond to code reviews from maintainers. 53 | 54 | ## Code Quality Expectations 55 | 56 | - Clear, well-documented code 57 | - Follow project style standards 58 | - Rebase onto latest master branch 59 | 60 | ## Attribution 61 | 62 | Contributors to our project will be acknowledged in the project's [CONTRIBUTORS](CONTRIBUTORS.md) file. 63 | 64 | ## License 65 | 66 | By contributing to our project, you agree to license your contributions under the project's open source license. The project's license can be found in the [LICENSE](LICENSE) file. 67 | 68 | Thank you for your interest in contributing to FloTorch! 
We appreciate your help and look forward to working with you. 69 | 70 | --- -------------------------------------------------------------------------------- /CONTRIBUTORS.md: -------------------------------------------------------------------------------- 1 | - Adil Raza [@adilraza-fission](https://github.com/adilraza-fission) 2 | - Akhila Sree 3 | - Anusha Pedireddy [LinkedIn](https://www.linkedin.com/in/anusha-peddireddy-b520346a/) 4 | - BalaSriHarsha [LinkedIn](https://www.linkedin.com/in/balasriharsha77777/) 5 | - Girish Kumar [LinkedIn](https://www.linkedin.com/in/girish-kumar-dandamudi/) 6 | - Harish Pillarisetti [LinkedIn](https://www.linkedin.com/in/harish-pillarisetti-245523100/) 7 | - Kiran George [LinkedIn](https://www.linkedin.com/in/kirangeorge96/) 8 | - Krishna Battu [@shivakrishnaah](https://github.com/shivakrishnaah) 9 | - Manisharan Ch [LinkedIn](https://www.linkedin.com/in/manisharan-ch-2335a91a3/) 10 | - Mohan Gopavaram [LinkedIn](https://www.linkedin.com/in/mohangopavaram/) 11 | - Nanda Rajasekahruni [LinkedIn](https://www.linkedin.com/in/nanda-teja-rajasekharuni-b7ab6252/) 12 | - Nikita Kumari 13 | - Prasanna Venkatesh Sridharan [LinkedIn](https://www.linkedin.com/in/prasan80/) 14 | - Prem Kasha [LinkedIn](https://www.linkedin.com/in/premkasha/) 15 | - Raj Ganesh Jayaraman [LinkedIn](https://www.linkedin.com/in/rajganeshj/) 16 | - Ravi Teja Tiruvedula [@Ravifission](https://github.com/Ravifission) 17 | - Samarendra Kandala [LinkedIn](https://www.linkedin.com/in/samarendra-kandala-a14ba490/) 18 | - Shaik Abdul Gafoor [@abdul-fission](https://github.com/abdul-fission) 19 | - Shailaja Kazipeta [LinkedIn](https://www.linkedin.com/in/shailaja-kazipeta-87447015b/) -------------------------------------------------------------------------------- /FloTorch Colour Logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FissionAI/FloTorch/5e27e13de07f72c23364d270bf7a086cd864469e/FloTorch 
Colour Logo.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | flotorch-logo 3 | 4 | Static Badge 5 | 6 | follow on LinkedIn 8 |

9 | 10 | ## What is FloTorch? 11 | 12 | FloTorch is an innovative product poised to transform the field of Generative AI by simplifying and optimizing the decision-making process for leveraging Large Language Models (LLMs) in Retrieval Augmented Generation (RAG) systems. In today’s fast-paced digital landscape, selecting the right LLM setup is critical for achieving efficiency, accuracy, and cost-effectiveness. However, this process often involves extensive trial-and-error, significant resource expenditure, and complex comparisons of performance metrics. Our solution addresses these challenges with a streamlined, user-friendly approach. 13 | 14 | ## Why Choose FloTorch? 15 | - **Well-Architected framework**: Focuses on five pillars of service architecture: Operational Excellence, Security, Reliability, Performance Efficiency, Cost Optimization. 16 | - **Maximizes Efficiency**: Ensures users achieve the best performance from their chosen LLMs in less time as multiple experiments can run parallelly. 17 | - **Eliminates Complexity**: No more manual evaluations or tedious trial-and-error processes. 18 | - **Accelerates Selection**: Streamlines the evaluation and decision-making process. 19 | - **Focus on Innovation**: Allows users to dedicate resources to innovation and deployment rather than experimentation. 20 | 21 | 22 | ## The FloTorch Solution 23 |

24 | flotorch-arch 25 |

26 | 27 | - **Simple & Automatic**: Simple UI, 1,000+ combinations, no human errors, no ‘It Depends’ 28 | - **Saves time**: Reduces experiments from months to hours 29 | - **Encourages Experiments**: Test new LLMs / capabilities in hours with automation 30 | - **Secure**: Your data, your AWS account, your ground truth Q&A 31 | - **Deterministic**: Provides accuracy, performance, costs, and safety 32 | 33 | ## Demo 34 | 35 | [![FloTorch Demo](./cover-image.png?raw=true)](https://fissiontorch-public.s3.us-east-1.amazonaws.com/demo.mp4) 36 | 37 | ## Installation guide 38 | 39 | Please refer to our [Installation guide](install.md) for the installation steps in detail. 40 | 41 | ## Usage guide 42 | 43 | Use our [usage guide](usage_guide.md) for more details on using FloTorch. 44 | Click [here](faq.md) for frequently asked questions. 45 | 46 | ## Contributing 47 | 48 | For those who'd like to contribute code, see our [Contribution Guide](CONTRIBUTING.md). 49 | 50 | ## Blogs 51 | 52 | - [Benchmarking Amazon Nova and GPT-4o models with FloTorch](https://aws.amazon.com/blogs/machine-learning/benchmarking-amazon-nova-and-gpt-4o-models-with-flotorch/) 53 | - [Speed up RAG Experiments on AWS SageMaker with DeepSeek-R1 & FloTorch](https://www.flotorch.ai/blog/speed-up-rag-experiments-on-aws-sagemaker-with-deepseek-r1-flotorch) 54 | 55 | -------------------------------------------------------------------------------- /app/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9-slim AS base 2 | WORKDIR /app 3 | RUN apt-get update 4 | COPY app/requirements.txt app/requirements.txt 5 | RUN pip install --no-cache-dir -r app/requirements.txt 6 | COPY app/ app/ 7 | COPY core/ core/ 8 | COPY baseclasses/ baseclasses/ 9 | COPY config/ config/ 10 | COPY util/ util/ 11 | COPY constants/ constants/ 12 | ENV PYTHONPATH=/app 13 | ENV NGINX_AUTH_PASSWORD=Flotorch@123 14 | 15 | FROM node:22 AS uibuilder 16 | RUN npm install -g 
corepack@latest 17 | RUN corepack enable pnpm 18 | WORKDIR /app/ 19 | COPY ui/ /app/ui/ 20 | WORKDIR /app/ui 21 | RUN pnpm install && pnpm run generate 22 | 23 | FROM base AS release 24 | WORKDIR /app 25 | 26 | # Install required packages 27 | RUN apt-get update && apt-get install -y \ 28 | nginx \ 29 | apache2-utils \ 30 | supervisor \ 31 | && rm -rf /var/lib/apt/lists/* 32 | 33 | # Copy UI files and configure nginx 34 | COPY --from=uibuilder /app/ui/dist /usr/share/nginx/html 35 | RUN rm -rf /etc/nginx/conf.d/default.conf 36 | RUN rm -rf /etc/nginx/sites-enabled/default 37 | COPY app/nginx /etc/nginx 38 | 39 | # Create directory for supervisor configs 40 | RUN mkdir -p /etc/supervisor/conf.d 41 | 42 | # Create entrypoint script 43 | COPY < DynamoDBOperations: 44 | return db.execution_db 45 | 46 | def get_experiment_db() -> DynamoDBOperations: 47 | return db.experiment_db 48 | 49 | def get_question_metrics_db() -> DynamoDBOperations: 50 | return db.question_metrics_db 51 | 52 | def get_execution_model_invocations_db() -> DynamoDBOperations: 53 | return db.execution_model_invocations_db 54 | 55 | def get_step_function_orchestrator() -> StepFunctionOrchestrator: 56 | return db.step_function_orchestrator 57 | -------------------------------------------------------------------------------- /app/dependencies/s3.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | 3 | from botocore.client import Config 4 | 5 | from config.config import get_config 6 | 7 | 8 | config = get_config() 9 | 10 | # Create global S3 client 11 | S3_BUCKET = config.s3_bucket 12 | s3 = boto3.client('s3', config=Config(signature_version='s3v4')) 13 | 14 | def get_s3_client() -> boto3.client: 15 | return s3 16 | -------------------------------------------------------------------------------- /app/main.py: -------------------------------------------------------------------------------- 1 | from fastapi import FastAPI 2 | from fastapi.middleware.cors 
import CORSMiddleware 3 | from .seed_data import seed_models 4 | 5 | from app.dependencies.database import db 6 | from app.routes import execution, experiment, health, uploads, bedrock_config, config, expert_eval 7 | from app.dependencies.database import ( 8 | get_execution_model_invocations_db 9 | ) 10 | 11 | def create_app() -> FastAPI: 12 | 13 | app = FastAPI(title="FloTorch Experiment API") 14 | 15 | # Initialize databases at startup 16 | @app.on_event("startup") 17 | async def startup_event(): 18 | db.initialize() 19 | seed_models(get_execution_model_invocations_db()) 20 | 21 | app.add_middleware( 22 | CORSMiddleware, 23 | allow_origins=["*"], 24 | allow_credentials=True, 25 | allow_methods=["*"], 26 | allow_headers=["*"], 27 | ) 28 | 29 | # Register routers 30 | app.include_router(uploads.router) 31 | app.include_router(execution.router) 32 | app.include_router(experiment.router) 33 | app.include_router(health.router) 34 | app.include_router(bedrock_config.router) 35 | app.include_router(config.router) 36 | app.include_router(expert_eval.router) 37 | 38 | return app 39 | 40 | 41 | app = create_app() 42 | -------------------------------------------------------------------------------- /app/marketplace_app.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM 709825985650.dkr.ecr.us-east-1.amazonaws.com/fission-labs/flotorch-app:1.0.0 2 | 3 | WORKDIR /app 4 | COPY .env app/.env 5 | ENV PYTHONPATH=/app 6 | 7 | # Add build argument for the password 8 | ARG BASIC_AUTH_PASSWORD 9 | RUN test -n "$BASIC_AUTH_PASSWORD" || (echo "BASIC_AUTH_PASSWORD build argument is required" && false) 10 | 11 | # Create htpasswd file with provided password 12 | RUN htpasswd -cb /etc/nginx/.htpasswd admin "${BASIC_AUTH_PASSWORD}" 13 | 14 | CMD ["sh", "-c", "nginx -g 'daemon off;' & uvicorn app.main:app --host 0.0.0.0 --port 8000"] -------------------------------------------------------------------------------- /app/models.py: 
-------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | from typing import Dict, List, Optional 3 | 4 | class ExperimentRequest(BaseModel): 5 | config: Dict 6 | gt_data: str 7 | kb_data: str 8 | region: str 9 | name: str 10 | 11 | class ExperimentResponse(BaseModel): 12 | status: str 13 | experiment_ids: List[str] 14 | 15 | class ExecutionResponse(BaseModel): 16 | status: str 17 | execution_id: str -------------------------------------------------------------------------------- /app/nginx/auth.conf: -------------------------------------------------------------------------------- 1 | auth_basic "Restricted Access"; 2 | auth_basic_user_file /etc/nginx/.htpasswd; 3 | -------------------------------------------------------------------------------- /app/nginx/nginx.conf: -------------------------------------------------------------------------------- 1 | # Generated by nginxconfig.io 2 | # See nginxconfig.txt for the configuration share link 3 | pid /var/run/nginx.pid; 4 | worker_processes auto; 5 | worker_rlimit_nofile 65535; 6 | 7 | # Load modules 8 | include /etc/nginx/modules-enabled/*.conf; 9 | 10 | events { 11 | multi_accept on; 12 | worker_connections 65535; 13 | } 14 | 15 | http { 16 | charset utf-8; 17 | sendfile on; 18 | tcp_nopush on; 19 | tcp_nodelay on; 20 | server_tokens off; 21 | log_not_found off; 22 | types_hash_max_size 2048; 23 | types_hash_bucket_size 64; 24 | client_max_body_size 16M; 25 | client_body_buffer_size 4M; 26 | keepalive_timeout 300s; 27 | keepalive_requests 100; 28 | reset_timedout_connection on; 29 | # MIME 30 | include mime.types; 31 | default_type application/octet-stream; 32 | 33 | # Logging 34 | access_log off; 35 | error_log /dev/null; 36 | 37 | # Connection header for WebSocket reverse proxy 38 | map $http_upgrade $connection_upgrade { 39 | default upgrade; 40 | "" close; 41 | } 42 | 43 | map $remote_addr $proxy_forwarded_elem { 44 | 45 | # IPv4 addresses can be sent 
as-is 46 | ~^[0-9.]+$ "for=$remote_addr"; 47 | 48 | # IPv6 addresses need to be bracketed and quoted 49 | ~^[0-9A-Fa-f:.]+$ "for=\"[$remote_addr]\""; 50 | 51 | # Unix domain socket names cannot be represented in RFC 7239 syntax 52 | default "for=unknown"; 53 | } 54 | 55 | map $http_forwarded $proxy_add_forwarded { 56 | 57 | # If the incoming Forwarded header is syntactically valid, append to it 58 | "~^(,[ \\t]*)*([!#$%&'*+.^_`|~0-9A-Za-z-]+=([!#$%&'*+.^_`|~0-9A-Za-z-]+|\"([\\t \\x21\\x23-\\x5B\\x5D-\\x7E\\x80-\\xFF]|\\\\[\\t \\x21-\\x7E\\x80-\\xFF])*\"))?(;([!#$%&'*+.^_`|~0-9A-Za-z-]+=([!#$%&'*+.^_`|~0-9A-Za-z-]+|\"([\\t \\x21\\x23-\\x5B\\x5D-\\x7E\\x80-\\xFF]|\\\\[\\t \\x21-\\x7E\\x80-\\xFF])*\"))?)*([ \\t]*,([ \\t]*([!#$%&'*+.^_`|~0-9A-Za-z-]+=([!#$%&'*+.^_`|~0-9A-Za-z-]+|\"([\\t \\x21\\x23-\\x5B\\x5D-\\x7E\\x80-\\xFF]|\\\\[\\t \\x21-\\x7E\\x80-\\xFF])*\"))?(;([!#$%&'*+.^_`|~0-9A-Za-z-]+=([!#$%&'*+.^_`|~0-9A-Za-z-]+|\"([\\t \\x21\\x23-\\x5B\\x5D-\\x7E\\x80-\\xFF]|\\\\[\\t \\x21-\\x7E\\x80-\\xFF])*\"))?)*)?)*$" "$http_forwarded, $proxy_forwarded_elem"; 59 | 60 | # Otherwise, replace it 61 | default "$proxy_forwarded_elem"; 62 | } 63 | 64 | # Load configs 65 | include /etc/nginx/conf.d/*.conf; 66 | include /etc/nginx/sites-enabled/*; 67 | } 68 | -------------------------------------------------------------------------------- /app/nginx/nginxconfig.io/general.conf: -------------------------------------------------------------------------------- 1 | # favicon.ico 2 | location = /favicon.ico { 3 | log_not_found off; 4 | } 5 | 6 | # robots.txt 7 | location = /robots.txt { 8 | log_not_found off; 9 | } 10 | 11 | # assets, media 12 | location ~* \.(?:css(\.map)?|js(\.map)?|jpe?g|png|gif|ico|cur|heic|webp|tiff?|mp3|m4a|aac|ogg|midi?|wav|mp4|mov|webm|mpe?g|avi|ogv|flv|wmv)$ { 13 | expires 7d; 14 | } 15 | 16 | # svg, fonts 17 | location ~* \.(?:svgz?|ttf|ttc|otf|eot|woff2?)$ { 18 | add_header Access-Control-Allow-Origin "*"; 19 | expires 7d; 20 | } 21 | 22 | # gzip 23 | 
gzip on; 24 | gzip_vary on; 25 | gzip_proxied any; 26 | gzip_comp_level 6; 27 | gzip_types text/plain text/css text/xml application/json application/javascript application/rss+xml application/atom+xml image/svg+xml; -------------------------------------------------------------------------------- /app/nginx/nginxconfig.io/proxy.conf: -------------------------------------------------------------------------------- 1 | proxy_http_version 1.1; 2 | proxy_cache_bypass $http_upgrade; 3 | 4 | # Proxy SSL 5 | proxy_ssl_server_name on; 6 | 7 | # Proxy headers 8 | proxy_set_header Upgrade $http_upgrade; 9 | proxy_set_header Connection $connection_upgrade; 10 | proxy_set_header X-Real-IP $remote_addr; 11 | proxy_set_header Forwarded $proxy_add_forwarded; 12 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 13 | proxy_set_header X-Forwarded-Proto $scheme; 14 | proxy_set_header X-Forwarded-Host $host; 15 | proxy_set_header X-Forwarded-Port $server_port; 16 | 17 | # Proxy timeouts 18 | proxy_connect_timeout 300s; 19 | proxy_send_timeout 300s; 20 | proxy_read_timeout 300s; 21 | 22 | # Keep-alive timeouts 23 | keepalive_timeout 310s; 24 | keepalive_requests 100; 25 | proxy_socket_keepalive on; 26 | 27 | # Remove Below 28 | # proxy_buffer_size 16k; 29 | # proxy_buffers 8 16k; 30 | # proxy_busy_buffers_size 16k; 31 | -------------------------------------------------------------------------------- /app/nginx/nginxconfig.io/security.conf: -------------------------------------------------------------------------------- 1 | # security headers 2 | add_header X-XSS-Protection "1; mode=block" always; 3 | add_header X-Content-Type-Options "nosniff" always; 4 | add_header Referrer-Policy "no-referrer-when-downgrade" always; 5 | #add_header Content-Security-Policy "default-src 'self' http: https: ws: wss: data: blob: 'unsafe-inline'; script-src 'self' 'unsafe-inline' 'unsafe-eval'; worker-src blob:; frame-ancestors 'self';" always; 6 | add_header Permissions-Policy 
"interest-cohort=()" always; 7 | 8 | # . files 9 | location ~ /\.(?!well-known) { 10 | deny all; 11 | } 12 | -------------------------------------------------------------------------------- /app/nginx/sites-enabled/app.conf: -------------------------------------------------------------------------------- 1 | server { 2 | listen 80; 3 | server_name _; 4 | root /usr/share/nginx/html; 5 | 6 | # security 7 | include nginxconfig.io/security.conf; 8 | 9 | # logging 10 | access_log /var/log/nginx/access.log combined buffer=512k flush=1m; 11 | error_log /var/log/nginx/error.log warn; 12 | 13 | location /health { 14 | return 200; 15 | } 16 | 17 | location /api/health { 18 | return 200; 19 | } 20 | 21 | # index.html fallback 22 | location / { 23 | include auth.conf; 24 | try_files $uri $uri/ /index.html; 25 | } 26 | 27 | # reverse proxy 28 | location /api/ { 29 | include auth.conf; 30 | rewrite ^/api/(.*) /$1 break; 31 | proxy_pass http://localhost:8000; 32 | include nginxconfig.io/proxy.conf; 33 | if ($request_method = 'OPTIONS' ) { 34 | add_header 'Access-Control-Allow-Origin' $http_origin; 35 | add_header 'Access-Control-Allow-Methods' 'GET, POST, OPTIONS'; 36 | # 37 | # Custom headers and headers various browsers *should* be OK with but aren't 38 | # 39 | add_header 'Access-Control-Allow-Headers' 40 | 'DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range,Authorization'; 41 | # 42 | # Tell client that this pre-flight info is valid for 20 days 43 | # 44 | add_header 'Access-Control-Max-Age' 1728000; 45 | add_header 'Content-Type' 'text/plain; charset=utf-8'; 46 | add_header 'Access-Control-Allow-Credentials' 'true' always; 47 | add_header 'Content-Length' 0; 48 | return 204; 49 | } 50 | if ($request_method = 'POST' ) { 51 | add_header 'Access-Control-Allow-Origin' $http_origin always; 52 | add_header 'Access-Control-Allow-Methods' 'GET, POST, OPTIONS' always; 53 | add_header 'Access-Control-Allow-Credentials' 'true' always; 54 | add_header 
'Access-Control-Allow-Headers' 55 | 'DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range,Authorization' 56 | always; 57 | add_header 'Access-Control-Expose-Headers' 58 | 'Content-Length,Content-Range' 59 | always; 60 | } 61 | if ($request_method = 'GET' ) { 62 | add_header 'Access-Control-Allow-Origin' $http_origin always; 63 | add_header 'Access-Control-Allow-Methods' 'GET, POST, OPTIONS' always; 64 | add_header 'Access-Control-Allow-Credentials' 'true' always; 65 | add_header 'Access-Control-Allow-Headers' 66 | 'DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range,Authorization' 67 | always; 68 | add_header 'Access-Control-Expose-Headers' 69 | 'Content-Length,Content-Range' 70 | always; 71 | } 72 | } 73 | 74 | # additional config 75 | include nginxconfig.io/general.conf; 76 | } 77 | -------------------------------------------------------------------------------- /app/orchestrator.py: -------------------------------------------------------------------------------- 1 | from core.dynamodb import DynamoDBOperations 2 | import json 3 | import boto3 4 | from decimal import Decimal 5 | from fastapi import HTTPException 6 | import os 7 | from config.config import get_config 8 | import logging 9 | from typing import Dict, Any 10 | 11 | # Configure logging 12 | logging.basicConfig(level=logging.INFO) 13 | logger = logging.getLogger(__name__) 14 | 15 | class StepFunctionOrchestrator: 16 | """ 17 | Handles Step Function orchestration for experiment execution. 18 | """ 19 | def __init__(self): 20 | self.config = get_config() 21 | self.step_function_client = self._initialize_step_function_client() 22 | 23 | def _initialize_step_function_client(self) -> boto3.client: 24 | """ 25 | Initialize the AWS Step Function client. 
def _prepare_execution_payload(self, execution_id: str) -> str:
    """Serialize the Step Function input document for one execution.

    Args:
        execution_id (str): The execution ID.

    Returns:
        str: JSON string payload of the form {"execution_id": ...}.

    Raises:
        HTTPException: 500 if serialization fails.
    """
    try:
        # Single-key input document consumed by the state machine.
        return json.dumps({"execution_id": execution_id})
    except Exception as e:
        logger.error(f"Failed to prepare execution payload: {e}")
        raise HTTPException(
            status_code=500,
            detail="Failed to prepare execution payload"
        )
def run_experiment_orchestration(execution_id: str) -> Dict[str, Any]:
    """Backward-compatible module-level entry point.

    Delegates to the shared StepFunctionOrchestrator singleton so existing
    imports of this function keep working.

    Args:
        execution_id (str): The execution ID.

    Returns:
        Dict[str, Any]: Raw Step Functions start_execution response.
    """
    return orchestrator.run_experiment_orchestration(execution_id)
@router.get("/bedrock/knowledge_bases", tags=["bedrock"])
async def get_knowledge_bases(region: Optional[str] = Query('us-east-1', description="AWS region to list knowledge bases from")):
    """List the valid Bedrock knowledge bases visible in the given AWS region."""
    try:
        utils = KnowledgeBaseUtils(region)
        return utils.list_knowledge_bases()
    except Exception as e:
        # Surface any AWS/config failure as a plain 500 with the message.
        raise HTTPException(status_code=StatusCodes.INTERNAL_SERVER_ERROR, detail=str(e))
@router.post("/presignedurl", tags=["uploads"])
async def get_presigned_url(
    request: PresignedurlRequestGT,
    s3=Depends(get_s3_client)
):
    """Issue a presigned PUT URL for uploading the ground-truth JSON of one run."""
    unique_id = request.unique_id
    # Ground-truth object lives at a fixed key under the run's prefix.
    ground_truth_data_key = f"{unique_id}/gt_data/gt.json"
    gt_data_path = f"s3://{S3_BUCKET}/{ground_truth_data_key}"

    try:
        # Generate presigned URL for ground truth data upload (2h validity).
        gt_data_url = s3.generate_presigned_url(
            ClientMethod="put_object",
            Params={"Bucket": S3_BUCKET, "Key": ground_truth_data_key},
            ExpiresIn=7200,
        )
        logger.info(f"Generated presigned URL for ground truth data: {ground_truth_data_key}")

        return {
            "gt_data": {
                "path": gt_data_path,
                "presignedurl": gt_data_url
            },
            "uuid": unique_id,
        }
    except Exception as e:
        logger.error(f"Failed to generate presigned URL for ground truth data: {str(e)}")
        # NOTE(review): HTTPException here is imported from http.client in this
        # module, not fastapi; raising it with keyword args will TypeError at
        # runtime — confirm and switch the import to fastapi.HTTPException.
        raise HTTPException(status_code=500, detail=f"Error generating pre-signed URL: {str(e)}")
files: 79 | file_key = f"{prefix}/{file_name}" 80 | file_path=f"s3://{S3_BUCKET}/{file_key}" 81 | 82 | data_url = s3.generate_presigned_url( 83 | ClientMethod="put_object", 84 | Params={"Bucket": S3_BUCKET, "Key": file_key}, 85 | ExpiresIn=7200,) 86 | 87 | result.append({ 88 | "path": file_path, 89 | "presignedurl": data_url 90 | }) 91 | 92 | logger.info(f"Generated {len(files)} presigned URLs for KB upload: {prefix}") 93 | return {'uuid': unique_id, "files": result} 94 | 95 | except Exception as e: 96 | logger.error(f"Failed to generate presigned URLs for KB upload: {str(e)}") 97 | raise HTTPException(status_code=500, detail=f"Error generating pre-signed URL: {str(e)}") -------------------------------------------------------------------------------- /app/seed_data.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from fastapi import Depends 3 | 4 | from app.dependencies.database import ( 5 | get_execution_model_invocations_db 6 | ) 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | MODELS = { 11 | "bedrock_us.amazon.nova-lite-v1:0": 35, 12 | "bedrock_us.amazon.nova-micro-v1:0": 35, 13 | "bedrock_us.amazon.nova-pro-v1:0": 12, 14 | "bedrock_amazon.titan-text-lite-v1": 14, 15 | "bedrock_amazon.titan-text-express-v1": 14, 16 | "bedrock_us.anthropic.claude-3-5-sonnet-20241022-v2:0": 5, 17 | "bedrock_anthropic.claude-3-5-sonnet-20240620-v1:0": 5, 18 | "bedrock_us.anthropic.claude-3-7-sonnet-20250219-v1:0": 5, 19 | "bedrock_us.anthropic.claude-3-5-haiku-20241022-v1:0": 5, 20 | "bedrock_cohere.command-r-plus-v1:0": 25, 21 | "bedrock_cohere.command-r-v1:0": 14, 22 | "bedrock_us.meta.llama3-2-1b-instruct-v1:0": 14, 23 | "bedrock_us.meta.llama3-2-3b-instruct-v1:0": 14, 24 | "bedrock_us.meta.llama3-2-11b-instruct-v1:0": 14, 25 | "bedrock_us.meta.llama3-2-90b-instruct-v1:0": 25, 26 | "bedrock_mistral.mistral-7b-instruct-v0:2": 25, 27 | "bedrock_mistral.mistral-large-2402-v1:0": 25, 28 | 
def seed_models(execution_model_invocations_db) -> int:
    """
    Seeds the models data into DynamoDB.

    Args:
        execution_model_invocations_db: DynamoDBOperations instance for the models table

    Returns:
        int: Number of models seeded

    Raises:
        Exception: If there's an error during seeding
    """
    try:
        if not execution_model_invocations_db:
            raise ValueError("Database connection is required")

        seeded_count = 0
        for model_id, model_limit in MODELS.items():
            # Guard against missing / non-positive limits; fall back to a
            # conservative default of 5 invocations.
            if not model_limit or model_limit <= 0:
                logger.error(f"Model limit {model_limit} is invalid for {model_id}")
                model_limit = 5
            execution_model_invocations_db.put_item({
                "execution_model_id": model_id,
                "invocations": 0,
                "limit": model_limit,
            })
            seeded_count += 1

        logger.info(f"Successfully seeded {seeded_count} models")
        return seeded_count
    except Exception as e:
        logger.error(f"Error seeding models: {str(e)}")
        raise
| 34 | Resources: 35 | OpenSearchSecurityGroup: 36 | Type: AWS::EC2::SecurityGroup 37 | DeletionPolicy: Delete 38 | UpdateReplacePolicy: Delete 39 | Properties: 40 | GroupDescription: Security group for OpenSearch domain 41 | GroupName: !Sub ${ClientName}-opensearch-sg 42 | VpcId: !Ref VpcId 43 | SecurityGroupIngress: 44 | - IpProtocol: tcp 45 | FromPort: 443 46 | ToPort: 443 47 | CidrIp: !Ref VpcCidr 48 | Description: Allow HTTPS access from VPC 49 | Tags: 50 | - Key: Name 51 | Value: !Sub ${ClientName}-opensearch-sg 52 | - Key: ClientName 53 | Value: !Ref ClientName 54 | - Key: CreatedBy 55 | Value: !Ref CreatedBy 56 | - Key: ProjectName 57 | Value: !Ref ProjectName 58 | 59 | OpenSearchDomain: 60 | Type: AWS::OpenSearchService::Domain 61 | DeletionPolicy: Delete 62 | UpdateReplacePolicy: Delete 63 | Properties: 64 | DomainName: !Sub flotorch-${TableSuffix} 65 | EngineVersion: OpenSearch_2.17 66 | ClusterConfig: 67 | InstanceType: r7g.2xlarge.search 68 | InstanceCount: 3 69 | DedicatedMasterEnabled: false 70 | ZoneAwarenessEnabled: false 71 | EBSOptions: 72 | EBSEnabled: true 73 | VolumeType: gp3 74 | VolumeSize: 500 75 | Iops: 16000 76 | Throughput: 1000 77 | VPCOptions: 78 | SubnetIds: 79 | - !Ref PrivateSubnetId 80 | SecurityGroupIds: 81 | - !Ref OpenSearchSecurityGroup 82 | EncryptionAtRestOptions: 83 | Enabled: true 84 | NodeToNodeEncryptionOptions: 85 | Enabled: true 86 | DomainEndpointOptions: 87 | EnforceHTTPS: true 88 | AdvancedSecurityOptions: 89 | Enabled: true 90 | InternalUserDatabaseEnabled: true 91 | MasterUserOptions: 92 | MasterUserName: !Ref OpenSearchAdminUser 93 | MasterUserPassword: !Ref OpenSearchAdminPassword 94 | AccessPolicies: 95 | Version: '2012-10-17' 96 | Statement: 97 | - Effect: Allow 98 | Principal: 99 | AWS: '*' 100 | Action: 'es:*' 101 | Resource: !Sub 'arn:aws:es:${AWS::Region}:${AWS::AccountId}:domain/flotorch-${TableSuffix}/*' 102 | Tags: 103 | - Key: Name 104 | Value: !Sub ${ClientName}-opensearch 105 | - Key: ClientName 106 | 
Value: !Ref ClientName 107 | - Key: CreatedBy 108 | Value: !Ref CreatedBy 109 | - Key: ProjectName 110 | Value: !Ref ProjectName 111 | 112 | Outputs: 113 | OpenSearchEndpoint: 114 | Description: OpenSearch domain endpoint 115 | Value: !GetAtt OpenSearchDomain.DomainEndpoint 116 | OpenSearchDomainArn: 117 | Description: OpenSearch domain ARN 118 | Value: !GetAtt OpenSearchDomain.DomainArn 119 | OpenSearchAdminUser: 120 | Description: OpenSearch admin username 121 | Value: !Ref OpenSearchAdminUser 122 | OpenSearchAdminPassword: 123 | Description: OpenSearch admin password 124 | Value: !Ref OpenSearchAdminPassword 125 | DashboardsUrl: 126 | Description: OpenSearch Dashboards URL 127 | Value: !Sub https://${OpenSearchDomain.DomainEndpoint}/_dashboards/ 128 | -------------------------------------------------------------------------------- /config/__init__.py: -------------------------------------------------------------------------------- 1 | from .config import Config 2 | from .experimental_config import ExperimentalConfig -------------------------------------------------------------------------------- /config/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | from dotenv import load_dotenv 3 | from typing import Optional 4 | from dataclasses import dataclass 5 | 6 | @dataclass 7 | class Config: 8 | """Configuration class for AWS and OpenSearch settings.""" 9 | profile_name: Optional[str] 10 | aws_region: str 11 | bedrock_endpoint_url: Optional[str] 12 | opensearch_host: str 13 | vector_field: str 14 | vector_index_name: str 15 | opensearch_serverless: bool 16 | execution_table: str 17 | experiment_table: str 18 | experiment_question_metrics_table: str 19 | experiment_question_metrics_experimentid_index: str 20 | execution_model_invocations_table: str 21 | opensearch_username: Optional[str] 22 | opensearch_password: Optional[str] 23 | step_function_arn : str 24 | inference_system_prompt : str 25 | s3_bucket : 
@staticmethod
def load_config() -> 'Config':
    """
    Load configuration from environment variables (.env is read first).

    Returns:
        Config: Configuration object with loaded values
    """
    load_dotenv()
    env = os.getenv  # bind once; every field below comes from the environment
    return Config(
        profile_name=env('profile_name'),
        aws_region=env('aws_region', 'us-east-1'),
        bedrock_endpoint_url=env('bedrock_endpoint_url'),
        opensearch_host=env('opensearch_host', ''),
        vector_field=env('vector_field_name', 'vectors'),
        vector_index_name=env('vector_index_name', ''),
        opensearch_serverless=env('opensearch_serverless', 'false').lower() == 'true',
        execution_table=env('execution_table', ''),
        experiment_table=env('experiment_table', ''),
        experiment_question_metrics_table=env('experiment_question_metrics_table', ''),
        experiment_question_metrics_experimentid_index=env('experiment_question_metrics_experimentid_index', ''),
        execution_model_invocations_table=env('execution_model_invocations_table', ''),
        opensearch_password=env('opensearch_password', ''),
        opensearch_username=env('opensearch_username', ''),
        step_function_arn=env('step_function_arn', ''),
        inference_system_prompt=env('inference_system_prompt', ''),
        s3_bucket=env('s3_bucket', ''),
        bedrock_role_arn=env('bedrock_role_arn', ''),
        sagemaker_role_arn=env('sagemaker_role_arn', ''),
        bedrock_limit_csv_path=env('bedrock_limit_csv', '')
    )
67 | 68 | Returns: 69 | Config: Validated configuration object 70 | 71 | Raises: 72 | ValueError: If configuration validation fails 73 | """ 74 | config = Config.load_config() 75 | return config 76 | -------------------------------------------------------------------------------- /config/experimental_config.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional, List, Union 2 | from pydantic import BaseModel, Field 3 | import json 4 | import re 5 | 6 | class ExperimentalConfig(BaseModel): 7 | 8 | execution_id: str 9 | experiment_id: str 10 | aws_region: str = Field(alias="aws_region") 11 | kb_data: str = Field(alias="kb_data") 12 | gt_data: str = Field(alias="gt_data") 13 | chunking_strategy: str = Field(alias="chunking_strategy") 14 | chunk_size: Union[int, List] = Field(alias="chunk_size") 15 | chunk_overlap: Union[int, List] = Field(alias="chunk_overlap") 16 | hierarchical_parent_chunk_size: int = Field(alias="hierarchical_parent_chunk_size") 17 | hierarchical_child_chunk_size: int = Field(alias="hierarchical_child_chunk_size") 18 | hierarchical_chunk_overlap_percentage: int = Field(alias="hierarchical_chunk_overlap_percentage") 19 | embedding_service: str = Field(alias="embedding_service") 20 | embedding_model: str = Field(alias="embedding_model") 21 | embedding_model_endpoint: str = None 22 | indexing_algorithm: str = Field(alias="indexing_algorithm") 23 | index_id: str = Field(alias="index_id") 24 | n_shot_prompts: int = Field(alias="n_shot_prompts") 25 | n_shot_prompt_guide: Optional[str] = None 26 | n_shot_prompt_guide_obj: 'NShotPromptGuide' = None 27 | knn_num: Union[int, List] = Field(alias="knn_num") 28 | temp_retrieval_llm: float = Field(alias="temp_retrieval_llm") 29 | retrieval_service: str = Field(alias="retrieval_service") 30 | retrieval_model: str = Field(alias="retrieval_model") 31 | retrieval_model_endpoint: str = None 32 | vector_dimension: Union[int, List] = 
class StatusCodes:
    """HTTP status codes used across the API routes."""

    # 2xx — success
    SUCCESS = 200
    CREATED = 201
    ACCEPTED = 202
    # 4xx — client errors
    BAD_REQUEST = 400
    UNAUTHORIZED = 401
    FORBIDDEN = 403
    NOT_FOUND = 404
    # 5xx — server errors
    INTERNAL_SERVER_ERROR = 500
class ValidationStatus(Enum):
    """Lifecycle states of a configuration validation run."""

    QUEUED = "queued"          # accepted, waiting to start
    INPROGRESS = "inprogress"  # currently running
    FAILED = "failed"          # terminated with an error
    COMPLETED = "completed"    # finished successfully
def chunk(self, text: str) -> List[tuple]:
    """Split text into parent chunks, then split each parent into child chunks.

    Sizes are expressed in tokens (approximated as 4 characters per token)
    and the overlap is a percentage of the child chunk size.

    Args:
        text: Non-empty input text.

    Returns:
        A flat list of (parent_id, parent_chunk_text, child_chunk_text)
        tuples, one per child chunk; parent_id is a UUID shared by all
        children of the same parent. (The previous List[List[str]]
        annotation did not match the tuples actually returned.)

    Raises:
        ValueError: If a size is non-positive, the child chunk is larger than
            the parent, the overlap is >= the child size, or text is empty.
    """
    overlap_tokens = int((self.chunk_overlap / 100) * self.child_chunk_size)
    if self.parent_chunk_size <= 0:
        raise ValueError("parent chunk size must be positive")
    if self.child_chunk_size <= 0:
        raise ValueError("child chunk size must be positive")
    if self.child_chunk_size > self.parent_chunk_size:
        raise ValueError("child chunk size must be less than parent chunk size")
    if overlap_tokens >= self.child_chunk_size:
        raise ValueError("chunk_overlap must be less than child chunk size")
    if not text:
        raise ValueError("Input text cannot be empty or None")

    # TODO: Temporary fix, better to move to recursive
    separators = [' ', '\t', '\n', '\r', '\f', '\v']
    for sep in separators:
        text = text.replace(sep, ' ')

    # chunk size is in tokens, general norm : 1 token = 4 chars
    parent_character_chunk_size = 4 * self.parent_chunk_size
    child_character_chunk_size = 4 * self.child_chunk_size
    # overlap is in percentage of the child chunk size
    child_chunk_overlap_characters = int(self.chunk_overlap * child_character_chunk_size / 100)
    self.parent_text_splitter = CharacterTextSplitter(
        separator=" ",
        chunk_size=parent_character_chunk_size,
        chunk_overlap=0,  # Can change this at a later point of time
        length_function=len,
        is_separator_regex=False
    )
    self.child_text_splitter = CharacterTextSplitter(
        separator=" ",
        chunk_size=child_character_chunk_size,
        chunk_overlap=child_chunk_overlap_characters,
        length_function=len,
        is_separator_regex=False
    )
    parent_chunks = self.parent_text_splitter.split_text(text)
    overall_chunks = []
    for parent_chunk in parent_chunks:
        # One shared id ties every child back to its parent chunk.
        parent_id = str(uuid.uuid4())
        child_chunks = self.child_text_splitter.split_text(parent_chunk)
        for child_chunk in child_chunks:
            overall_chunks.append((parent_id, parent_chunk, child_chunk))
    return overall_chunks
# Bedrock Base Embedder
class BedrockEmbedder(BaseEmbedder):
    """Base class for Bedrock-hosted embedding models.

    Subclasses supply the model-specific request body (`prepare_payload`)
    and response parsing (`extract_embedding`).
    """

    def __init__(self, model_id: str, region: str, role_arn: str = None) -> None:
        super().__init__(model_id)
        # Runtime client used for InvokeModel calls in the given region.
        self.client = boto3.client("bedrock-runtime", region_name=region)

    def prepare_payload(self, text: str, dimensions: int, normalize: bool) -> Dict:
        raise NotImplementedError("Subclasses must implement `prepare_payload`")

    @BedRockRetryHander()
    def embed(self, text: str, dimensions: int = 256, normalize: bool = True) -> Tuple[Dict[Any, Any], List[float]]:
        """Invoke the model once and return (metadata, embedding vector)."""
        try:
            request_body = self.prepare_payload(text, dimensions, normalize)
            response = self.client.invoke_model(
                modelId=self.model_id,
                contentType="application/json",
                accept="application/json",
                body=json.dumps(request_body)
            )
            model_response = json.loads(response["body"].read())

            # Token count / latency come back as HTTP headers on the raw
            # response; only report them when the headers are present.
            metadata = {}
            if response and 'ResponseMetadata' in response and 'HTTPHeaders' in response['ResponseMetadata']:
                headers = response['ResponseMetadata']['HTTPHeaders']
                metadata = {
                    'inputTokens': headers['x-amzn-bedrock-input-token-count'],
                    'latencyMs': headers['x-amzn-bedrock-invocation-latency']
                }
            return metadata, self.extract_embedding(model_response)
        except Exception as e:
            logger.error(f"Error during embedding: {e}")
            raise

    def extract_embedding(self, response: Dict) -> List[float]:
        raise NotImplementedError("Subclasses must implement `extract_embedding`")
class CohereEmbedder(BedrockEmbedder):
    """Bedrock embedder for the Cohere Embed v3 model family."""

    def prepare_payload(self, text: str, dimensions: int, normalize: bool) -> Dict:
        # Cohere's Bedrock API takes a batch of texts plus an input type;
        # `dimensions` and `normalize` are not configurable for this family,
        # so those arguments are intentionally ignored.
        payload = {"texts": [text], "input_type": "search_document"}
        return payload

    def extract_embedding(self, response: Dict) -> List[float]:
        # A single input text is sent per request, so the response holds
        # exactly one embedding — return it.
        embeddings = response["embeddings"]
        return embeddings[0]

# Register both the English and multilingual Cohere variants with the factory.
EmbedderFactory.register_embedder("bedrock", "cohere.embed-english-v3", CohereEmbedder)
EmbedderFactory.register_embedder("bedrock", "cohere.embed-multilingual-v3", CohereEmbedder)
class EmbedderFactory:
    """Factory to create embedders based on model ID and service type."""

    # Maps "service_type:model_id" -> embedder class.
    _registry: Dict[str, Type[BaseEmbedder]] = {}

    @classmethod
    def register_embedder(cls, service_type: str, model_id: str, embedder_cls: Type[BaseEmbedder]):
        """Register `embedder_cls` for the given service/model combination."""
        key = f"{service_type}:{model_id}"
        cls._registry[key] = embedder_cls

    @classmethod
    def create_embedder(cls, experimentalConfig: ExperimentalConfig) -> BaseEmbedder:
        """Instantiate the embedder registered for the experiment's service/model.

        Raises:
            ValueError: if no embedder is registered for the combination.
        """
        service_type = experimentalConfig.embedding_service
        model_id = experimentalConfig.embedding_model
        key = f"{service_type}:{model_id}"

        # Look up the class first so an unknown combination raises the
        # informative ValueError instead of failing on role resolution.
        embedder_cls = cls._registry.get(key)
        if not embedder_cls:
            raise ValueError(f"No embedder registered for service {service_type} and model {model_id}")

        # Resolve the execution role for the chosen service. Previously
        # `role_arn` was left unbound for any service type other than
        # sagemaker/bedrock, causing an UnboundLocalError; default to None.
        role_arn = None
        if service_type == "sagemaker":
            role_arn = get_config().sagemaker_role_arn
            logger.info(f"Sagemaker role: {role_arn}")  # was a bare print()
        elif service_type == "bedrock":
            role_arn = get_config().bedrock_role_arn

        return embedder_cls(model_id, experimentalConfig.aws_region, role_arn)
class RagasEvaluator(BaseEvaluator):
    """Shared base for RAGAS-backed evaluators.

    Provides the DynamoDB plumbing common to the LLM and non-LLM variants:
    fetching an experiment's question records and persisting aggregate
    experiment-level metrics.
    """

    def __init__(self, config, experimental_config):
        super().__init__(config, experimental_config)
        self._initialize_dynamodb()

    def _initialize_dynamodb(self):
        """Initialize DynamoDB connections"""
        region = self.config.aws_region
        self.metrics_db = DynamoDBOperations(
            region=region,
            table_name=self.config.experiment_question_metrics_table
        )
        self.experiment_db = DynamoDBOperations(
            region=region,
            table_name=self.config.experiment_table
        )

    def get_all_questions(self, experiment_id: str) -> List[Dict]:
        """Fetch all questions for a given experiment"""
        # NOTE(review): a single DynamoDB query returns at most 1 MB of items;
        # confirm DynamoDBOperations.query paginates via LastEvaluatedKey for
        # large experiments.
        return self.metrics_db.query(
            "experiment_id = :experimentId",
            expression_values={":experimentId": experiment_id},
            index_name=self.config.experiment_question_metrics_experimentid_index
        )

    def update_experiment_metrics(self, experiment_id: str, experiment_eval_metrics: Dict[str, float]):
        """Update overall experiment metrics"""
        if not experiment_eval_metrics:
            return  # Nothing to persist.
        try:
            logger.info(f"Updating experiment metrics for experiment {experiment_id}")
            self.experiment_db.update_item(
                key={'id': experiment_id},
                update_expression="SET eval_metrics = :eval",
                expression_values={':eval': {'M': experiment_eval_metrics}}
            )
        except Exception as e:
            # Deliberately best-effort: a failed metrics write is logged but
            # does not abort the evaluation run.
            logger.error(f"Error updating experiment metrics: {e}")

    def calculate_eval_score(self, evaluator, data):
        """Score one sample with `evaluator`, falling back to 0.0 on failure."""
        try:
            return evaluator.single_turn_score(data)
        except Exception as e:
            logger.error(f"Error processing sample : {e}")
            return 0.0
    def _evaluate_single_question(self, metrics_record: ExperimentQuestionMetrics) -> Optional[EvaluationMetrics]:
        """Evaluate a single question record and return its non-LLM RAGAS metrics.

        Returns None (after logging) if sample construction or scoring raises;
        individual scorer failures are already absorbed by
        calculate_eval_score, which returns 0.0.
        """
        try:
            # Answer-level sample: generated answer vs. ground-truth answer.
            answer_sample = SingleTurnSample(
                response=metrics_record.generated_answer,
                reference=metrics_record.gt_answer
            )

            # Context-level sample for precision/recall.
            # NOTE(review): retrieved_contexts is populated with the *generated
            # answer* rather than the passages retrieved for this question —
            # this looks like a copy-paste slip and would skew context
            # precision/recall. Confirm against the retrieval pipeline before
            # changing.
            context_sample = SingleTurnSample(
                retrieved_contexts=[metrics_record.generated_answer],
                reference_contexts=metrics_record.reference_contexts
            )

            # Each metric independently falls back to 0.0 on scorer failure.
            metrics = EvaluationMetrics(
                string_similarity=self.calculate_eval_score(self.str_similar_scorer, answer_sample),
                context_precision=self.calculate_eval_score(self.context_precision, context_sample),
                context_recall=self.calculate_eval_score(self.context_recall, context_sample),
                rouge_score=self.calculate_eval_score(self.rouge_score,answer_sample)
            )

            return metrics
        except Exception as e:
            logger.error(f"Error processing sample {metrics_record.id}: {e}")
            return None
"""Registration hub for inference backends.

Importing this package registers every supported SageMaker-hosted model with
the ``InferencerFactory``; Bedrock models register via their own modules.
"""
from .bedrock.bedrock_inferencer import BedrockInferencer

# Importing SageMaker-specific inference and inference factory.
from .inference_factory import InferencerFactory
from .sagemaker.sagemaker_inferencer import SageMakerInferencer
from .sagemaker.llama_inferencer import LlamaInferencer

# List of model names that you want to register with the InferencerFactory
model_list = [
    "meta-textgeneration-llama-3-1-8b-instruct",    # Model for text generation (Llama)
    "huggingface-llm-falcon-7b-instruct-bf16",      # Model for text generation (Falcon)
    "meta-vlm-llama-4-scout-17b-16e-instruct",
    "meta-textgeneration-llama-3-3-70b-instruct",   # Llama model with more parameters for text generation
    "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B"
]

# Llama 4 vision-language models are routed to the dedicated LlamaInferencer;
# every other SageMaker model goes through the generic SageMakerInferencer.
for model_id in model_list:
    inferencer_cls = LlamaInferencer if model_id.startswith("meta-vlm-llama-4") else SageMakerInferencer
    InferencerFactory.register_inferencer('sagemaker', model_id, inferencer_cls)
class KnowledgeBaseVectorDatabase(VectorDatabase):
    """Read-only VectorDatabase backed by an Amazon Bedrock Knowledge Base.

    Only `search` is supported: indexing and ingestion are managed by
    Bedrock itself, so the index-management operations raise
    NotImplementedError.
    """

    def __init__(self, region: str = 'us-east-1'):
        self.client = boto3.client("bedrock-agent-runtime", region_name=region)

    def create_index(self, index_name: str, mapping: Dict[str, Any], algorithm: str) -> None:
        raise NotImplementedError("This method is not implemented in this minimal version.")

    def update_index(self, index_name: str, new_mapping: Dict[str, Any]) -> None:
        raise NotImplementedError("This method is not implemented in this minimal version.")

    def delete_index(self, index_name: str) -> None:
        raise NotImplementedError("This method is not implemented in this minimal version.")

    def insert_document(self, index_name: str, document: Dict[str, Any]) -> None:
        raise NotImplementedError("This method is not implemented in this minimal version.")

    def _format_response(self, data):
        """Flatten a Bedrock retrieve() response into [{'text': ...}, ...]."""
        # Entries with a missing or empty text field are dropped.
        return [
            {'text': text}
            for result in data.get('retrievalResults', [])
            if (text := result.get('content', {}).get('text', ''))
        ]

    def search(self, query: str, kb_data: str, knn: int):
        """Retrieve the top-`knn` passages for `query` from knowledge base `kb_data`."""
        retrieval_query = {"text": query}
        retrieval_configuration = {
            'vectorSearchConfiguration': {
                'numberOfResults': knn
            }
        }
        response = self.client.retrieve(
            knowledgeBaseId=kb_data,
            retrievalQuery=retrieval_query,
            retrievalConfiguration=retrieval_configuration
        )
        formatted_context = self._format_response(response)
        logger.info("Getting results from knowledge base")
        return formatted_context
class EmbedProcessor:
    """Processor for embedding text chunks."""

    def __init__(self, experimentalConfig: ExperimentalConfig) -> None:
        self.experimentalConfig = experimentalConfig
        self.embedder = EmbedderFactory.create_embedder(experimentalConfig)

    def embed(self, chunks: List[str]) -> List[Tuple[List[float], str, Dict[Any, Any]]]:
        """Embed each chunk one by one, returning (embedding, chunk, metadata) tuples."""
        try:
            dimensions = self.experimentalConfig.vector_dimension
            normalize = True  # Always normalize
            logger.info(f"Embedding {len(chunks)} chunks with dimensions: {dimensions}.")

            results: List[Tuple[List[float], str, Dict[Any, Any]]] = []
            for idx, chunk in enumerate(chunks):
                logger.debug(f"Embedding chunk {idx + 1}/{len(chunks)}: {chunk[:50]}...")
                metadata, embedding = self.embedder.embed(chunk, dimensions=dimensions, normalize=normalize)
                results.append((embedding, chunk, metadata))

            logger.info("Embedding process completed successfully.")
            return results
        except Exception as e:
            logger.error(f"Error during embedding process: {e}")
            raise

    def embed_text(self, text: str) -> Tuple[Dict[Any, Any], List[float]]:
        """Embed a single piece of text, returning (metadata, embedding)."""
        try:
            metadata, embedding = self.embedder.embed(
                text,
                dimensions=self.experimentalConfig.vector_dimension,
                normalize=True  # Always normalize
            )
            logger.info("Embedding text process completed successfully.")
            return metadata, embedding
        except Exception as e:
            logger.error(f"Error during embedding process: {e}")
            raise
class InferenceProcessor:
    """Processor that runs LLM inference for user queries.

    Wraps the inferencer selected by ``InferencerFactory`` from the
    experiment configuration and delegates text generation to it.
    (Previous docstring was a copy-paste from EmbedProcessor.)
    """

    def __init__(self, experimentalConfig : ExperimentalConfig) -> None:
        # Build the concrete inferencer (Bedrock or SageMaker) for this experiment.
        self.experimentalConfig = experimentalConfig
        self.inferencer = InferencerFactory.create_inferencer(experimentalConfig)

    def generate_text(self, user_query: str, default_prompt: str, context: List[Dict] = None, **kwargs) -> Tuple[Dict[Any,Any], str]:
        """Generate an answer for `user_query` via the configured inferencer.

        Args:
            user_query: The question to answer.
            default_prompt: Default/system prompt forwarded to the inferencer.
            context: Optional retrieved context documents (list of dicts).
            **kwargs: Accepted for call compatibility; not forwarded.

        Returns:
            Tuple of (metadata, generated answer text).

        Raises:
            Exception: re-raised from the inferencer after logging.
        """
        try:
            metadata, answer = self.inferencer.generate_text(
                user_query=user_query,
                context = context,
                default_prompt = default_prompt,
                experiment_config = self.experimentalConfig
            )
            return metadata, answer
        except Exception as e:
            logger.error(f"Error generating text with Inferencer: {str(e)}")
            raise
    def rerank_documents(self, input_prompt, retrieved_documents):
        """
        Rerank a list of documents based on a query using Amazon Bedrock's reranking model.

        Args:
            input_prompt (str): The query to rerank documents for.
            retrieved_documents (list): The documents to rerank; each must be
                a dict with a 'text' key.

        Returns:
            list: Reranked documents (dicts with 'text') in relevance order,
            or [] if the API call fails or returns no results.
        """
        try:
            # Construct the model ARN using the provided model ID. Foundation
            # models use region-scoped ARNs with an empty account field
            # (hence the double colon).
            model_package_arn = f"arn:aws:bedrock:{self.region}::foundation-model/{self.rerank_model_id}"
            # Ask Bedrock to return a rank for every input document.
            rerank_return_count = len(retrieved_documents)

            # Prepare the text sources for the documents (wrap text in a dictionary)
            document_sources = [{
                "type": "INLINE",
                "inlineDocumentSource": {
                    "type": "TEXT",
                    "textDocument": {
                        "text": doc['text']  # Wrap the text in a dictionary
                    }
                }
            } for doc in retrieved_documents]

            # Call the Bedrock API for reranking
            response = self.bedrock_agent_runtime.rerank(
                queries=[{
                    "type": "TEXT",
                    "textQuery": {"text": input_prompt}
                }],
                sources=document_sources,
                rerankingConfiguration={
                    "type": "BEDROCK_RERANKING_MODEL",
                    "bedrockRerankingConfiguration": {
                        "numberOfResults": rerank_return_count,
                        "modelConfiguration": {"modelArn": model_package_arn}
                    }
                }
            )

            # Check if 'results' exist in the response and log the structure
            if 'results' not in response:
                logger.error("Error in rerank response: No results found.")
                return []

            # Create a list to store the reranked documents
            reranked_documents = []

            # Each result carries the index of the source document; map it
            # back to the original text. `rank` itself is unused — results
            # already arrive in relevance order.
            for rank, result in enumerate(response['results']):
                if isinstance(result, dict) and 'index' in result:
                    original_index = result['index']
                    reranked_documents.append({'text': retrieved_documents[original_index]['text']})
                else:
                    logger.error(f"Unexpected result format: {result}")

            logger.info(f"Reranked documents: {len(reranked_documents)}")
            # Return the reranked documents, ensuring we return only as many as requested.
            # NOTE(review): this slice is redundant — rerank_return_count equals
            # len(retrieved_documents), so the list can never be longer.
            return reranked_documents[:rerank_return_count]

        except Exception as e:
            # Catch any other unforeseen errors.
            # NOTE(review): returning [] on failure — presumably callers fall
            # back to the un-reranked order; confirm at the call site.
            logger.error(f"An error occurred: {e}")
            return []
If the references do not contain the necessary information to answer any of the questions, respond with 'Sorry, I don't have sufficient information to provide an answer.' There is no need to explain the reasoning behind your answers.",
    "examples" : [
        {
            "example": "What are the Service Level Agreements (SLAs) for Amazon Bedrock IDE? Answer: Amazon Bedrock IDE within Amazon SageMaker Unified Studio is bound by the same SLAs as Amazon Bedrock. For more information, visit the Amazon Bedrock Service Level Agreement page."
        },
        {
            "example": "What documentation and support resources are available for Amazon Bedrock IDE? Answer: To facilitate a smooth onboarding experience with Amazon Bedrock IDE in Amazon SageMaker Unified Studio, you can find detailed documentation on the Amazon Bedrock IDE User Guide. If you have any additional questions or need further assistance, please don't hesitate to reach out to your AWS account team."
        },
        {
            "example": "What are the pricing and billing models for using Amazon Bedrock IDE? Answer: Amazon Bedrock IDE comes at no extra cost, and users only pay for the usage of the underlying resources that are required by the generative AI applications that they build. For example, customers will only pay for the associated model, Guardrail and Knowledge Base that they have used on their generative AI application. For more information, please visit the Amazon Bedrock pricing page."
12 | } 13 | ], 14 | "user_prompt" : "Now answer this questions based on the above retrieved references:" 15 | } -------------------------------------------------------------------------------- /dataset/medical_abstracts/medical_abstracts.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FissionAI/FloTorch/5e27e13de07f72c23364d270bf7a086cd864469e/dataset/medical_abstracts/medical_abstracts.pdf -------------------------------------------------------------------------------- /docs/pricing-calculation.md: -------------------------------------------------------------------------------- 1 | Pricing Engine 2 | ============== 3 | 4 | The Pricing Engine estimates and calculates costs for running experiments with LLM models, covering key infrastructure and service components. 5 | 6 | Features 7 | -------- 8 | 9 | ### OpenSearch 10 | 11 | - Calculates based on the RPM (Requests Per Minute) of Bedrock models for indexing, retrieval, and evaluation 12 | 13 | ### ECS (Fargate) 14 | 15 | - Calculates ECS costs for indexing, retrieval, and evaluation tasks 16 | - Uses 8 vCPU, 16 GB memory configuration 17 | 18 | ### Bedrock Token Costs 19 | 20 | - **Embeddings**: Calculates costs based on the number of tokens processed 21 | - **Inference Input and Output**: Computes costs using token counts for both input and output during inference 22 | 23 | ### Additional Services 24 | 25 | - Includes percentage markup for costs from supporting services: 26 | - S3 27 | - DynamoDB 28 | - ECR 29 | - VPC 30 | 31 | Workflow 32 | -------- 33 | 34 | ### Directional Pricing 35 | 36 | Provides approximate pricing before experiments based on reasonable assumptions, including: 37 | 38 | Token Counts 39 | ------------ 40 | 41 | - Estimates token counts for: 42 | - Embeddings 43 | - Inference inputs and outputs 44 | - Based on: 45 | - Knowledge base 46 | - Ground truth 47 | - Chunking strategy 48 | - Number of tokens per chunk 49 | - 
Percentage overlap 50 | - Retrieval strategy 51 | - Number of prompts 52 | - KNN value 53 | 54 | Runtime Estimates 55 | ----------------- 56 | 57 | - Assumes average time per operation for: 58 | - Indexing 59 | - Retrieval 60 | - Evaluation tasks 61 | - Takes into account: 62 | - OpenSearch RPM limits 63 | - Bedrock RPM limits 64 | 65 | ### Estimation Cost 66 | 67 | Post-experiment, the Pricing Engine replaces initial assumptions with actual metrics: 68 | 69 | - Runtime per operation for: 70 | - Indexing 71 | - Retrieval 72 | - Evaluation 73 | - Overall experiment duration (from DynamoDB) 74 | - Exact token usage captured during: 75 | - Indexing processes 76 | - Retrieval processes -------------------------------------------------------------------------------- /evaluation/Dockerfile: -------------------------------------------------------------------------------- 1 | # Use the official AWS Lambda Python 3.9 runtime base image 2 | FROM public.ecr.aws/lambda/python:3.9 3 | 4 | # Create and set the working directory inside the container 5 | WORKDIR /var/task 6 | 7 | # Copy the requirements file to the working directory 8 | COPY evaluation/requirements.txt . 9 | 10 | # Install dependencies into the /var/task directory (where Lambda expects them) 11 | RUN pip install --no-cache-dir -r requirements.txt --target . 12 | 13 | # Copy the necessary files and directories 14 | COPY baseclasses/ baseclasses/ 15 | COPY config/ config/ 16 | COPY core/ core/ 17 | COPY evaluation/ evaluation/ 18 | COPY util/ util/ 19 | COPY lambda_handlers/evaluation_handler.py . 
20 | 21 | # Set environment variables 22 | ENV PYTHONPATH=/var/task 23 | ENV PYTHONUNBUFFERED=1 24 | 25 | # Lambda runtime will look for the handler function here 26 | CMD ["evaluation_handler.lambda_handler"] 27 | -------------------------------------------------------------------------------- /evaluation/eval.py: -------------------------------------------------------------------------------- 1 | from config.experimental_config import ExperimentalConfig 2 | from typing import List, Dict, Optional 3 | import logging 4 | from core.processors import EvalProcessor 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | 9 | def evaluate(experiment_config: ExperimentalConfig): 10 | try: 11 | EvalProcessor(experiment_config).evaluate() 12 | except Exception as e: 13 | logger.error(f"Error during evaluation: {e}") 14 | raise -------------------------------------------------------------------------------- /evaluation/fargate_evaluation.Dockerfile: -------------------------------------------------------------------------------- 1 | # Use the official AWS Lambda Python 3.9 runtime base image 2 | FROM --platform=linux/amd64 python:3.9-slim 3 | 4 | # Create and set the working directory inside the container 5 | WORKDIR /var/task 6 | 7 | # Copy the requirements file to the working directory 8 | COPY indexing/requirements.txt . 9 | 10 | # Install dependencies into the /var/task directory (where Lambda expects them) 11 | RUN pip install --no-cache-dir -r requirements.txt --target . 12 | 13 | # Copy the necessary files and directories 14 | COPY baseclasses/ baseclasses/ 15 | COPY config/ config/ 16 | COPY core/ core/ 17 | COPY evaluation/ evaluation/ 18 | COPY util/ util/ 19 | COPY handlers/task_processor.py . 20 | COPY handlers/fargate_eval_handler.py . 
21 | 22 | # Set environment variables 23 | ENV PYTHONPATH=/var/task 24 | ENV PYTHONUNBUFFERED=1 25 | 26 | CMD ["python", "fargate_eval_handler.py"] -------------------------------------------------------------------------------- /evaluation/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3==1.35.87 2 | botocore==1.35.87 3 | fastapi==0.115.5 4 | langchain_community==0.3.7 5 | opensearch_py==2.7.1 6 | pydantic==2.9.2 7 | python-dotenv==1.0.1 8 | uvicorn==0.32.0 9 | ragas==0.2.13 10 | RapidFuzz==3.10.1 11 | rouge_score==0.1.2 12 | langchain_aws==0.2.7 -------------------------------------------------------------------------------- /flotorch-arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FissionAI/FloTorch/5e27e13de07f72c23364d270bf7a086cd864469e/flotorch-arch.png -------------------------------------------------------------------------------- /handlers/fargate_eval_handler.py: -------------------------------------------------------------------------------- 1 | import json 2 | from task_processor import FargateTaskProcessor 3 | from config.config import Config 4 | from core.service.experimental_config_service import ExperimentalConfigService 5 | from evaluation.eval import evaluate 6 | 7 | import logging 8 | logger = logging.getLogger() 9 | logging.basicConfig(level=logging.INFO) 10 | 11 | 12 | class EvaluationProcessor(FargateTaskProcessor): 13 | def process(self): 14 | try: 15 | logger.info("Input data: %s", self.input_data) 16 | exp_config_data = self.input_data 17 | 18 | # Load base configuration 19 | config = Config.load_config() 20 | 21 | exp_config = ExperimentalConfigService(config).create_experimental_config(exp_config_data) 22 | logger.info("Into evaluate processor. 
Processing event: %s", json.dumps(exp_config_data)) 23 | 24 | evaluate(experiment_config=exp_config) 25 | 26 | 27 | self.send_task_success({ 28 | "status": "success" 29 | }) 30 | 31 | except Exception as e: 32 | logger.error(f"Error processing event: {str(e)}") 33 | self.send_task_failure({ 34 | "status": "failed", 35 | "errorMessage": str(e) 36 | }) 37 | 38 | 39 | def main(): 40 | try: 41 | fargate_processor = EvaluationProcessor() 42 | fargate_processor.process() 43 | except Exception as e: 44 | logger.error(f"Error processing event: {str(e)}") 45 | raise 46 | 47 | if __name__ == "__main__": 48 | main() 49 | -------------------------------------------------------------------------------- /handlers/fargate_indexing_handler.py: -------------------------------------------------------------------------------- 1 | import json 2 | from task_processor import FargateTaskProcessor 3 | from config.config import Config 4 | from config.experimental_config import ExperimentalConfig 5 | from indexing.indexing import chunk_embed_store 6 | import logging 7 | from core.service.experimental_config_service import ExperimentalConfigService 8 | 9 | logger = logging.getLogger() 10 | logging.basicConfig(level=logging.INFO) 11 | 12 | 13 | class IndexingProcessor(FargateTaskProcessor): 14 | def process(self): 15 | try: 16 | logger.info("Input data: %s", self.input_data) 17 | exp_config_data = self.input_data 18 | logger.info("Into indexing processor. Processing event: %s", json.dumps(exp_config_data)) 19 | 20 | # Load base configuration 21 | config = Config.load_config() 22 | 23 | exp_config = ExperimentalConfigService(config).create_experimental_config(exp_config_data) 24 | logger.info("Into indexing processor. 
Processing event: %s", json.dumps(exp_config_data)) 25 | 26 | chunk_embed_store(config, exp_config) 27 | 28 | self.send_task_success({ 29 | "status": "success" 30 | }) 31 | 32 | except Exception as e: 33 | logger.error(f"Error processing event: {str(e)}") 34 | self.send_task_failure({ 35 | "status": "failed", 36 | "errorMessage": str(e) 37 | }) 38 | 39 | 40 | def main(): 41 | try: 42 | fargate_processor = IndexingProcessor() 43 | fargate_processor.process() 44 | except Exception as e: 45 | logger.error(f"Error processing event: {str(e)}") 46 | raise 47 | 48 | if __name__ == "__main__": 49 | main() 50 | -------------------------------------------------------------------------------- /handlers/fargate_retriever_handler.py: -------------------------------------------------------------------------------- 1 | import json 2 | from task_processor import FargateTaskProcessor 3 | from config.config import Config 4 | from config.experimental_config import ExperimentalConfig 5 | from core.service.experimental_config_service import ExperimentalConfigService 6 | import logging 7 | from retriever.retriever import retrieve 8 | 9 | logger = logging.getLogger() 10 | logging.basicConfig(level=logging.INFO) 11 | 12 | 13 | class RetrieverProcessor(FargateTaskProcessor): 14 | def process(self): 15 | try: 16 | logger.info("Input data: %s", self.input_data) 17 | exp_config_data = self.input_data 18 | 19 | # Load base configuration 20 | config = Config.load_config() 21 | 22 | exp_config = ExperimentalConfigService(config).create_experimental_config(exp_config_data) 23 | logger.info("Into retriever processor. 
Processing event: %s", json.dumps(exp_config_data)) 24 | 25 | # Execute retrieve method 26 | retrieve(config, exp_config) 27 | 28 | self.send_task_success({ 29 | "status": "success" 30 | }) 31 | 32 | except Exception as e: 33 | logger.error(f"Error processing event: {str(e)}") 34 | self.send_task_failure({ 35 | "status": "failed", 36 | "errorMessage": str(e) 37 | }) 38 | 39 | 40 | def main(): 41 | try: 42 | fargate_processor = RetrieverProcessor() 43 | fargate_processor.process() 44 | except Exception as e: 45 | logger.error(f"Error processing event: {str(e)}") 46 | raise 47 | 48 | if __name__ == "__main__": 49 | main() 50 | -------------------------------------------------------------------------------- /handlers/task_processor.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import boto3 4 | from botocore.exceptions import ClientError 5 | import logging 6 | 7 | logger = logging.getLogger() 8 | logging.basicConfig(level=logging.INFO) 9 | 10 | class FargateTaskProcessor(): 11 | def __init__(self): 12 | self.sfn_client = boto3.client('stepfunctions') 13 | self.task_token = os.environ.get('TASK_TOKEN') 14 | event_data = os.environ.get('INPUT_DATA', '{}') 15 | if isinstance(event_data, str): 16 | self.input_data = json.loads(event_data) 17 | else: 18 | self.input_data = event_data 19 | logger.info(f"Input data: {self.input_data}") 20 | 21 | def process(self): 22 | raise NotImplementedError("Subclasses must implement the process method.") 23 | 24 | def send_task_success(self, output): 25 | try: 26 | self.sfn_client.send_task_success( 27 | taskToken=self.task_token, 28 | output=json.dumps(output) 29 | ) 30 | except ClientError as e: 31 | logger.error(f"Error sending task success: {str(e)}") 32 | raise 33 | 34 | def send_task_failure(self, error): 35 | try: 36 | self.sfn_client.send_task_failure( 37 | taskToken=self.task_token, 38 | error='TaskProcessingError', 39 | cause=error.get('errorMessage') 40 | 
) 41 | except ClientError as e: 42 | logger.error(f"Error sending task failure: {str(e)}") 43 | raise -------------------------------------------------------------------------------- /indexing/Dockerfile: -------------------------------------------------------------------------------- 1 | # Use the official AWS Lambda Python 3.9 runtime base image 2 | FROM public.ecr.aws/lambda/python:3.9 3 | 4 | # Create and set the working directory inside the container 5 | WORKDIR /var/task 6 | 7 | # Copy the requirements file to the working directory 8 | COPY indexing/requirements.txt . 9 | 10 | # Install dependencies into the /var/task directory (where Lambda expects them) 11 | RUN pip install --no-cache-dir -r requirements.txt --target . 12 | 13 | # Copy the necessary files and directories 14 | COPY baseclasses/ baseclasses/ 15 | COPY config/ config/ 16 | COPY core/ core/ 17 | COPY indexing/ indexing/ 18 | COPY util/ util/ 19 | COPY constants/ constants/ 20 | COPY lambda_handlers/indexing_handler.py . 21 | 22 | # Set environment variables 23 | ENV PYTHONPATH=/var/task 24 | ENV PYTHONUNBUFFERED=1 25 | 26 | # Lambda runtime will look for the handler function here 27 | CMD ["indexing_handler.lambda_handler"] 28 | -------------------------------------------------------------------------------- /indexing/__init__.py: -------------------------------------------------------------------------------- 1 | from .indexing import chunk_embed_store 2 | -------------------------------------------------------------------------------- /indexing/fargate_indexing.Dockerfile: -------------------------------------------------------------------------------- 1 | # Use the official AWS Lambda Python 3.9 runtime base image 2 | FROM --platform=linux/amd64 python:3.9-slim 3 | 4 | # Create and set the working directory inside the container 5 | WORKDIR /var/task 6 | 7 | # Copy the requirements file to the working directory 8 | COPY indexing/requirements.txt . 
9 | 10 | # Install dependencies into the /var/task directory (where Lambda expects them) 11 | RUN pip install --no-cache-dir -r requirements.txt --target . 12 | 13 | # Copy the necessary files and directories 14 | COPY baseclasses/ baseclasses/ 15 | COPY config/ config/ 16 | COPY core/ core/ 17 | COPY indexing/ indexing/ 18 | COPY util/ util/ 19 | COPY handlers/task_processor.py . 20 | COPY handlers/fargate_indexing_handler.py . 21 | 22 | # Set environment variables 23 | ENV PYTHONPATH=/var/task 24 | ENV PYTHONUNBUFFERED=1 25 | 26 | CMD ["python", "fargate_indexing_handler.py"] -------------------------------------------------------------------------------- /indexing/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3==1.35.87 2 | botocore==1.35.87 3 | langchain 4 | PyPDF2 5 | langchain-aws 6 | llama-index 7 | python-dotenv 8 | opensearch-py 9 | sagemaker 10 | ragas==0.2.6 11 | langchain_aws==0.2.7 12 | pymupdf -------------------------------------------------------------------------------- /lambda_handlers/cost_handler/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM public.ecr.aws/lambda/python:3.9 2 | 3 | # Create and set working directory 4 | WORKDIR /var/task 5 | 6 | # Copy requirements file 7 | COPY cost_handler/requirements.txt . 8 | 9 | # Install dependencies 10 | RUN pip install --no-cache-dir -r requirements.txt --target . 11 | 12 | # Copy the necessary files and directories 13 | COPY cost_handler/cost_compute_handler.py . 14 | COPY cost_handler/pricing.py . 15 | COPY cost_handler/utils.py . 
16 | 17 | # Set environment variables 18 | ENV PYTHONPATH=/var/task 19 | ENV PYTHONUNBUFFERED=1 20 | 21 | # Set the CMD to your handler 22 | CMD ["cost_compute_handler.lambda_handler"] -------------------------------------------------------------------------------- /lambda_handlers/cost_handler/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3 2 | pandas 3 | python-dotenv 4 | python-dateutil -------------------------------------------------------------------------------- /lambda_handlers/cost_handler/utils.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import csv 3 | import io 4 | import logging 5 | from botocore.exceptions import ClientError 6 | from typing import Optional 7 | import pandas as pd 8 | from datetime import datetime, timezone 9 | from decimal import Decimal 10 | 11 | def read_csv_from_s3(object_key: str, bucket_name: str, as_dataframe: bool = True) -> Optional[object]: 12 | """ 13 | Read CSV data from S3 and convert it to a list of dictionaries or a pandas DataFrame. 14 | Args: 15 | object_key (str): The key (path) of the S3 object. 16 | bucket_name (str): The name of the S3 bucket. 17 | as_dataframe (bool): If True, return a pandas DataFrame, otherwise return a list of dictionaries. 18 | Returns: 19 | Optional[object]: List of dictionaries or pandas DataFrame containing the CSV data if successful, None otherwise. 20 | Raises: 21 | ClientError: If there's an error accessing S3. 22 | csv.Error: If the content cannot be parsed as CSV. 
23 | """ 24 | logger = logging.getLogger() 25 | logger.setLevel(logging.INFO) 26 | 27 | # Initialize the S3 client 28 | s3_client = boto3.client('s3') 29 | 30 | try: 31 | logger.info(f"Reading file from S3: Bucket={bucket_name}, Key={object_key}") 32 | 33 | # Get the object from S3 34 | response = s3_client.get_object(Bucket=bucket_name, Key=object_key) 35 | 36 | # Read the data 37 | file_content = response['Body'].read().decode('utf-8') 38 | 39 | if as_dataframe: 40 | # Parse content into a pandas DataFrame 41 | csv_data = pd.read_csv(io.StringIO(file_content), float_precision="round_trip") 42 | else: 43 | # Parse content into a list of dictionaries 44 | csv_reader = csv.DictReader(file_content.splitlines()) 45 | csv_data = [row for row in csv_reader] 46 | 47 | return csv_data 48 | 49 | except ClientError as e: 50 | error_code = e.response.get('Error', {}).get('Code', 'Unknown') 51 | error_message = e.response.get('Error', {}).get('Message', 'Unknown error') 52 | logger.error(f"Failed to read from S3: {error_code} - {error_message}", exc_info=True) 53 | raise 54 | 55 | except csv.Error as e: 56 | logger.error(f"Failed to parse CSV content from {bucket_name}/{object_key}", exc_info=True) 57 | raise 58 | 59 | except Exception as e: 60 | logger.error(f"Unexpected error reading from S3: {str(e)}", exc_info=True) 61 | raise 62 | 63 | def parse_datetime(datetime_str): 64 | if not datetime_str: 65 | return None 66 | try: 67 | dt = datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M:%S.%fZ') 68 | except ValueError: 69 | dt = datetime.strptime(datetime_str, '%Y-%m-%dT%H:%M:%S%z') 70 | 71 | if dt.tzinfo is None: 72 | dt = dt.replace(tzinfo=timezone.utc) 73 | return dt 74 | 75 | # Convert all float values in the dictionary to Decimal 76 | def convert_floats_to_decimal(obj): 77 | if isinstance(obj, float): 78 | return Decimal(str(obj)) # Convert float to string first to prevent precision loss 79 | elif isinstance(obj, dict): 80 | return {k: convert_floats_to_decimal(v) for k, v 
in obj.items()} 81 | elif isinstance(obj, list): 82 | return [convert_floats_to_decimal(i) for i in obj] 83 | else: 84 | return obj -------------------------------------------------------------------------------- /lambda_handlers/evaluation_handler.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import Dict, Any 3 | from config.config import Config 4 | from config.experimental_config import ExperimentalConfig 5 | from evaluation.eval import evaluate 6 | import logging 7 | 8 | logger = logging.getLogger() 9 | logging.basicConfig(level=logging.INFO) 10 | 11 | def lambda_handler(event: Dict[str, Any], context: Any) -> Dict[str, Any]: 12 | """ 13 | Lambda handler to invoke the eval method. 14 | 15 | Args: 16 | event (Dict[str, Any]): Lambda event containing configuration parameters 17 | context (Any): Lambda context object 18 | 19 | Returns: 20 | Dict[str, Any]: Response containing status and message 21 | """ 22 | try: 23 | 24 | # Extract experimental configuration from event 25 | logger.info("Processing event: %s", json.dumps(event)) 26 | exp_config_data = event 27 | exp_config = ExperimentalConfig( 28 | execution_id=exp_config_data.get('execution_id'), 29 | experiment_id=exp_config_data.get('experiment_id'), 30 | embedding_model=exp_config_data.get('embedding_model'), 31 | retrieval_model=exp_config_data.get('retrieval_model'), 32 | vector_dimension=exp_config_data.get('vector_dimension'), 33 | gt_data=exp_config_data.get('gt_data'), 34 | index_id=exp_config_data.get('index_id'), 35 | knn_num=exp_config_data.get('knn_num'), 36 | temp_retrieval_llm=exp_config_data.get('temp_retrieval_llm'), 37 | embedding_service=exp_config_data.get('embedding_service'), 38 | retrieval_service=exp_config_data.get('retrieval_service'), 39 | aws_region=exp_config_data.get('aws_region'), 40 | chunking_strategy=exp_config_data.get('chunking_strategy'), 41 | chunk_size=exp_config_data.get('chunk_size'), 42 | 
chunk_overlap=exp_config_data.get('chunk_overlap'), 43 | hierarchical_parent_chunk_size=exp_config_data.get('hierarchical_parent_chunk_size'), 44 | hierarchical_child_chunk_size=exp_config_data.get('hierarchical_child_chunk_size'), 45 | hierarchical_chunk_overlap_percentage=exp_config_data.get('hierarchical_chunk_overlap_percentage'), 46 | kb_data=exp_config_data.get('kb_data'), 47 | n_shot_prompts=exp_config_data.get('n_shot_prompts'), 48 | n_shot_prompt_guide=exp_config_data.get('n_shot_prompt_guide'), 49 | indexing_algorithm=exp_config_data.get('indexing_algorithm'), 50 | knowledge_base=exp_config_data.get('knowledge_base', False), 51 | eval_service=exp_config_data.get('eval_service', "ragas"), 52 | eval_embedding_model=exp_config_data.get('eval_embedding_model', "amazon.titan-embed-text-v1"), #amazon.nova-pro-v1:0 53 | eval_retrieval_model=exp_config_data.get('eval_retrieval_model', "mistral.mixtral-8x7b-instruct-v0:1"), 54 | ) 55 | logger.info("Processing event: %s", json.dumps(event)) 56 | 57 | 58 | evaluate(exp_config) 59 | 60 | return { 61 | "status": "success" 62 | } 63 | except Exception as e: 64 | logger.error("Error in lambda_handler: %s", str(e)) 65 | return { 66 | "status": "failed", 67 | "errorMessage": str(e) 68 | } 69 | 70 | -------------------------------------------------------------------------------- /lambda_handlers/indexing_handler.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import Dict, Any 3 | from config.config import Config 4 | from config.experimental_config import ExperimentalConfig 5 | from indexing.indexing import chunk_embed_store 6 | import logging 7 | 8 | logger = logging.getLogger() 9 | logging.basicConfig(level=logging.INFO) 10 | 11 | def lambda_handler(event: Dict[str, Any], context: Any) -> Dict[str, Any]: 12 | """ 13 | Lambda handler to invoke the retrieve method. 
14 | 15 | Args: 16 | event (Dict[str, Any]): Lambda event containing configuration parameters 17 | context (Any): Lambda context object 18 | 19 | Returns: 20 | Dict[str, Any]: Response containing execution status and details 21 | """ 22 | try: 23 | # Validate input parameters 24 | 25 | # Extract experimental configuration from event 26 | logger.info("Processing event: %s", json.dumps(event)) 27 | exp_config_data = event 28 | exp_config = ExperimentalConfig( 29 | execution_id=exp_config_data.get('execution_id'), 30 | experiment_id=exp_config_data.get('experiment_id'), 31 | embedding_model=exp_config_data.get('embedding_model'), 32 | retrieval_model=exp_config_data.get('retrieval_model'), 33 | vector_dimension=exp_config_data.get('vector_dimension'), 34 | gt_data=exp_config_data.get('gt_data'), 35 | index_id=exp_config_data.get('index_id'), 36 | knn_num=exp_config_data.get('knn_num'), 37 | temp_retrieval_llm=exp_config_data.get('temp_retrieval_llm'), 38 | embedding_service=exp_config_data.get('embedding_service'), 39 | retrieval_service=exp_config_data.get('retrieval_service'), 40 | aws_region=exp_config_data.get('aws_region'), 41 | chunking_strategy=exp_config_data.get('chunking_strategy'), 42 | chunk_size=exp_config_data.get('chunk_size'), 43 | chunk_overlap=exp_config_data.get('chunk_overlap'), 44 | hierarchical_parent_chunk_size=exp_config_data.get('hierarchical_parent_chunk_size'), 45 | hierarchical_child_chunk_size=exp_config_data.get('hierarchical_child_chunk_size'), 46 | hierarchical_chunk_overlap_percentage=exp_config_data.get('hierarchical_chunk_overlap_percentage'), 47 | kb_data=exp_config_data.get('kb_data'), 48 | n_shot_prompts=exp_config_data.get('n_shot_prompts'), 49 | n_shot_prompt_guide=exp_config_data.get('n_shot_prompt_guide'), 50 | indexing_algorithm=exp_config_data.get('indexing_algorithm') 51 | ) 52 | logger.info("Processing event: %s", json.dumps(event)) 53 | 54 | # Load base configuration 55 | config = Config.load_config() 56 | 57 | # Execute 
indexing method 58 | chunk_embed_store
16 | 17 | Args: 18 | event (Dict[str, Any]): Lambda event containing experiment configurations 19 | context (Any): Lambda context object 20 | 21 | Returns: 22 | Dict[str, Any]: Response containing execution status and details 23 | """ 24 | try: 25 | logger.info("Processing event: %s", json.dumps(event)) 26 | 27 | # Initialize handler with configuration 28 | config = Config.load_config() 29 | opensearch_indexer = OpenSearchIndexManager(config) 30 | 31 | # Create indices 32 | opensearch_indexer.create_indices(event) 33 | 34 | return {"status": "success"} 35 | 36 | except Exception as e: 37 | logger.error("Error processing event: %s", str(e)) 38 | return { 39 | "status": "failed", 40 | "errorMessage": str(e) 41 | } -------------------------------------------------------------------------------- /lambda_handlers/retriever_handler.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import Dict, Any 3 | from retriever.retriever import retrieve 4 | from config.config import Config 5 | from config.experimental_config import ExperimentalConfig 6 | import logging 7 | 8 | 9 | logger = logging.getLogger() 10 | logger.setLevel(logging.INFO) 11 | 12 | def lambda_handler(event: Dict[str, Any], context: Any) -> Dict[str, Any]: 13 | """ 14 | Lambda handler to invoke the retrieve method. 
15 | 16 | Args: 17 | event (Dict[str, Any]): Lambda event containing configuration parameters 18 | context (Any): Lambda context object 19 | 20 | Returns: 21 | Dict[str, Any]: Response containing execution status and details 22 | """ 23 | try: 24 | # Extract experimental configuration from event 25 | logger.info("Processing event: %s", json.dumps(event)) 26 | exp_config_data = event 27 | exp_config = ExperimentalConfig( 28 | execution_id=exp_config_data.get('execution_id'), 29 | experiment_id=exp_config_data.get('experiment_id'), 30 | embedding_model=exp_config_data.get('embedding_model'), 31 | retrieval_model=exp_config_data.get('retrieval_model'), 32 | vector_dimension=exp_config_data.get('vector_dimension'), 33 | gt_data=exp_config_data.get('gt_data'), 34 | index_id=exp_config_data.get('index_id'), 35 | knn_num=exp_config_data.get('knn_num'), 36 | temp_retrieval_llm=exp_config_data.get('temp_retrieval_llm'), 37 | embedding_service=exp_config_data.get('embedding_service'), 38 | retrieval_service=exp_config_data.get('retrieval_service'), 39 | aws_region=exp_config_data.get('aws_region'), 40 | chunking_strategy=exp_config_data.get('chunking_strategy'), 41 | chunk_size=exp_config_data.get('chunk_size'), 42 | chunk_overlap=exp_config_data.get('chunk_overlap'), 43 | hierarchical_parent_chunk_size=exp_config_data.get('hierarchical_parent_chunk_size'), 44 | hierarchical_child_chunk_size=exp_config_data.get('hierarchical_child_chunk_size'), 45 | hierarchical_chunk_overlap_percentage=exp_config_data.get('hierarchical_chunk_overlap_percentage'), 46 | kb_data=exp_config_data.get('kb_data'), 47 | n_shot_prompts=exp_config_data.get('n_shot_prompts'), 48 | n_shot_prompt_guide=exp_config_data.get('n_shot_prompt_guide'), 49 | n_shot_prompt_guide_obj=exp_config_data.get('n_shot_prompt_guide_obj'), 50 | indexing_algorithm=exp_config_data.get('indexing_algorithm'), 51 | enable_guardrails=exp_config_data.get('enable_guardrails', False), 52 | 
guardrail_id=exp_config_data.get('guardrail_id'), 53 | guardrail_version=exp_config_data.get('guardrail_version'), 54 | enable_prompt_guardrails=exp_config_data.get('enable_prompt_guardrails', False), 55 | enable_context_guardrails=exp_config_data.get('enable_context_guardrails', False), 56 | enable_response_guardrails=exp_config_data.get('enable_response_guardrails', False), 57 | bedrock_knowledge_base=exp_config_data.get('bedrock_knowledge_base', False), 58 | knowledge_base=exp_config_data.get('knowledge_base', False) 59 | ) 60 | 61 | # Load base configuration 62 | config = Config.load_config() 63 | 64 | # Execute retrieve method 65 | retrieve(config, exp_config) 66 | 67 | return { 68 | "status": "success", 69 | 'statusCode': 200, 70 | 'body': json.dumps({ 71 | 'message': 'Retrieval process completed successfully', 72 | 'executionId': exp_config.execution_id, 73 | 'experimentId': exp_config.experiment_id 74 | }) 75 | } 76 | 77 | except ValueError as ve: 78 | logger.error("Validation Error: %s", str(ve)) 79 | return { 80 | "status": "failed", 81 | 'statusCode': 400, 82 | 'body': json.dumps({ 83 | 'error': 'Validation Error', 84 | 'message': str(ve) 85 | }) 86 | } 87 | except Exception as e: 88 | logger.error("Internal Server Error: %s", str(e)) 89 | return { 90 | "status": "failed", 91 | 'statusCode': 500, 92 | 'body': json.dumps({ 93 | 'error': 'Internal Server Error', 94 | 'message': str(e) 95 | }) 96 | } 97 | -------------------------------------------------------------------------------- /opensearch/opensearch.Dockerfile: -------------------------------------------------------------------------------- 1 | # Use the official AWS Lambda Python 3.9 runtime base image 2 | FROM --platform=linux/amd64 public.ecr.aws/lambda/python:3.9 3 | 4 | # Create and set the working directory inside the container 5 | WORKDIR /var/task 6 | 7 | # Copy the requirements file to the working directory 8 | COPY opensearch/opensearch_requirements.txt ./requirements.txt 9 | 10 | # 
Install dependencies into the /var/task directory (where Lambda expects them) 11 | RUN pip install --no-cache-dir -r requirements.txt --target . 12 | 13 | # Copy the necessary files and directories 14 | COPY baseclasses/ baseclasses/ 15 | COPY config/ config/ 16 | COPY core/ core/ 17 | COPY util/ util/ 18 | COPY opensearch/ opensearch/ 19 | COPY lambda_handlers/opensearch_handler.py . 20 | 21 | # Set environment variables 22 | ENV PYTHONPATH=/var/task 23 | ENV PYTHONUNBUFFERED=1 24 | 25 | # Lambda runtime will look for the handler function here 26 | CMD ["opensearch_handler.lambda_handler"] 27 | -------------------------------------------------------------------------------- /opensearch/opensearch_requirements.txt: -------------------------------------------------------------------------------- 1 | boto3==1.35.87 2 | botocore==1.35.87 3 | opensearch_py==2.7.1 4 | pydantic==2.9.2 5 | python-dotenv==1.0.1 6 | -------------------------------------------------------------------------------- /release_notes/Release v1.0.2.md: -------------------------------------------------------------------------------- 1 | ## Release v1.0.2 - 2024-12-31 2 | 3 | ### New Features 4 | - Added two new indexing algorithms HNSW SQ and HNSQ BQ 5 | 6 | When to use each option? 7 | 8 | HNSW SQ: Best choice when you want a balance between saving memory and getting very precise results. 9 | HNSW BQ: Best choice when you need to save as much memory as possible at the cost of accuracy. Ideal for large scale datasets. 10 | 11 | Where to select? 12 | 13 | Available in Indexing Algorithms dropdown on Indexing Startegy page 14 | ![Indexing Algorithms](./images/Indexing_Algorthm_HNSW_BQ_SQ.png?raw=true) 15 | 16 | - Introduced Hierarchical Chunking option in the Chunking Strategy dropdown menu 17 | 18 | Hierarchical chunking organizes information into smaller, nested pieces (child chunks) within larger, broader pieces (parent chunks). 
You can define: 19 | 20 | - Parent Chunk Size: The size of the broader chunks. 21 | - Child Chunk Size: The size of the smaller chunks. 22 | - Chunk Overlap Percentage: The shared content between consecutive child chunks to ensure context continuity. 23 | 24 | During retrieval, the system first fetches child chunks and swaps them with their corresponding parent chunks to provide a more complete understanding of the content. This ensures a balance between precision and context for optimal retrieval performance. 25 | 26 | When to use? 27 | - Ideal for Long structured documents (e.g., manuals, research papers) 28 | - For Summarization where complete understanding of context is needed 29 | 30 | Where to select? 31 | 32 | Available in Chunking dropdown on Indexing Strategy page 33 | ![Hierarchical Chunking](./images/Hierarchical_Chunking.png?raw=true) 34 | 35 | 36 | - Integrated Re-Ranking capability 37 | 38 | Re-ranking is an optional step in the RAG pipeline that can significantly improve the quality of the generated output. 39 | 40 | Retrieval without Re-ranking: 41 | 42 | - Documents are retrieved from the vector store. 43 | - These documents are then fed directly to the generative model, which generates an output based on the information it finds in these documents. 44 | 45 | Retrieval with Re-ranking: 46 | 47 | - Documents are retrieved from the vector store. 48 | - A re-ranking model then analyzes these documents and reorders them based on their relevance to the query. 49 | - The reordered documents are then fed to the generative model, which generates an output based on the information it finds in these documents. 50 | 51 | Supported Re-Ranking models: 52 | - Amazon Rerank 1.0 53 | - Cohere Rerank 3.5 54 | 55 | Supported Regions: 56 | - us-west-2 57 | 58 | Where to select? 
59 | ![ReRanking](./images/ReRanking.png?raw=true) 60 | 61 | 62 | ### Enhancements 63 | - Experiment Parallelization 64 | 65 | - In previous versions, experiments with different models (across indexing, retrieval, and evaluation) could run in parallel, but experiments using the same model were limited to sequential execution. 66 | - In v1.0.2, this limitation is removed, allowing multiple experiments per model to run concurrently. This significantly accelerates overall project completion time. 67 | 68 | - Valid Experiments are now processed asynchronously. 69 | 70 | - Breadcrumbs are now included in the header section. 71 | 72 | ### Bug Fixes 73 | 74 | - Resolved an issue where inferencing LLM details were not visible in the Experiment Details popup. 75 | - Updated the Experiment Details popup to ensure responsiveness. 76 | 77 | ### Known Issues 78 | 79 | - Missing error handling when a corrupted pdf file is uploaded. 80 | - Discrepancy between directional pricing and estimated cost in few scenarios. 81 | - The Valid Experiments page experiences delays when uploading large datasets or selecting all hyperparameters. 82 | - The Titan embeddings G1 model displays an estimated cost of $0 in evaluation metrics. 83 | - NAN values are being populated for faithfulness in the evaluation metrics. 84 | - Validation messages for certain fields on the UI are incorrect. 
-------------------------------------------------------------------------------- /release_notes/images/Hierarchical_Chunking.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FissionAI/FloTorch/5e27e13de07f72c23364d270bf7a086cd864469e/release_notes/images/Hierarchical_Chunking.png -------------------------------------------------------------------------------- /release_notes/images/Indexing_Algorthm_HNSW_BQ_SQ.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FissionAI/FloTorch/5e27e13de07f72c23364d270bf7a086cd864469e/release_notes/images/Indexing_Algorthm_HNSW_BQ_SQ.png -------------------------------------------------------------------------------- /release_notes/images/ReRanking.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FissionAI/FloTorch/5e27e13de07f72c23364d270bf7a086cd864469e/release_notes/images/ReRanking.png -------------------------------------------------------------------------------- /retriever/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM public.ecr.aws/lambda/python:3.9 2 | 3 | # Create and set working directory 4 | WORKDIR /var/task 5 | 6 | # Copy requirements file 7 | COPY retriever/requirements.txt . 8 | 9 | # Install dependencies 10 | RUN pip install --no-cache-dir -r requirements.txt --target . 11 | 12 | # Copy the necessary files and directories 13 | COPY baseclasses/ baseclasses/ 14 | COPY config/ config/ 15 | COPY core/ core/ 16 | COPY retriever/ retriever/ 17 | COPY util/ util/ 18 | COPY lambda_handlers/retriever_handler.py . 
19 | 20 | # Set environment variables 21 | ENV PYTHONPATH=/var/task 22 | ENV PYTHONUNBUFFERED=1 23 | 24 | # Set the CMD to your handler 25 | CMD ["retriever_handler.lambda_handler"] 26 | -------------------------------------------------------------------------------- /retriever/fargate_retriever.Dockerfile: -------------------------------------------------------------------------------- 1 | # Use the official AWS Lambda Python 3.9 runtime base image 2 | FROM --platform=linux/amd64 python:3.9-slim 3 | 4 | # Create and set the working directory inside the container 5 | WORKDIR /var/task 6 | 7 | # Copy the requirements file to the working directory 8 | COPY retriever/requirements.txt . 9 | 10 | # Install dependencies into the /var/task directory (where Lambda expects them) 11 | RUN pip install --no-cache-dir -r requirements.txt --target . 12 | 13 | # Copy the necessary files and directories 14 | COPY baseclasses/ baseclasses/ 15 | COPY config/ config/ 16 | COPY core/ core/ 17 | COPY retriever/ retriever/ 18 | COPY util/ util/ 19 | COPY handlers/task_processor.py . 20 | COPY handlers/fargate_retriever_handler.py . 
21 | 22 | # Set environment variables 23 | ENV PYTHONPATH=/var/task 24 | ENV PYTHONUNBUFFERED=1 25 | 26 | CMD ["python", "fargate_retriever_handler.py"] -------------------------------------------------------------------------------- /retriever/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3==1.35.87 2 | botocore==1.35.87 3 | fastapi==0.115.5 4 | langchain_community==0.3.7 5 | opensearch_py==2.7.1 6 | pydantic==2.9.2 7 | python-dotenv==1.0.1 8 | sagemaker==2.235.2 9 | ragas==0.2.6 10 | langchain_aws==0.2.7 -------------------------------------------------------------------------------- /test/FloTorch.ai TestReport.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FissionAI/FloTorch/5e27e13de07f72c23364d270bf7a086cd864469e/test/FloTorch.ai TestReport.pdf -------------------------------------------------------------------------------- /test/Integration Test Report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FissionAI/FloTorch/5e27e13de07f72c23364d270bf7a086cd864469e/test/Integration Test Report.pdf -------------------------------------------------------------------------------- /test/Test Plan for FloTorch.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FissionAI/FloTorch/5e27e13de07f72c23364d270bf7a086cd864469e/test/Test Plan for FloTorch.pdf -------------------------------------------------------------------------------- /ui/.env.example: -------------------------------------------------------------------------------- 1 | NUXT_API_ENDPOINT=YOUR_DEV_API_ENDPOINT_NOT_REQUIRED_FOR_PROD 2 | -------------------------------------------------------------------------------- /ui/.gitignore: -------------------------------------------------------------------------------- 1 | # Nuxt dev/build outputs 2 
| .output 3 | .data 4 | .nuxt 5 | .nitro 6 | .cache 7 | dist 8 | 9 | # Node dependencies 10 | node_modules 11 | 12 | # Logs 13 | logs 14 | *.log 15 | 16 | # Misc 17 | .DS_Store 18 | .fleet 19 | .idea 20 | 21 | # Local env files 22 | .env 23 | .env.* 24 | !.env.example 25 | -------------------------------------------------------------------------------- /ui/.ncurc.json: -------------------------------------------------------------------------------- 1 | { 2 | "removeRange": true, 3 | "reject": [ 4 | "typescript" 5 | ] 6 | } 7 | -------------------------------------------------------------------------------- /ui/.npmrc: -------------------------------------------------------------------------------- 1 | shamefully-hoist = true 2 | -------------------------------------------------------------------------------- /ui/.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "files.exclude": { 3 | ".nuxt": true, 4 | ".output": true 5 | }, 6 | "typescript.tsdk": "node_modules/typescript/lib", 7 | "files.associations": { 8 | "*.css": "tailwindcss" 9 | }, 10 | "editor.quickSuggestions": { 11 | "strings": "on" 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /ui/README.md: -------------------------------------------------------------------------------- 1 | # Nuxt Minimal Starter 2 | 3 | Look at the [Nuxt documentation](https://nuxt.com/docs/getting-started/introduction) to learn more. 
4 | 5 | ## Setup 6 | 7 | Make sure to install dependencies: 8 | 9 | ```bash 10 | # npm 11 | npm install 12 | 13 | # pnpm 14 | pnpm install 15 | 16 | # yarn 17 | yarn install 18 | 19 | # bun 20 | bun install 21 | ``` 22 | 23 | ## Development Server 24 | 25 | Start the development server on `http://localhost:3000`: 26 | 27 | ```bash 28 | # npm 29 | npm run dev 30 | 31 | # pnpm 32 | pnpm dev 33 | 34 | # yarn 35 | yarn dev 36 | 37 | # bun 38 | bun run dev 39 | ``` 40 | 41 | ## Production 42 | 43 | Build the application for production: 44 | 45 | ```bash 46 | # npm 47 | npm run build 48 | 49 | # pnpm 50 | pnpm build 51 | 52 | # yarn 53 | yarn build 54 | 55 | # bun 56 | bun run build 57 | ``` 58 | 59 | Locally preview production build: 60 | 61 | ```bash 62 | # npm 63 | npm run preview 64 | 65 | # pnpm 66 | pnpm preview 67 | 68 | # yarn 69 | yarn preview 70 | 71 | # bun 72 | bun run preview 73 | ``` 74 | 75 | Check out the [deployment documentation](https://nuxt.com/docs/getting-started/deployment) for more information. 
76 | -------------------------------------------------------------------------------- /ui/app/app.config.ts: -------------------------------------------------------------------------------- 1 | export default defineAppConfig({ 2 | ui: { 3 | colors: { 4 | primary: "blue", 5 | }, 6 | form: { 7 | base: "space-y-3", 8 | }, 9 | input: { 10 | slots: { 11 | root: "w-full", 12 | }, 13 | defaultVariants: { 14 | // @ts-expect-error type inference 15 | size: "xl", 16 | }, 17 | }, 18 | selectMenu: { 19 | defaultVariants: { 20 | // @ts-expect-error type inference 21 | size: "xl", 22 | }, 23 | slots : { 24 | group: 'p-1 isolate-custom', 25 | item : [ 26 | 'custom-options-group w-full' 27 | ], 28 | input: 'h-6', 29 | } 30 | }, 31 | inputNumber: { 32 | slots: { 33 | root: "w-full", 34 | }, 35 | }, 36 | table: { 37 | slots: { 38 | td: "!whitespace-normal", 39 | tr: "data-[selected=true]:bg-[#f0fbff] data-[selected=true]:border-[#006ce0]" 40 | }, 41 | compoundVariants : [ 42 | { 43 | loading: true, 44 | loadingColor: 'primary', 45 | class: { 46 | thead: 'after:bg-blue-300 ' 47 | } 48 | }, 49 | ] 50 | }, 51 | separator: { 52 | variants: { 53 | vertical: { 54 | class: 'border-color-red' 55 | } 56 | } 57 | }, 58 | formField: { 59 | variants: { 60 | required: { 61 | true: { 62 | label: "after:content-[''] after:ms-0 after:text-(--ui-error)" 63 | } 64 | } 65 | } 66 | }, 67 | tabs: { 68 | slots : { 69 | root : "gap-2", 70 | list: "custom-tab-list-group", 71 | indicator : "h-10px custom-tab-indicator", 72 | trigger : ['custom-tabs-trigger'], 73 | content : "focus-outline" 74 | } 75 | }, 76 | tooltip : { 77 | slots : { 78 | content : "arrow_box" 79 | } 80 | }, 81 | // dropdownMenu : { 82 | // slots : { 83 | // itemTrailingIcon: "secondery-color" 84 | // }, 85 | // variants : [ 86 | // { 87 | // color: 'primary', 88 | // active: true, 89 | // class: { 90 | // item: "text-red-500" 91 | // } 92 | // } 93 | // ] 94 | // // variants: { 95 | // // active: { 96 | // // true: { 97 | // // item: 
"before:bg-red-500" 98 | // // } 99 | // // } 100 | // // } 101 | // }, 102 | checkbox : { 103 | slots : { 104 | base : "", 105 | }, 106 | 107 | compoundVariants : [ 108 | { 109 | color: 'primary', 110 | checked: true, 111 | class: 'secondery-color ' 112 | }, 113 | ] 114 | 115 | 116 | }, 117 | card : { 118 | slots : { 119 | root : 'rounded-[16px]' 120 | } 121 | } 122 | }, 123 | }); 124 | -------------------------------------------------------------------------------- /ui/app/app.vue: -------------------------------------------------------------------------------- 1 | 28 | 29 | 39 | -------------------------------------------------------------------------------- /ui/app/components/Breadcumb.vue: -------------------------------------------------------------------------------- 1 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /ui/app/components/DownloadResultsButton.vue: -------------------------------------------------------------------------------- 1 | 83 | 84 | -------------------------------------------------------------------------------- /ui/app/components/FetchKbModels.vue: -------------------------------------------------------------------------------- 1 | 45 | 46 | -------------------------------------------------------------------------------- /ui/app/components/FieldTooltip.vue: -------------------------------------------------------------------------------- 1 | 18 | 19 | 20 | 21 | 26 | -------------------------------------------------------------------------------- /ui/app/components/File/Upload.vue: -------------------------------------------------------------------------------- 1 | 94 | 95 | 96 | 97 | 103 | -------------------------------------------------------------------------------- /ui/app/components/File/UploadKb.vue: -------------------------------------------------------------------------------- 1 | 39 | 40 | 41 | 42 | 59 | 
-------------------------------------------------------------------------------- /ui/app/components/ModelSelect.vue: -------------------------------------------------------------------------------- 1 | 22 | 23 | 24 | 25 | 28 | -------------------------------------------------------------------------------- /ui/app/components/Page.vue: -------------------------------------------------------------------------------- 1 | 18 | 19 | 20 | 21 | 43 | -------------------------------------------------------------------------------- /ui/app/components/Project/DownloadConfigButton.vue: -------------------------------------------------------------------------------- 1 | 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /ui/app/components/Project/Experiment/Assessments.vue: -------------------------------------------------------------------------------- 1 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /ui/app/components/Project/Experiment/DirectionalPricing.vue: -------------------------------------------------------------------------------- 1 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /ui/app/components/Project/UploadConfigButton.vue: -------------------------------------------------------------------------------- 1 | 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /ui/app/components/PromptGuideHelp.vue: -------------------------------------------------------------------------------- 1 | 5 | 6 | 7 | 8 | 43 | -------------------------------------------------------------------------------- /ui/app/components/PromptGuideSelect.vue: -------------------------------------------------------------------------------- 1 | 80 | 81 | 87 | -------------------------------------------------------------------------------- /ui/app/components/RegionSelect.vue: 
-------------------------------------------------------------------------------- 1 | 10 | 11 | 12 | 13 | 16 | -------------------------------------------------------------------------------- /ui/app/components/VectorDimensionSelect.vue: -------------------------------------------------------------------------------- 1 | 9 | 10 | 11 | 12 | 15 | -------------------------------------------------------------------------------- /ui/app/composables/api.ts: -------------------------------------------------------------------------------- 1 | export const useApi = $fetch.create({ 2 | baseURL: "/api", 3 | }); -------------------------------------------------------------------------------- /ui/app/composables/fileoperation.ts: -------------------------------------------------------------------------------- 1 | import slugify from 'slugify' 2 | 3 | export const useFileSlugifyOperation = (file: File) => { 4 | return slugify(file.name, { replacement: '_', lower: true }) 5 | } 6 | 7 | 8 | -------------------------------------------------------------------------------- /ui/app/composables/projects.ts: -------------------------------------------------------------------------------- 1 | import type { Guardrail } from "~~/shared/types/projects.type"; 2 | 3 | export const useProjects = (data?: ProjectsListQuery) => { 4 | return useApi("/execution", { 5 | query: data, 6 | }); 7 | }; 8 | 9 | export const useProject = (id: string) => { 10 | return useApi(`/execution/${id}`); 11 | }; 12 | 13 | export const useProjectExperiments = (id: string) => { 14 | return useApi(`/execution/${id}/experiment`); 15 | }; 16 | 17 | export const useProjectExperiment = (projectId: string, id: string) => { 18 | return useApi(`/execution/${projectId}/experiment/${id}`); 19 | }; 20 | 21 | export const useProjectCreate = (data: Record) => { 22 | return useApi<{ execution_id: string }>("/execution", { 23 | method: "POST", 24 | body: data, 25 | }); 26 | }; 27 | 28 | export const useProjectExecute = (id: string) 
=> { 29 | return useApi<{ execution_id: string }>(`/execution/${id}/execute`, { 30 | method: "POST", 31 | }); 32 | }; 33 | 34 | 35 | export const useProjectValidExperiments = (id: string) => { 36 | return useApi<{message: string}>(`/execution/${id}/valid_experiment`); 37 | }; 38 | 39 | export const useProjectValidExperimentsByPoll = (id: string) => { 40 | // required to access the response.code, so that i have used useApi.raw below 41 | return useApi.raw(`/execution/${id}/valid_experiment/poll`); 42 | }; 43 | 44 | 45 | export const useProjectCreateExperiments = ( 46 | id: string, 47 | data: ValidExperiment[] 48 | ) => { 49 | return useApi<{ execution_id: string }>(`/execution/${id}/experiment`, { 50 | method: "POST", 51 | body: data, 52 | }); 53 | }; 54 | 55 | // export const usePresignedUploadUrl = () => { 56 | // return useApi<{ 57 | // kb_data: { path: string; presignedurl: string }; 58 | // gt_data: { path: string; presignedurl: string }; 59 | // uuid: string; 60 | // }>("presignedurl"); 61 | // }; 62 | 63 | export const usePresignedUploadUrl = (uuid: string) => { 64 | return useApi<{ 65 | gt_data: { path: string; presignedurl: string }; 66 | }>("presignedurl", { 67 | method: "POST", 68 | body: { unique_id: uuid } 69 | }); 70 | }; 71 | 72 | export const usePresignedUploadUrlKb = (id: string, files: string[]) => { 73 | return useApi("presigned_url_kb", { 74 | method: "POST", 75 | body: { 76 | unique_id: id, 77 | files: files 78 | } 79 | }); 80 | } 81 | 82 | export const useProjectExperimentQuestionMetrics = ( 83 | id: string, 84 | experimentId: string 85 | ) => { 86 | return useApi<{ question_metrics: ExperimentQuestionMetric[] }>( 87 | `/execution/${id}/experiment/${experimentId}/question_metrics`, 88 | { 89 | method: "GET", 90 | } 91 | ); 92 | }; 93 | 94 | export const useGuardrailsList = (region:string) => { 95 | return useApi<{ 96 | data : Guardrail[] 97 | }>("/bedrock/guardrails?region="+region, { 98 | method: "GET" 99 | }); 100 | }; 101 | 102 | export 
const useFetchAllKbModels = (region:string) => { 103 | return useApi("/bedrock/knowledge_bases?region="+region); 104 | } 105 | 106 | export const useKBConfig = () => { 107 | return useApi<{ 108 | data: { 109 | "opensearch": { 110 | "configured": boolean 111 | } 112 | }; 113 | }>("config"); 114 | }; 115 | 116 | export const useHumanEvalQueryExperiments = (experimentIds: any, query: string) => { 117 | return useApi<{ 118 | data: { 119 | results: any[] 120 | } 121 | }>("/heval/query-experiments", { 122 | method: "POST", 123 | body: { 124 | experiment_ids: experimentIds, 125 | query: query 126 | } 127 | }); 128 | } 129 | 130 | export const useHumanEvalUpvote = (experimentIds: any) => { 131 | return useApi("/heval/upvote", { 132 | method: "POST", 133 | body: experimentIds 134 | 135 | }); 136 | } -------------------------------------------------------------------------------- /ui/app/composables/useShareData.ts: -------------------------------------------------------------------------------- 1 | export const useSharedData = () => useState('pageInfo', () => ({ 2 | title : '', 3 | to : {}, 4 | description : '' 5 | })) -------------------------------------------------------------------------------- /ui/app/pages/index.vue: -------------------------------------------------------------------------------- 1 | 10 | 11 | 67 | 68 | -------------------------------------------------------------------------------- /ui/app/pages/projects/[id].vue: -------------------------------------------------------------------------------- 1 | 55 | 56 | 57 | 58 | 100 | -------------------------------------------------------------------------------- /ui/app/pages/projects/[id]/execute.vue: -------------------------------------------------------------------------------- 1 | 46 | 47 | 67 | -------------------------------------------------------------------------------- /ui/app/pages/projects/[id]/experiments/index.vue: -------------------------------------------------------------------------------- 
1 | 13 | 14 | -------------------------------------------------------------------------------- /ui/app/pages/projects/[id]/humanevaluation.vue: -------------------------------------------------------------------------------- 1 | 8 | 9 | 14 | 15 | -------------------------------------------------------------------------------- /ui/app/pages/projects/[id]/index.vue: -------------------------------------------------------------------------------- 1 | 36 | 37 | 38 | 39 | 63 | -------------------------------------------------------------------------------- /ui/app/pages/projects/create.vue: -------------------------------------------------------------------------------- 1 | 18 | 19 | 29 | -------------------------------------------------------------------------------- /ui/app/pages/projects/index.vue: -------------------------------------------------------------------------------- 1 | 34 | 35 | 65 | -------------------------------------------------------------------------------- /ui/app/plugins/03.vuequery.ts: -------------------------------------------------------------------------------- 1 | import type { 2 | DehydratedState, 3 | VueQueryPluginOptions, 4 | } from "@tanstack/vue-query"; 5 | import { 6 | VueQueryPlugin, 7 | QueryClient, 8 | hydrate, 9 | dehydrate, 10 | MutationCache, 11 | } from "@tanstack/vue-query"; 12 | // Nuxt 3 app aliases 13 | import { defineNuxtPlugin, useState } from "#imports"; 14 | import type { NuxtError } from "#app"; 15 | 16 | export default defineNuxtPlugin((nuxt) => { 17 | const vueQueryState = useState("vue-query"); 18 | 19 | // Modify your Vue Query global settings here 20 | const queryClient = new QueryClient({ 21 | defaultOptions: { 22 | queries: { staleTime: 5000, refetchIntervalInBackground: true }, 23 | }, 24 | mutationCache: new MutationCache({ 25 | onError: (error) => { 26 | const toast = useToast(); 27 | toast.add({ 28 | title: error.data?.detail?.description || "Something went wrong", 29 | color: "error", 30 | icon: 
"i-lucide-x-circle", 31 | }); 32 | }, 33 | }), 34 | }); 35 | const options: VueQueryPluginOptions = { queryClient }; 36 | 37 | nuxt.vueApp.use(VueQueryPlugin, options); 38 | 39 | if (import.meta.server) { 40 | nuxt.hooks.hook("app:rendered", () => { 41 | vueQueryState.value = dehydrate(queryClient); 42 | }); 43 | } 44 | 45 | if (import.meta.client) { 46 | hydrate(queryClient, vueQueryState.value); 47 | } 48 | }); 49 | 50 | declare module "@tanstack/vue-query" { 51 | interface Register { 52 | defaultError: NuxtError<{ detail: { description: string } }>; 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /ui/app/plugins/mitt.ts: -------------------------------------------------------------------------------- 1 | import mitt from 'mitt' 2 | import { defineNuxtPlugin } from '#app' 3 | 4 | export default defineNuxtPlugin(() => { 5 | const emitter = mitt() 6 | 7 | return { 8 | provide: { 9 | emit: emitter.emit, // Will be available as this.$emit 10 | on: emitter.on, // Will be available as this.$on 11 | off: emitter.off, // Will be available as this.$off 12 | }, 13 | } 14 | }) -------------------------------------------------------------------------------- /ui/app/utils/json-csv.ts: -------------------------------------------------------------------------------- 1 | //Function to convert json to csv 2 | export const jsonToCsv = (json: Record[]): string => { 3 | // Handle empty input 4 | if (!Array.isArray(json) || json.length === 0) { 5 | return ""; 6 | } 7 | 8 | try { 9 | // Flatten the first object to get all possible headers 10 | const flattenedFirst = flattenObject(json[0] || {}); 11 | const headers = Object.keys(flattenedFirst); 12 | 13 | // Create CSV header row 14 | const headerRow = headers.map((header) => escapeCsvValue(header)).join(","); 15 | 16 | // Convert data rows with flattened objects 17 | const rows = json.map((row) => { 18 | const flatRow = flattenObject(row); 19 | return headers.map((header) => 
escapeCsvValue(flatRow[header])).join(","); 20 | }); 21 | 22 | return [headerRow, ...rows].join("\n"); 23 | } catch (error) { 24 | console.error("Error converting JSON to CSV:", error); 25 | return ""; 26 | } 27 | }; 28 | 29 | // New helper function to flatten nested objects 30 | const flattenObject = ( 31 | obj: Record, 32 | prefix = "" 33 | ): Record => { 34 | return Object.keys(obj).reduce((acc: Record, key: string) => { 35 | const prefixedKey = prefix ? `${prefix}.${key}` : key; 36 | 37 | if (obj[key] && typeof obj[key] === "object" && !Array.isArray(obj[key])) { 38 | Object.assign(acc, flattenObject(obj[key], prefixedKey)); 39 | } else { 40 | acc[prefixedKey] = obj[key]; 41 | } 42 | 43 | return acc; 44 | }, {}); 45 | }; 46 | 47 | // Helper function to properly escape CSV values 48 | const escapeCsvValue = (value: any): string => { 49 | if (value === null || value === undefined) { 50 | return ""; 51 | } 52 | 53 | const stringValue = String(value); 54 | 55 | // If value contains comma, newline, or double quote, wrap in quotes 56 | if ( 57 | stringValue.includes(",") || 58 | stringValue.includes("\n") || 59 | stringValue.includes('"') 60 | ) { 61 | // Double up any existing quotes and wrap in quotes 62 | return `"${stringValue.replace(/"/g, '""')}"`; 63 | } 64 | 65 | return stringValue; 66 | }; 67 | -------------------------------------------------------------------------------- /ui/eslint.config.mjs: -------------------------------------------------------------------------------- 1 | // @ts-check 2 | // @ts-expect-error 3 | import eslintConfigPrettier from "eslint-config-prettier"; 4 | import withNuxt from "./.nuxt/eslint.config.mjs"; 5 | 6 | export default withNuxt( 7 | { 8 | rules: { 9 | "vue/first-attribute-linebreak": "off", 10 | "vue/multi-word-component-names": "off", 11 | "@typescript-eslint/no-explicit-any": "off", 12 | }, 13 | }, 14 | eslintConfigPrettier 15 | ); 16 | -------------------------------------------------------------------------------- 
/ui/nuxt.config.ts: -------------------------------------------------------------------------------- 1 | // https://nuxt.com/docs/api/configuration/nuxt-config 2 | import { loadEnv } from "vite"; 3 | 4 | const env = loadEnv( 5 | process.env.NODE_ENV || "development", 6 | process.cwd(), 7 | "NUXT_" 8 | ); 9 | 10 | export default defineNuxtConfig({ 11 | compatibilityDate: "2024-11-28", 12 | ssr: false, 13 | future: { 14 | compatibilityVersion: 4, 15 | }, 16 | colorMode: { 17 | preference: "light", 18 | }, 19 | devtools: { enabled: true }, 20 | modules: ["@vueuse/nuxt", "@nuxt/eslint", "@nuxt/ui"], 21 | css: ["~/assets/css/main.css"], 22 | experimental: { 23 | typedPages: true, 24 | viewTransition: true, 25 | }, 26 | vite: { 27 | server: { 28 | proxy: { 29 | "/api": { 30 | target: env.NUXT_API_ENDPOINT, 31 | changeOrigin: true, 32 | }, 33 | }, 34 | }, 35 | }, 36 | }); 37 | -------------------------------------------------------------------------------- /ui/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@flotorch/ui", 3 | "private": true, 4 | "type": "module", 5 | "scripts": { 6 | "build": "nuxt build", 7 | "dev": "nuxt dev", 8 | "generate": "nuxt generate", 9 | "preview": "nuxt preview", 10 | "postinstall": "nuxt prepare", 11 | "lint": "eslint .", 12 | "lint:fix": "eslint . 
--fix", 13 | "typecheck": "nuxt typecheck" 14 | }, 15 | "dependencies": { 16 | "@nuxt/eslint": "0.7.2", 17 | "@nuxt/ui": "3.0.0-alpha.9", 18 | "@tanstack/vue-query": "5.62.1", 19 | "@uppy/aws-s3": "^4.1.3", 20 | "@uppy/dashboard": "^4.1.3", 21 | "@uppy/drag-drop": "^4.0.5", 22 | "@uppy/file-input": "^4.0.4", 23 | "@uppy/progress-bar": "^4.0.2", 24 | "@uppy/tus": "^4.1.5", 25 | "@uppy/vue": "^2.0.3", 26 | "@uppy/webcam": "^4.0.3", 27 | "@uppy/xhr-upload": "^4.2.3", 28 | "@vee-validate/zod": "4.14.7", 29 | "@vueuse/math": "12.0.0", 30 | "@vueuse/nuxt": "12.0.0", 31 | "@vueuse/router": "12.0.0", 32 | "mitt": "^3.0.1", 33 | "nuxt": "3.14.1592", 34 | "scule": "1.3.0", 35 | "slugify": "^1.6.6", 36 | "uuid": "^11.0.3", 37 | "vee-validate": "4.14.7", 38 | "vue": "latest", 39 | "vue-router": "latest", 40 | "zod": "3.23.8" 41 | }, 42 | "devDependencies": { 43 | "@iconify-json/lucide": "1.2.17", 44 | "eslint-config-prettier": "9.1.0", 45 | "prettier": "3.4.1", 46 | "typescript": "5.6.2", 47 | "vue-tsc": "2.1.10" 48 | }, 49 | "packageManager": "pnpm@9.14.4", 50 | "pnpm": { 51 | "overrides": { 52 | "typescript": "5.6.2" 53 | } 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /ui/public/android-chrome-192x192.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FissionAI/FloTorch/5e27e13de07f72c23364d270bf7a086cd864469e/ui/public/android-chrome-192x192.png -------------------------------------------------------------------------------- /ui/public/android-chrome-512x512.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FissionAI/FloTorch/5e27e13de07f72c23364d270bf7a086cd864469e/ui/public/android-chrome-512x512.png -------------------------------------------------------------------------------- /ui/public/apple-touch-icon.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/FissionAI/FloTorch/5e27e13de07f72c23364d270bf7a086cd864469e/ui/public/apple-touch-icon.png -------------------------------------------------------------------------------- /ui/public/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FissionAI/FloTorch/5e27e13de07f72c23364d270bf7a086cd864469e/ui/public/favicon-16x16.png -------------------------------------------------------------------------------- /ui/public/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FissionAI/FloTorch/5e27e13de07f72c23364d270bf7a086cd864469e/ui/public/favicon-32x32.png -------------------------------------------------------------------------------- /ui/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FissionAI/FloTorch/5e27e13de07f72c23364d270bf7a086cd864469e/ui/public/favicon.ico -------------------------------------------------------------------------------- /ui/public/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FissionAI/FloTorch/5e27e13de07f72c23364d270bf7a086cd864469e/ui/public/favicon.png -------------------------------------------------------------------------------- /ui/public/flotorch-hero.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FissionAI/FloTorch/5e27e13de07f72c23364d270bf7a086cd864469e/ui/public/flotorch-hero.gif -------------------------------------------------------------------------------- /ui/public/logo.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/FissionAI/FloTorch/5e27e13de07f72c23364d270bf7a086cd864469e/ui/public/logo.png -------------------------------------------------------------------------------- /ui/public/prompt-guide.json: -------------------------------------------------------------------------------- 1 | { 2 | "system_prompt": "You are a helpful assistant.", 3 | "examples": [ 4 | { 5 | "question": "Question 1 to include to N shot prompts", 6 | "answer": "Answer 1 to include to N shot prompts" 7 | }, 8 | { 9 | "question": "Question 2 to include to N shot prompts", 10 | "answer": "Answer 2 to include to N shot prompts" 11 | } 12 | ], 13 | "user_prompt": "Now answer the user's question" 14 | } 15 | -------------------------------------------------------------------------------- /ui/public/robots.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /ui/public/site.webmanifest: -------------------------------------------------------------------------------- 1 | {"name":"","short_name":"","icons":[{"src":"/android-chrome-192x192.png","sizes":"192x192","type":"image/png"},{"src":"/android-chrome-512x512.png","sizes":"512x512","type":"image/png"}],"theme_color":"#ffffff","background_color":"#ffffff","display":"standalone"} -------------------------------------------------------------------------------- /ui/server/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "../.nuxt/tsconfig.server.json" 3 | } 4 | -------------------------------------------------------------------------------- /ui/shared/types/breadcumb.type.ts: -------------------------------------------------------------------------------- 1 | export interface BreadcrumbItem { 2 | label: string; 3 | icon?: string; 4 | to: string; 5 | disabled?: boolean; 6 | } 7 | 8 | export interface CustomLabels { 9 | [key: string]: string; 
10 | } 11 | -------------------------------------------------------------------------------- /ui/shared/types/experiments.type.ts: -------------------------------------------------------------------------------- 1 | export interface ValidExperiment { 2 | region: string; 3 | chunking_strategy: string; 4 | vector_dimension: number; 5 | chunk_size: number; 6 | chunk_overlap: number; 7 | hierarchical_parent_chunk_size: number; 8 | hierarchical_child_chunk_size: number; 9 | hierarchical_chunk_overlap_percentage: number; 10 | indexing_algorithm: string; 11 | knn_num: number; 12 | temp_retrieval_llm: number; 13 | n_shot_prompts: number; 14 | embedding_service: string; 15 | embedding_model: string; 16 | retrieval_service: string; 17 | retrieval_model: string; 18 | directional_pricing: number; 19 | rerank_model_id: string; 20 | } 21 | 22 | export interface ExperimentQuestionMetric { 23 | question: string; 24 | execution_id: string; 25 | reference_contexts: string[]; 26 | experiment_id: string; 27 | gt_answer: string; 28 | generated_answer: string; 29 | id: string; 30 | eval_metrics: EvalMetrics | EvalMetricsM; 31 | } 32 | -------------------------------------------------------------------------------- /ui/shared/types/humaneval.type.ts: -------------------------------------------------------------------------------- 1 | import { z } from "zod"; 2 | 3 | export const HumanEvalSchema = z.object({ 4 | message: z.string(), 5 | }); 6 | 7 | export type HumanEval = z.infer; -------------------------------------------------------------------------------- /ui/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | // https://nuxt.com/docs/guide/concepts/typescript 3 | "extends": "./.nuxt/tsconfig.json" 4 | } 5 | -------------------------------------------------------------------------------- /usage_guide.md: -------------------------------------------------------------------------------- 1 | # Using the FloTorch Application for RAG 
Evaluation 2 | 3 | After you login to the App Runner instance hosting the FloTorch UI application, you will be greeted with a Welcome Page. 4 | 5 | ## Welcome Page 6 | 7 | Upon accessing FloTorch, you are greeted with a welcome page. Click ‘Get started’ to initiate your first project. 8 | 9 | --- 10 | 11 | ## Viewing Projects 12 | 13 | You’ll be taken to the "Projects" section to view all existing projects. 14 | Each project is listed with details such as ID, Name, Region, Status, and Date of completion or initiation. 15 | **Example ID**: `5GM2E` 16 | 17 | --- 18 | 19 | ## Creating a New Project 20 | 21 | When creating a new project, you'll go through three main steps where you'll need to specify the necessary settings and options for your project. 22 | 23 | You will also have the option to use a previously saved configuration file if you have one. Simply click on 'Upload config' and select a valid JSON file with all necessary parameters. The application will automatically load these parameters and display the available experiments you can run. If you don't have a configuration file, please proceed with manual setup. 24 | 25 | ### Data Strategy 26 | 27 | - Click on "Create Project" to start a new project. 28 | - Fill in required fields such as **Project Name**, **Region**, **Knowledge Base Data**, and **Ground Truth Data**. 29 | 30 | ### Indexing Strategy 31 | 32 | In this page, you’ll be configuring experiment indexing-related settings. Define experiment parameters, including: 33 | 34 | - **Chunking Strategy** 35 | - **Vector Dimension** 36 | - **Chunk Size** 37 | - **Chunk Overlap Percentage** 38 | - **Indexing Algorithm** (e.g., HNSW) 39 | - **Embedding Model** (e.g., Titan Embeddings V2 - Text) 40 | 41 | ### Retrieval Strategy 42 | 43 | In this page, you’ll be configuring experiment retrieval-related settings. Define the parameters: 44 | 45 | - **N shot prompt**; provide a shot prompt file if you’re going with non-zero shot prompt. 
46 | - **KNN** 47 | - **Inferencing LLM** 48 | - **Inferencing LLM Temperature** 49 | 50 | Once these are selected, all the valid configurations will be displayed on the next page based on the choices you’ve made. 51 | 52 | You will have the option to save the valid configurations by clicking the ‘Download config’ button. 53 | 54 | Please review the configurations and select all the experiments that you’d like to run by marking the checkboxes and click ‘Run’. Now all the experiments you had marked will be displayed on a table, review it and click ‘Confirm’ to start the experiments. 55 | 56 | You’ll now be taken back to the projects page where you can monitor the status of experiments. 57 | 58 | Each experiment is tracked by ID, embedding model used, indexing algorithm, and other parameters. 59 | **Example statuses** include "Not Started", "In Progress", “Failed” or "Completed". 60 | 61 | If you select an experiment that is in progress, you’ll be able to view its status in the experiment table. 62 | **Statuses** include: 63 | 64 | - "Not started" 65 | - "Indexing in progress" 66 | - "Retrieval in progress" 67 | - "Completed" 68 | 69 | --- 70 | 71 | ## Evaluation 72 | 73 | Once an experiment is completed, an evaluation will be run based on a few metrics and the results will be displayed in the experiment table along with directional pricing and the duration. 74 | The evaluation metrics include: 75 | 76 | - **Faithfulness** 77 | - **Context Precision** 78 | - **Aspect Critic** 79 | - **Answer Relevancy** 80 | 81 | If you’d like to see the answers the model generated, you can click on the experiment ID to view them along with the questions and the ground truth answers. 82 | 83 | You’ll also have the option to view all the parameters of the experiment configuration; click the ‘details’ button on the same page. 
class KnowledgeBaseUtils():
    def __init__(self, region):
        """Initialize with loaded config and a bedrock-agent client for *region*."""
        self.config = Config.load_config()
        self.client = boto3.client("bedrock-agent", region_name=region)
        # Per-instance name cache for get_kb_name(). Replaces functools.lru_cache
        # on the method, which keyed on `self` and kept instances alive (B019).
        self._kb_name_cache = {}

    def list_knowledge_bases(self):
        """
        List vector knowledge bases that have at least one usable data source.

        Returns:
            list[dict]: entries with keys 'kb_id', 'name' and 'description' for
            knowledge bases that:
              - are in ACTIVE status,
              - are of type VECTOR,
              - have at least one data source in AVAILABLE status.

        Raises:
            Exception: propagated if listing or describing knowledge bases fails.
        """
        valid_knowledge_bases = []
        try:
            # Page through all knowledge bases; maxResults only caps a single
            # page (1000), so follow nextToken until exhausted.
            next_token = None
            while True:
                kwargs = {"maxResults": 1000}
                if next_token:
                    kwargs["nextToken"] = next_token
                response = self.client.list_knowledge_bases(**kwargs)
                logger.debug(f"Knowledge bases list response: {response}")

                # `or []` guards against a missing/None summaries key.
                for item in response.get('knowledgeBaseSummaries') or []:
                    if item.get('status') != "ACTIVE":
                        continue

                    kb_id = item.get("knowledgeBaseId")
                    kb_name = item.get("name")
                    kb_description = item.get("description", "")
                    logger.debug(f"Processing knowledge base: {kb_name} ({kb_id})")

                    # Get detailed configuration; only vector-type knowledge
                    # bases are relevant here.
                    kb_details = self.client.get_knowledge_base(knowledgeBaseId=kb_id)
                    if kb_details['knowledgeBase']['knowledgeBaseConfiguration']['type'] != "VECTOR":
                        logger.debug(f"Skipping non-vector knowledge base: {kb_name} ({kb_id})")
                        continue
                    logger.info(f"Found vector knowledge base: {kb_name} ({kb_id})")

                    # List the data sources attached to this knowledge base and
                    # require at least one that is ready to serve documents.
                    data_sources = self.client.list_data_sources(knowledgeBaseId=kb_id, maxResults=1000)
                    has_available_data_source = any(
                        ds['status'] == 'AVAILABLE' for ds in data_sources['dataSourceSummaries']
                    )
                    if has_available_data_source:
                        logger.info(f"Found at least one AVAILABLE data source for knowledge base: {kb_name}")
                        valid_knowledge_bases.append({
                            'kb_id': kb_id,
                            'name': kb_name,
                            'description': kb_description
                        })

                next_token = response.get("nextToken")
                if not next_token:
                    break

        except Exception as e:
            logger.error(f"Failed to list knowledge bases: {str(e)}")
            raise e

        return valid_knowledge_bases

    def get_kb_name(self, kb_id):
        """Return the name of knowledge base *kb_id*, caching results per instance."""
        if kb_id in self._kb_name_cache:
            return self._kb_name_cache[kb_id]
        try:
            response = self.client.get_knowledge_base(knowledgeBaseId=kb_id)
            name = response['knowledgeBase']['name']
            self._kb_name_cache[kb_id] = name
            return name
        except Exception as e:
            logger.error(f"Failed to get knowledge base name: {str(e)}")
            raise e
class DateTimeUtils:
    """Helpers for parsing ISO-8601-style timestamp strings into aware datetimes."""

    # Accepted timestamp formats, tried in order.
    _FORMATS = (
        '%Y-%m-%dT%H:%M:%S.%fZ',   # e.g. 2024-01-02T03:04:05.123456Z
        '%Y-%m-%dT%H:%M:%S%z',     # e.g. 2024-01-02T03:04:05+05:30 (or trailing Z)
        '%Y-%m-%dT%H:%M:%S',       # naive timestamp, assumed UTC (previously raised)
    )

    @staticmethod
    def parse_datetime(datetime_str):
        """Parse *datetime_str* into a timezone-aware datetime.

        Args:
            datetime_str: ISO-8601-like timestamp string, or empty/None.

        Returns:
            datetime | None: an aware datetime (naive inputs are assumed UTC),
            or None for empty/None input.

        Raises:
            ValueError: if the string matches none of the supported formats.
        """
        if not datetime_str:
            return None

        dt = None
        last_error = None
        for fmt in DateTimeUtils._FORMATS:
            try:
                dt = datetime.strptime(datetime_str, fmt)
                break
            except ValueError as err:
                last_error = err
        if dt is None:
            # All formats failed; surface the last parse error to the caller.
            raise last_error

        if dt.tzinfo is None:
            dt = dt.replace(tzinfo=timezone.utc)
        return dt
def deserialize_dynamodb_json(dynamodb_json: Dict[str, Any]) -> Dict[str, Any]:
    """
    Deserialize DynamoDB JSON into a regular Python dictionary.

    Args:
        dynamodb_json (Dict[str, Any]): DynamoDB-typed item, e.g.
            {"name": {"S": "x"}, "count": {"N": "1"}}

    Returns:
        Dict[str, Any]: plain Python dictionary, or None when input is None.

    Notes:
        - 'N' values are returned as float for backward compatibility, so
          integer attributes lose their int type.
        - Unrecognized type wrappers are returned unchanged.
    """
    # Handle None/Null response from DynamoDB
    if dynamodb_json is None:
        return None

    try:
        def _deserialize_value(value: Dict[str, Any]) -> Any:
            if not isinstance(value, dict):
                return value

            if 'N' in value:
                return float(value['N'])
            elif 'S' in value:
                return value['S']
            elif 'BOOL' in value:
                return value['BOOL']
            elif 'NULL' in value:
                return None
            elif 'L' in value:
                return [_deserialize_value(item) for item in value['L']]
            elif 'M' in value:
                return {k: _deserialize_value(v) for k, v in value['M'].items()}
            elif 'SS' in value:
                return set(value['SS'])
            elif 'NS' in value:
                return {float(n) for n in value['NS']}
            elif 'B' in value:
                # Binary attribute: returned as-is (bytes).
                return value['B']
            elif 'BS' in value:
                # Binary set attribute.
                return set(value['BS'])
            else:
                # Not a recognized DynamoDB type wrapper; pass through.
                # (The previous 'Nul' branch was removed: it is not a valid
                # DynamoDB type tag and was shadowed by 'NULL' above.)
                return value

        return {k: _deserialize_value(v) for k, v in dynamodb_json.items()}

    except Exception as e:
        logger.error(f"Error deserializing DynamoDB JSON: {e}")
        raise
13 | """ 14 | error_detail = {"type": error_type, "description": description} 15 | return error_detail -------------------------------------------------------------------------------- /util/guard_rails_utils.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | from typing import Dict, List 3 | import logging 4 | 5 | logger = logging.getLogger() 6 | logger.setLevel(logging.INFO) 7 | 8 | class GuardRailsUtils: 9 | @staticmethod 10 | def get_bedrock_guardrails(region) -> List[Dict]: 11 | "Static method to fetch AWS Bedrock guardrails." 12 | 13 | try: 14 | client = boto3.client('bedrock', region_name=region) 15 | 16 | logger.info("Fetching guardrails.") 17 | response = client.list_guardrails() 18 | guardrails = response.get("guardrails", []) 19 | logger.info("Guardrails fetched.") 20 | 21 | all_guardrails = [] 22 | 23 | for guardrail in guardrails: 24 | guardrail_id = guardrail.get("id") 25 | 26 | # Fetch all versions for this guardrail 27 | logger.info(f"Fetching versions for guardrail: {guardrail_id}") 28 | versions_response = client.list_guardrails( 29 | guardrailIdentifier=guardrail_id 30 | ) 31 | versions = versions_response.get("guardrails", []) 32 | 33 | # Handle pagination for versions 34 | next_token = versions_response.get("nextToken") 35 | while next_token: 36 | versions_response = client.list_guardrails( 37 | guardrailIdentifier=guardrail_id, 38 | nextToken=next_token 39 | ) 40 | versions.extend(versions_response.get("guardrails", [])) 41 | next_token = versions_response.get("nextToken") 42 | 43 | # Add each version as a separate entry 44 | for version in versions: 45 | all_guardrails.append({ 46 | "guardrails_id": version.get("id"), 47 | "description": version.get("description"), 48 | "name": version.get("name"), 49 | "version": version.get("version"), 50 | "arn": version.get("arn") 51 | }) 52 | 53 | logger.info(f"Successfully fetched {len(all_guardrails)} guardrail versions.") 54 | return all_guardrails 
def extract_text_from_pdf(file_path: str) -> str:
    """Extract text from a PDF file.

    Args:
        file_path: Path to the PDF file on disk.

    Returns:
        str: concatenated text of every page (pages with no extractable
        text contribute an empty string).

    Raises:
        Exception: propagated if the file cannot be read as a PDF.
    """
    try:
        logger.info(f"Extracting text from PDF: {file_path}")
        reader = PdfReader(file_path)
        # `extract_text()` may return None for image-only pages; coalesce to "".
        page_texts = [page.extract_text() or "" for page in reader.pages]
        logger.info("Text extraction from PDF successful.")
        return "".join(page_texts)
    except Exception as e:
        logger.error(f"Failed to extract text from PDF: {e}")
        raise
def process_pdf_from_folder(file_path: str) -> list[str]:
    """Extract text from every file in a folder.

    Args:
        file_path: Path to a directory containing PDF files.

    Returns:
        list[str]: extracted text, one entry per file.
        (FIX: the previous annotation claimed ``-> str`` while the function
        has always returned a list.)

    Raises:
        Exception: propagated if any file cannot be read as a PDF.
    """
    try:
        text_data = []
        # sorted() makes the output order deterministic; os.listdir() returns
        # entries in arbitrary, platform-dependent order.
        for file in sorted(os.listdir(file_path)):
            file_text = extract_text_from_pdf(os.path.join(file_path, file))
            text_data.append(file_text)
        logger.info(f"Extracted text from all files. Number of files: {len(text_data)}")
        return text_data
    except Exception as e:
        logger.error(f"Failed to extract text from PDF: {e}")
        raise