├── .github ├── ISSUE_TEMPLATE │ ├── feat-request.md │ └── fix-request.md ├── PULL_REQUEST_TEMPLATE.md ├── actions │ └── setup │ │ └── action.yml └── workflows │ ├── check.yml │ └── publish.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .python-version ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── Dockerfile ├── LICENCE.txt ├── MLproject ├── README.md ├── confs ├── .gitkeep ├── deployment.yaml ├── feature_eng.yaml ├── generate_rag_dataset.yaml ├── monitoring.yaml └── rag_chain_config.yaml ├── data ├── .gitkeep ├── datasets │ ├── .gitkeep │ ├── rag_dataset.csv │ └── rag_dataset.json └── documents │ └── sample_hr_manual.pdf ├── docker-compose.yml ├── docs └── .gitkeep ├── invoke.yaml ├── llmops-project.code-workspace ├── mlruns └── .gitkeep ├── notebooks └── .gitkeep ├── outputs └── .gitkeep ├── poetry.lock ├── poetry.toml ├── pyproject.toml ├── serving_endpoint ├── Dockerfile ├── README.md ├── client.py ├── ecs │ ├── Makefile │ ├── README.md │ ├── deploy-image-to-ecs.sh │ └── infra │ │ ├── app │ │ ├── ecs │ │ │ ├── main.tf │ │ │ ├── output.tf │ │ │ └── variable.tf │ │ ├── main.tf │ │ ├── network │ │ │ ├── main.tf │ │ │ ├── outputs.tf │ │ │ └── variable.tf │ │ ├── terraform.tfstate.backup │ │ └── variable.tf │ │ └── setup │ │ ├── main.tf │ │ ├── output.tf │ │ └── variable.tf ├── example.env ├── imgs │ ├── Model-deployment.webp │ ├── litserve-deployment.png │ ├── litserve.png │ └── model-registry.webp ├── requirements.txt └── server.py ├── src └── llmops_project │ ├── __init__.py │ ├── __main__.py │ ├── io │ ├── configs.py │ ├── services.py │ └── vector_db.py │ ├── models │ ├── chatbot_with_guardrails.py │ └── rag_chatbot.py │ ├── pipelines │ ├── __init__.py │ ├── base.py │ ├── deployment │ │ ├── deploy_model.py │ │ └── register_model.py │ ├── feature_engineering │ │ ├── create_vector_db.py │ │ └── ingest_documents.py │ ├── managers │ │ ├── deployment_manager.py │ │ ├── feature_engineering_manager.py │ │ └── monitoring_manager.py │ └── monitoring │ │ ├── generate_rag_dataset.py │ │ ├── post_deploy_eval.py │ │ └── pre_deploy_eval.py │ ├── scripts.py │ └── settings.py ├── static ├── autoscaling.png ├── experiment_tracking.png ├── guage.png ├── llmops-rag.png ├── llmops.png ├── llmopsmindmap.png ├── model_version.png ├── monitoring.png ├── rag_lifecycle.png ├── tracing-top.gif ├── vector_db.png └── with_and_without_guardrails.svg ├── tasks ├── __init__.py ├── checks.py ├── cleans.py ├── commits.py ├── containers.py ├── docs.py ├── formats.py ├── installs.py ├── mlflow.py ├── packages.py ├── projects.py └── serve.py └── tests ├── confs ├── invalid │ └── 0. invalid.yaml └── valid │ ├── 0. feature_engineering.yaml │ ├── 1. deployment.yaml │ └── 2. monitoring.yaml ├── conftest.py ├── documents └── sample_hr_manual.pdf ├── io ├── test_configs.py └── test_services.py ├── pipelines ├── deployment │ ├── test_deploy_model.py │ └── test_register_model.py ├── feature_engineering │ ├── test_create_vector_db.py │ └── test_ingest_documents.py ├── monitoring │ ├── test_generate_rag_dataset.py │ └── test_pre_deploy_eval.py └── test_base.py └── test_scripts.py /.github/ISSUE_TEMPLATE/feat-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature Request 3 | about: A new feature. 
4 | title: "[FEAT] " 5 | labels: feat 6 | assignees: callmesora 7 | --- 8 | 9 | ## Description 10 | 11 | ## Motivation 12 | 13 | ## Solutions -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/fix-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Fix Request 3 | about: A bug fix 4 | title: "[FIX] " 5 | labels: fix 6 | assignees: callmesora 7 | --- 8 | 9 | ## Bug Description 10 | 11 | ## Expected Behavior 12 | 13 | ## Steps to Reproduce 14 | 15 | ## Additional Context -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | # Changes 2 | 3 | # Reasons 4 | 5 | # Testing 6 | 7 | # Impacts 8 | 9 | # Notes -------------------------------------------------------------------------------- /.github/actions/setup/action.yml: -------------------------------------------------------------------------------- 1 | name: Setup 2 | description: Setup for project workflows 3 | runs: 4 | using: composite 5 | steps: 6 | - run: pipx install invoke poetry 7 | shell: bash 8 | - uses: actions/setup-python@v5 9 | with: 10 | python-version: 3.12 11 | cache: poetry 12 | -------------------------------------------------------------------------------- /.github/workflows/check.yml: -------------------------------------------------------------------------------- 1 | name: Check 2 | on: 3 | pull_request: 4 | branches: 5 | - main 6 | concurrency: 7 | cancel-in-progress: true 8 | group: ${{ github.workflow }}-${{ github.ref }} 9 | jobs: 10 | checks: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v4 14 | - uses: ./.github/actions/setup 15 | - run: poetry install --with checks 16 | - run: poetry run invoke checks.format 17 | - run: poetry run invoke checks.code 18 | - run: poetry run invoke checks.type 19 | - run: poetry run invoke checks.security 20 | # TODO: Add tests once figuring out how to mock mlflow and qdrant 21 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish 2 | on: 3 | release: 4 | types: 5 | - edited 6 | - published 7 | env: 8 | DOCKER_IMAGE: ghcr.io/callmesora/llmops-project 9 | concurrency: 10 | cancel-in-progress: true 11 | group: publish-workflow 12 | jobs: 13 | pages: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v4 17 | - uses: ./.github/actions/setup 18 | - run: poetry install --with docs 19 | - run: poetry run invoke docs 20 | - uses: JamesIves/github-pages-deploy-action@v4 21 | with: 22 | folder: docs/ 23 | branch: gh-pages 24 | packages: 25 | permissions: 26 | packages: write 27 | runs-on: ubuntu-latest 28 | steps: 29 | - uses: actions/checkout@v4 30 | - uses: ./.github/actions/setup 31 | - run: poetry install --with dev 32 | - run: poetry run invoke packages 33 | - uses: docker/login-action@v3 34 | with: 35 | registry: ghcr.io 36 | username: ${{ github.actor }} 37 | password: ${{ secrets.GITHUB_TOKEN }} 38 | - uses: docker/setup-buildx-action@v3 39 | - uses: docker/build-push-action@v6 40 | with: 41 | push: true 42 | context: . 
43 | cache-to: type=gha 44 | cache-from: type=gha 45 | tags: | 46 | ${{ env.DOCKER_IMAGE }}:latest 47 | ${{ env.DOCKER_IMAGE }}:${{ github.ref_name }} 48 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # https://git-scm.com/docs/gitignore 2 | 3 | # Build 4 | /dist/ 5 | /build/ 6 | 7 | # Cache 8 | .cache/ 9 | .coverage* 10 | .mypy_cache/ 11 | .ruff_cache/ 12 | .pytest_cache/ 13 | .terraform.* 14 | .terraform 15 | terraform.tfstate 16 | 17 | # Editor 18 | /.idea/ 19 | /.vscode/ 20 | .ipynb_checkpoints/ 21 | 22 | # Environs 23 | .env 24 | /.venv/ 25 | 26 | # Project 27 | /docs/* 28 | /mlruns/* 29 | /outputs/* 30 | !**/.gitkeep 31 | /qdrant_data/* 32 | qa_dataset.csv 33 | qa_dataset.json 34 | 35 | # Python 36 | *.py[cod] 37 | __pycache__/ 38 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # https://pre-commit.com 2 | # https://pre-commit.com/hooks.html 3 | 4 | default_language_version: 5 | python: python3.12 6 | repos: 7 | - repo: https://github.com/pre-commit/pre-commit-hooks 8 | rev: v4.6.0 9 | hooks: 10 | - id: check-added-large-files 11 | - id: check-case-conflict 12 | - id: check-merge-conflict 13 | - id: check-toml 14 | - id: check-yaml 15 | - id: debug-statements 16 | - id: end-of-file-fixer 17 | - id: mixed-line-ending 18 | - id: trailing-whitespace 19 | - repo: https://github.com/python-poetry/poetry 20 | rev: 1.8.3 21 | hooks: 22 | - id: poetry-check 23 | - repo: https://github.com/astral-sh/ruff-pre-commit 24 | rev: v0.5.0 25 | hooks: 26 | - id: ruff 27 | - id: ruff-format 28 | - repo: https://github.com/commitizen-tools/commitizen 29 | rev: v3.27.0 30 | hooks: 31 | - id: commitizen 32 | - id: commitizen-branch 33 | stages: [push] 34 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.12 2 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## v0.2.0 (2024-12-10) 2 | 3 | ### Feat 4 | 5 | - enable Commitizen hooks in pre-commit configuration 6 | - add Contributor Covenant Code of Conduct and remove deprecated client.py file 7 | - add issue and pull request templates for feature and fix requests 8 | - update environment configuration and improve service structure 9 | - add llmops package 10 | 11 | ### Fix 12 | 13 | - **tasks**: add color to terminal when running inv commands 14 | - update .gitignore to exclude qa_dataset files 15 | - adjust mypy settings and update test parameters for document ingestion 16 | - update test order 17 | - update package name 18 | - update dependencies and mlflow track serv 19 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, 
education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | github@fmind.dev. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. 
No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | https://www.contributor-covenant.org/faq. Translations are available at 128 | https://www.contributor-covenant.org/translations. -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # https://docs.docker.com/engine/reference/builder/ 2 | 3 | FROM python:3.12 4 | COPY dist/*.whl . 5 | RUN pip install *.whl 6 | CMD ["llmops-project", "--help"] 7 | -------------------------------------------------------------------------------- /LICENCE.txt: -------------------------------------------------------------------------------- 1 | Copyright 2024 Pedro Azevedo (callmesora) 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
--------------------------------------------------------------------------------
/MLproject:
--------------------------------------------------------------------------------
1 | # https://mlflow.org/docs/latest/projects.html
2 | 
3 | name: LLMOps Project
4 | python_env: python_env.yaml
5 | entry_points:
6 |   main:
7 |     parameters:
8 |       job: string
9 |     command: "PYTHONPATH=src python -m llmops_project {job}"
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # LLMOps Python Package
2 | 
3 | [![check.yml](https://github.com/callmesora/llmops-python-package/actions/workflows/check.yml/badge.svg)](https://github.com/callmesora/llmops-python-package/actions/workflows/check.yml)
4 | [![publish.yml](https://github.com/callmesora/llmops-python-package/actions/workflows/publish.yml/badge.svg)](https://github.com/callmesora/llmops-python-package/actions/workflows/publish.yml)
5 | 
6 | [![License](https://img.shields.io/github/license/callmesora/llmops-python-package)](https://github.com/callmesora/llmops-python-package/blob/main/LICENCE.txt)
7 | [![Release](https://img.shields.io/github/v/release/callmesora/llmops-python-package)](https://github.com/callmesora/llmops-python-package/releases)
8 | 
9 | **This repository contains a Python code base with best practices designed to support your LLMOps initiatives.**
10 | 
11 | ![LLMOps Python Package](static/llmops.png)
12 | 
13 | The package leverages several [tools](#tools) and [tips](#tips) to make your LLMOps experience as flexible, robust, and productive as possible.
14 | 
15 | 
16 | You can use this package as part of your LLMOps toolkit or platform (e.g., Model Registry, Experiment Tracking, Realtime Inference, ...).
17 | 
18 | This package is a variation/fork of these resources, specifically tailored for LLM use cases:
19 | 
20 | **Related Resources**:
21 | - **[MLOps Coding Course (Learning)](https://github.com/MLOps-Courses/mlops-coding-course)**: Learn how to create, develop, and maintain a state-of-the-art MLOps code base.
22 | - **[Cookiecutter MLOps Package (Template)](https://github.com/fmind/cookiecutter-mlops-package)**: Start building and deploying Python packages and Docker images for MLOps tasks.
23 | 
24 | ![](static/llmopsmindmap.png)
25 | 
26 | # Table of Contents
27 | 
28 | - [LLMOps Python Package](#llmops-python-package)
29 | - [Table of Contents](#table-of-contents)
30 | - [Install](#install)
31 |   - [Prerequisites](#prerequisites)
32 |   - [Installation](#installation)
33 |   - [Next Steps](#next-steps)
34 | - [Usage](#usage)
35 |   - [Configuration](#configuration)
36 |   - [Execution](#execution)
37 |   - [Automation](#automation)
38 |   - [Workflows](#workflows)
39 | - [Tools](#tools)
40 |   - [Automation](#automation-1)
41 |     - [Commits: Commitizen](#commits-commitizen)
42 |     - [Git Hooks: Pre-Commit](#git-hooks-pre-commit)
43 |     - [Tasks: PyInvoke](#tasks-pyinvoke)
44 |   - [CI/CD](#cicd)
45 |     - [Runner: GitHub Actions](#runner-github-actions)
46 |   - [CLI](#cli)
47 |     - [Parser: Argparse](#parser-argparse)
48 |     - [Logging: Loguru](#logging-loguru)
49 |   - [Code](#code)
50 |     - [Coverage: Coverage](#coverage-coverage)
51 |     - [Editor: VS Code](#editor-vs-code)
52 |     - [Formatting: Ruff](#formatting-ruff)
53 |     - [Quality: Ruff](#quality-ruff)
54 |     - [Security: Bandit](#security-bandit)
55 |     - [Testing: Pytest](#testing-pytest)
56 |     - [Typing: Mypy](#typing-mypy)
57 |     - [Versioning: Git](#versioning-git)
58 |   - [Configs](#configs)
59 |     - [Format: YAML](#format-yaml)
60 |     - [Parser: OmegaConf](#parser-omegaconf)
61 |     - [Reader: Cloudpathlib](#reader-cloudpathlib)
62 |     - [Validator: Pydantic](#validator-pydantic)
63 |   - [Model](#model)
64 |     - [Format: Mlflow Model](#format-mlflow-model)
65 |     - [Registry: Mlflow Registry](#registry-mlflow-registry)
66 |     - [Tracking: Mlflow Tracking](#tracking-mlflow-tracking)
67 |   - [Package](#package)
68 |     - [Evolution: Changelog](#evolution-changelog)
69 |     - [Format: Wheel](#format-wheel)
70 |     - [Manager: Poetry](#manager-poetry)
71 |     - [Runtime: Docker](#runtime-docker)
72 |   - [Programming](#programming)
73 |     - [Language: Python](#language-python)
74 |     - [Version: Pyenv](#version-pyenv)
75 |   - [Observability](#observability)
76 |     - [Monitoring: Mlflow Evaluate](#monitoring--mlflow-evaluate)
77 |     - [Infrastructure: Mlflow System Metrics](#infrastructure-mlflow-system-metrics)
78 |   - [Model Serving](#endpoint)
79 |     - [Serving Endpoint: Litserve](#serving-endpoint)
80 | - [Tips](#tips)
81 |   - [Design Patterns](#design-patterns)
82 |     - [Directed-Acyclic Graph](#directed-acyclic-graph)
83 |     - [Program Service](#program-service)
84 |     - [Soft Coding](#soft-coding)
85 |     - [SOLID Principles](#solid-principles)
86 |     - [IO Separation](#io-separation)
87 |   - [Python Powers](#python-powers)
88 |     - [Context Manager](#context-manager)
89 |     - [Python Package](#python-package)
90 |   - [Software Engineering](#software-engineering)
91 |     - [Code Typing](#code-typing)
92 |     - [Config Typing](#config-typing)
93 |     - [Object Oriented](#object-oriented)
94 |     - [Semantic Versioning](#semantic-versioning)
95 |   - [Testing Tricks](#testing-tricks)
96 |     - [Parallel Testing](#parallel-testing)
97 |     - [Test Fixtures](#test-fixtures)
98 |   - [VS Code](#vs-code)
99 |     - [Code Workspace](#code-workspace)
100 |     - [GitHub Copilot](#github-copilot)
101 | - [Resources](#resources)
102 |   - [Python](#python)
103 |   - [AI/ML/MLOps](#aimlmlops)
104 | 
105 | # Architecture
106 | 
107 | ## RAG Evaluation
108 | 
109 | RAG Evaluation is performed by generating a synthetic dataset of question-answer (QA) pairs. This dataset serves as a baseline to evaluate the performance of different RAG systems before deploying them.
By using a consistent and controlled dataset, we can objectively compare the effectiveness of various RAG implementations.
110 | 
111 | ![RAG Lifecycle](static/rag_lifecycle.png)
112 | 
113 | ## Model Registry
114 | 
115 | We use a pattern where all LLM chains are stored and logged in Mlflow. Each chain is evaluated against the RAG evaluation baseline. If a chain demonstrates better performance than the previous ones, it is registered and promoted to production. This ensures that only the best-performing models are deployed.
116 | 
117 | ![Experiment Tracking](static/experiment_tracking.png)
118 | 
119 | ## Guardrails
120 | Guardrails are important in production because they prevent the model from exhibiting unexpected or undesired behaviors.
121 | 
122 | This LLMOps template comes with setup config files for guardrails that censor PII and restrict topics, built on top of [Guardrails AI](https://github.com/guardrails-ai/guardrails?tab=readme-ov-file).
123 | 
124 | ![Guardrails](static/with_and_without_guardrails.svg)
125 | 
126 | 
127 | ## Endpoint Deployment
128 | 
129 | Having a model registry is crucial for managing and running deployments. In this architecture, we use [Litserve](https://lightning.ai/docs/litserve/home), which builds on top of FastAPI, to deploy our LLMs. This setup allows for flexible deployment options, including Kubernetes and AWS Lambda, ensuring that our models can be scaled and managed efficiently.
130 | 
131 | ![Litserve](static/autoscaling.png)
132 | 
133 | You can check how to serve the model, as well as a code template to deploy on AWS Fargate, under the `/serving_endpoint` folder.
134 | 
135 | 
136 | ## Model Monitoring
137 | 
138 | Model monitoring is crucial for ensuring the performance and reliability of your LLMs in production. Continuous monitoring helps in detecting issues such as performance degradation and unexpected behaviors, which can significantly impact the user experience and business outcomes.
139 | 
140 | ![Mlflow Traces](static/tracing-top.gif)
141 | 
142 | 
143 | We use Mlflow Traces for monitoring our LLMs. This allows us to track various metrics and logs associated with the models over time. Additionally, we run evaluations on these traces using Mlflow Evaluate, with the LLM itself acting as a judge. This setup ensures that we maintain high standards for model performance and can quickly identify and address any issues that arise.
144 | 
145 | 
146 | ## LLMOps Design Pattern
147 | In this project we use a design pattern very similar to the one recommended by Databricks, where each model is logged in Mlflow before it is deployed.
148 | ![LLMOps Databricks](static/llmops-rag.png)
149 | 
150 | The main variations here are that the deployment pipeline is orchestrated as two steps (registration plus validation, then final deployment on the registry), that we measure differences in LLM metrics instead of data drift, and that we are not using the Mlflow AI Gateway (although this, or LiteLLM, could be an addition in the future).
151 | 
152 | 
153 | # Install
154 | 
155 | This section details the requirements, actions, and next steps to kickstart your LLMOps project.
156 | 
157 | ## Prerequisites
158 | 
159 | - [Python>=3.10](https://www.python.org/downloads/): to benefit from [the latest features and performance improvements](https://docs.python.org/3/whatsnew/3.12.html)
160 | - [Poetry>=1.8.2](https://python-poetry.org/): to initialize the project [virtual environment](https://docs.python.org/3/library/venv.html) and its dependencies
161 | 
162 | The project is managed with the package manager [Poetry](https://python-poetry.org/).
163 | 
164 | ## Credentials for LLM
165 | 
166 | To access Bedrock, OpenAI, or any other LLM provider, you need to set up your credentials. These credentials will allow the package to authenticate and interact with the respective services.
167 | In this code template we use Bedrock, but feel free to adapt it to your needs.
168 | 
169 | Example for AWS:
170 | 
171 | **Environment Variables**:
172 | ```bash
173 | export AWS_ACCESS_KEY_ID=your_access_key_id
174 | export AWS_SECRET_ACCESS_KEY=your_secret_access_key
175 | export AWS_REGION=your_default_region
176 | ```
177 | 
178 | 
179 | - You can easily replace `ChatBedrock` with `ChatOllama` or any other provider.
180 | 
181 | 
182 | ## Installation
183 | 
184 | 1. [Clone this GitHub repository](https://docs.github.com/en/repositories/creating-and-managing-repositories/cloning-a-repository) on your computer
185 | ```bash
186 | # with ssh (recommended)
187 | $ git clone -
188 | # with https
189 | $ git clone -
190 | ```
191 | 2. [Run the project installation with poetry](https://python-poetry.org/docs/)
192 | ```bash
193 | $ cd llmops-python-package/
194 | $ poetry install
195 | ```
196 | 3. Adapt the code base to your needs
197 | 
198 | ## Next Steps
199 | 
200 | There are numerous ways to incorporate this package into your MLOps platform.
201 | 
202 | For example, you might choose Databricks or AWS for your compute platform and model registry.
203 | 
204 | Feel free to modify the package code to suit your specific needs. Best of luck!
205 | 
206 | 
207 | # Usage
208 | 
209 | This section explains how to configure the project code and execute it on your system.
210 | 
211 | ## Configuration
212 | 
213 | You can add or edit config files in the `confs/` folder to change the program behavior.
214 | 
215 | ```yaml
216 | # confs/deployment.yaml
217 | job:
218 |   KIND: DeploymentJob
219 |   staging_alias: "champion"
220 |   production_alias: "production"
221 |   registry_model_name: "rag-chatbot-with-guardrails"
222 |   llm_confs: "/confs/rag_chain_config.yaml"
223 |   llm_model_code_path: "/src/llmops_project/models/chatbot_with_guardrails.py"
224 |   vector_store_path: "http://localhost:6333"
225 | ```
226 | 
227 | This config file instructs the program to start a `DeploymentJob` with the given parameters.
228 | You can find all the parameters of your program in the `src/[package]/pipelines/*.py` files.
229 | 
230 | You can also print the full schema supported by this package using `poetry run llmops-project --schema`.
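Under the hood, the YAML file is parsed with OmegaConf and validated with Pydantic, and the `KIND` field selects the job class through a discriminated union. Here is a minimal sketch of that mechanism (the class and field names are illustrative assumptions; the real job classes live under `src/llmops_project/pipelines/`):

```python
import typing as T

import pydantic as pdt
from omegaconf import OmegaConf


class DeploymentJob(pdt.BaseModel):
    KIND: T.Literal["DeploymentJob"]
    registry_model_name: str
    staging_alias: str = "champion"


class FeatureEngineeringJob(pdt.BaseModel):
    KIND: T.Literal["FeatureEngineeringJob"]
    document_path: str
    collection_name: str


class Settings(pdt.BaseModel):
    # the KIND field decides which job class Pydantic instantiates
    job: DeploymentJob | FeatureEngineeringJob = pdt.Field(discriminator="KIND")


config = OmegaConf.load("confs/deployment.yaml")  # parse the YAML file
settings = Settings.model_validate(OmegaConf.to_object(config))  # validate early
print(type(settings.job).__name__)  # -> DeploymentJob
```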
231 | 
232 | ## Execution
233 | 
234 | The project code can be executed with poetry during development; this is the recommended order:
235 | 
236 | ```bash
237 | $ poetry run llmops-project confs/generate_rag_dataset.yaml # Run once to generate the RAG dataset
238 | $ poetry run llmops-project confs/feature_eng.yaml # Creates the Vector DB and ingests documents
239 | $ poetry run llmops-project confs/deployment.yaml # Deploys the model on the model registry
240 | $ poetry run llmops-project confs/monitoring.yaml # Monitors model inferences "every week"
241 | ```
242 | 
243 | To deploy the serving endpoint, you can use the following automation:
244 | 
245 | ```bash
246 | $ inv serve # Launches the Litserve server on port 8000
247 | ```
248 | 
249 | Note: you can also deploy this as a container or to the cloud with the instructions under `/serving_endpoint`
250 | 
251 | ## Pipelines
252 | This project is organized under a manager pattern: each manager is responsible for the workflow orchestration between tasks/jobs. (In production you could use an orchestrator such as Airflow for this.)
253 | 
254 | ### Generate Rag Dataset
255 | This pipeline generates a RAG QA dataset under `/data/datasets/`
256 | 
257 | ### Feature Engineering
258 | This pipeline creates a Vector Database collection and ingests documents into it in the form of vectors.
259 | ![Vector Database](static/vector_db.png)
260 | 
261 | ### Deployment
262 | This pipeline:
263 | - registers a model using Mlflow
264 | - promotes the model to the `champion` alias
265 | - validates the model input/output and signatures
266 | - sets the tag `passed_tests` on the Mlflow registry to True/False depending on whether the model passed the tests
267 | - runs an evaluation of QA factuality on the QA dataset we created previously
268 | - depending on the result of this evaluation, assigns the model a `beats_threshold` tag set to True or False
269 | - if the model `beats_threshold` and `passed_tests`, we can promote it to `production`
270 | 
271 | At the end of this pipeline, we should have a model version in production on the model registry.
272 | ![Model Version](static/model_version.png)
273 | 
274 | 
275 | ### Monitoring
276 | This pipeline is meant to be run as a weekly job to monitor the performance of the model against given metrics, such as default metrics or even LLM-as-a-judge.
277 | ![Monitoring](static/monitoring.png)
278 | 
279 | These metrics are also saved with a gauge display in case you want to load them into a dashboard elsewhere.
280 | ![Gauge](static/guage.png)
281 | 
282 | 
283 | 
284 | 
285 | In production, you can build, ship, and run the project as a Python package:
286 | 
287 | ```bash
288 | poetry build
289 | poetry publish # optional
290 | python -m pip install [package]
291 | [package] confs/deployment.yaml
292 | ```
293 | 
294 | You can also install and use this package as a library for another AI/ML project:
295 | 
296 | ```python
297 | from [package] import pipelines
298 | 
299 | job = pipelines.DeploymentJob(...)
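# entering the job as a context manager starts its services (e.g., logging,
# mlflow tracking) and guarantees a clean shutdown, even when the run fails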
300 | with job as runner:
301 |     runner.run()
302 | ```
303 | 
304 | **Additional tips**:
305 | - You can pass extra configs from the command line using the `--extras` flag
306 |   - Use it to pass runtime values (e.g., a result from previous job executions)
307 | - You can pass several config files on the command line to merge them from left to right
308 |   - You can define common configurations shared between jobs (e.g., model params)
309 | - The right job task will be selected automatically thanks to [Pydantic Discriminated Unions](https://docs.pydantic.dev/latest/concepts/unions/#discriminated-unions)
310 |   - This is a great way to run any job supported by the application (training, tuning, ...)
311 | 
312 | 
313 | ## Automation
314 | 
315 | This project includes several automation tasks to easily repeat common actions.
316 | 
317 | You can invoke the actions from the [command-line](https://www.pyinvoke.org/) or [VS Code extension](https://marketplace.visualstudio.com/items?itemName=dchanco.vsc-invoke).
318 | 
319 | ```bash
320 | # create a code archive
321 | $ inv packages
322 | # list other actions
323 | $ inv --list
324 | ```
325 | 
326 | **Available tasks**:
327 | - **checks.all (checks)** - Run all check tasks.
328 | - **checks.code** - Check the code with ruff.
329 | - **checks.coverage** - Check the coverage with coverage.
330 | - **checks.format** - Check the formats with ruff.
331 | - **checks.poetry** - Check poetry config files.
332 | - **checks.security** - Check the security with bandit.
333 | - **checks.test** - Check the tests with pytest.
334 | - **checks.type** - Check the types with mypy.
335 | - **cleans.all (cleans)** - Run all tools and folders tasks.
336 | - **cleans.cache** - Clean the cache folder.
337 | - **cleans.coverage** - Clean the coverage tool.
338 | - **cleans.dist** - Clean the dist folder.
339 | - **cleans.docs** - Clean the docs folder.
340 | - **cleans.environment** - Clean the project environment file.
341 | - **cleans.folders** - Run all folders tasks.
342 | - **cleans.mlruns** - Clean the mlruns folder.
343 | - **cleans.mypy** - Clean the mypy tool.
344 | - **cleans.outputs** - Clean the outputs folder.
345 | - **cleans.poetry** - Clean the poetry lock file.
346 | - **cleans.pytest** - Clean the pytest tool.
347 | - **cleans.projects** - Run all projects tasks.
348 | - **cleans.python** - Clean python caches and bytecodes.
349 | - **cleans.requirements** - Clean the project requirements file.
350 | - **cleans.reset** - Run all tools, folders, and sources tasks.
351 | - **cleans.ruff** - Clean the ruff tool.
352 | - **cleans.sources** - Run all sources tasks.
353 | - **cleans.tools** - Run all tools tasks.
354 | - **cleans.venv** - Clean the venv folder.
355 | - **commits.all (commits)** - Run all commit tasks.
356 | - **commits.bump** - Bump the version of the package.
357 | - **commits.commit** - Commit all changes with a message.
358 | - **commits.info** - Print a guide for messages.
359 | - **containers.all (containers)** - Run all container tasks.
360 | - **containers.build** - Build the container image with the given tag.
361 | - **containers.compose** - Start up docker compose.
362 | - **containers.run** - Run the container image with the given tag.
363 | - **docs.all (docs)** - Run all docs tasks.
364 | - **docs.api** - Document the API with pdoc using the given format and output directory.
365 | - **docs.serve** - Serve the API docs with pdoc using the given format and computer port.
366 | - **formats.all (formats)** - Run all format tasks.
367 | - **formats.imports** - Format python imports with ruff.
368 | - **formats.sources** - Format python sources with ruff.
369 | - **installs.all (installs)** - Run all install tasks.
370 | - **installs.poetry** - Install poetry packages.
371 | - **installs.pre-commit** - Install pre-commit hooks on git.
372 | - **mlflow.all (mlflow)** - Run all mlflow tasks.
373 | - **mlflow.doctor** - Run mlflow doctor to diagnose issues.
374 | - **mlflow.serve** - Start mlflow server with the given host, port, and backend URI.
375 | - **packages.all (packages)** - Run all package tasks.
376 | - **packages.build** - Build a python package with the given format.
377 | - **projects.all (projects)** - Run all project tasks.
378 | - **projects.environment** - Export the project environment file.
379 | - **projects.requirements** - Export the project requirements file.
380 | - **projects.run** - Run an mlflow project from the MLproject file.
381 | 
382 | # Tools
383 | 
384 | This section motivates the use of developer tools to improve your coding experience.
385 | - Most developer tools in this project are the same ones used in the mlops-python package. Check that resource for a detailed explanation of the motivation behind these tools.
386 | 
387 | 
388 | 
389 | # Tips
390 | 
391 | This section gives some tips and tricks to enrich the development experience.
392 | 
393 | ## [Design Patterns](https://en.wikipedia.org/wiki/Software_design_pattern)
394 | 
395 | ### [Directed-Acyclic Graph](https://en.wikipedia.org/wiki/Directed_acyclic_graph)
396 | 
397 | **You should use a Directed-Acyclic Graph (DAG) to connect the steps of your ML pipeline.**
398 | 
399 | A DAG can express the dependencies between steps while keeping the individual steps independent.
400 | 
401 | In this project, the manager classes in `src/llmops_project/pipelines/managers/` play this role by orchestrating the pipeline steps.
402 | 
403 | In production, we recommend using a scalable system such as [Airflow](https://airflow.apache.org/), [Dagster](https://dagster.io/), [Prefect](https://www.prefect.io/), [Metaflow](https://metaflow.org/), or [ZenML](https://zenml.io/).
404 | 
405 | ### [Program Service](https://en.wikipedia.org/wiki/Systemd)
406 | 
407 | **You should provide a global context for the execution of your program.**
408 | 
409 | There are several approaches such as [Singleton](https://en.wikipedia.org/wiki/Singleton_pattern), [Global Variable](https://en.wikipedia.org/wiki/Global_variable), or [Component](https://github.com/stuartsierra/component).
410 | 
411 | This package takes inspiration from [Clojure mount](https://github.com/tolitius/mount). It provides an implementation in `src/[package]/io/services.py`.
412 | 
413 | ### [Soft Coding](https://en.wikipedia.org/wiki/Softcoding)
414 | 
415 | **You should separate the program implementation from the program configuration.**
416 | 
417 | Exposing configurations to users allows them to influence the execution behavior without code changes.
418 | 
419 | This package seeks to expose as many parameters as possible to the users in configurations stored in the `confs/` folder.
420 | 
421 | ### [SOLID Principles](https://en.wikipedia.org/wiki/SOLID)
422 | 
423 | **You should implement the SOLID principles to make your code as flexible as possible.**
424 | 
425 | - *Single responsibility principle*: Class has one job to do. Each change in requirements can be done by changing just one class.
426 | - *Open/closed principle*: Class is happy (open) to be used by others.
Class is not happy (closed) to be changed by others.
427 | - *Liskov substitution principle*: Class can be replaced by any of its children. Child classes inherit the parent's behaviors.
428 | - *Interface segregation principle*: When classes promise each other something, they should separate these promises (interfaces) into many small promises, so it's easier to understand.
429 | - *Dependency inversion principle*: When classes talk to each other in a very specific way, they both depend on each other to never change. Instead, classes should use promises (interfaces, parents), so classes can change as long as they keep the promise.
430 | 
431 | In practice, this means you can implement software contracts with interfaces and swap the implementation.
432 | 
433 | For instance, you can implement several jobs in `src/[package]/pipelines/*.py` and swap them in your configuration.
434 | 
435 | To learn more about the mechanism selected for this package, you can check the documentation for [Pydantic Tagged Unions](https://docs.pydantic.dev/dev-v2/usage/types/unions/#discriminated-unions-aka-tagged-unions).
436 | 
437 | ### [IO Separation](https://en.wikibooks.org/wiki/Haskell/Understanding_monads/IO)
438 | 
439 | **You should separate the code interacting with the external world from the rest.**
440 | 
441 | The external world is messy and full of risks: missing files, permission issues, running out of disk ...
442 | 
443 | To isolate these risks, you can put all the related code in an `io` package and use interfaces.
444 | 
445 | ## [Python Powers](https://realpython.com/)
446 | 
447 | ### [Context Manager](https://docs.python.org/3/library/contextlib.html)
448 | 
449 | **You should use Python context managers to control and enhance an execution.**
450 | 
451 | Python provides contexts that can be used to extend a code block. For instance:
452 | 
453 | ```python
454 | # in src/[package]/scripts.py
455 | with job as runner:  # context
456 |     runner.run()  # run in context
457 | ```
458 | 
459 | This pattern has the same benefit as [Monad](https://en.wikipedia.org/wiki/Monad_(functional_programming)), a powerful programming pattern.
460 | 
461 | The package uses `src/[package]/pipelines/*.py` to handle exceptions and services.
462 | 
463 | ### [Python Package](https://packaging.python.org/en/latest/tutorials/packaging-projects/)
464 | 
465 | **You should create a Python package to provide both a library and an application for others.**
466 | 
467 | Using a Python package for your AI/ML project has the following benefits:
468 | - Build a code archive (i.e., a wheel) that can be uploaded to PyPI.org
469 | - Install the Python package as a library (e.g., like pandas)
470 | - Expose script entry points to run a CLI or a GUI
471 | 
472 | To build a Python package with Poetry, you simply have to type in a terminal:
473 | ```bash
474 | # for all poetry projects
475 | poetry build
476 | # for this project only
477 | inv packages
478 | ```
479 | 
480 | ## [Software Engineering](https://en.wikipedia.org/wiki/Software_engineering)
481 | 
482 | ### [Code Typing](https://docs.python.org/3/library/typing.html)
483 | 
484 | **You should type your Python code to make it more robust and explicit for your user.**
485 | 
486 | Python provides the [typing module](https://docs.python.org/3/library/typing.html) for adding type hints and [mypy](https://mypy-lang.org/) for checking them.
487 | 
488 | 
489 | The snippet below clearly states the inputs and outputs of the function, both for the developer and the type checker.
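For example (a hypothetical function, not taken from this code base):

```python
def rewrite_query(question: str, chat_history: list[dict[str, str]]) -> str:
    """Rewrite a user question into a retrieval query, given the chat history."""
    ...
```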
490 | 
491 | The package aims to type every function and class to facilitate the developer experience and catch mistakes before execution.
492 | 
493 | ### [Config Typing](https://docs.pydantic.dev/latest/)
494 | 
495 | **You should type your configuration to avoid exceptions during the program execution.**
496 | 
497 | Pydantic allows you to define classes that can validate your configs during the program startup.
498 | 
499 | ```python
500 | # in src/[package]/utils/splitters.py
501 | class TrainTestSplitter(Splitter):
502 |     shuffle: bool = False  # required (time sensitive)
503 |     test_size: int | float = 24 * 30 * 2  # 2 months
504 |     random_state: int = 42
505 | ```
506 | 
507 | This code snippet communicates the expected values and prevents errors that could otherwise be avoided.
508 | 
509 | The package combines both OmegaConf and Pydantic to parse YAML files and validate them as soon as possible.
510 | 
511 | 
512 | ### [Object Oriented](https://en.wikipedia.org/wiki/Object-oriented_programming)
513 | 
514 | **You should use Object-Oriented programming to benefit from [polymorphism](https://en.wikipedia.org/wiki/Polymorphism_(computer_science)).**
515 | 
516 | Polymorphism combined with the SOLID Principles allows you to easily swap your code components.
517 | 
518 | The package defines class interfaces whenever possible to provide intuitive and replaceable parts for your AI/ML project.
519 | 
520 | ### [Semantic Versioning](https://semver.org/)
521 | 
522 | **You should use semantic versioning to communicate the level of compatibility of your releases.**
523 | 
524 | Semantic Versioning (SemVer) provides a simple schema to communicate code changes. For package X.Y.Z:
525 | - *Major* (X): major release with breaking changes (i.e., may require action from users)
526 | - *Minor* (Y): minor release with new features (i.e., provides new capabilities)
527 | - *Patch* (Z): patch release to fix bugs (i.e., corrects wrong behavior)
528 | 
529 | Poetry and this package leverage Semantic Versioning to let developers control the speed of adoption for new releases.
530 | 
531 | ## [Testing Tricks](https://en.wikipedia.org/wiki/Software_testing)
532 | 
533 | ### [Parallel Testing](https://pytest-xdist.readthedocs.io/en/stable/)
534 | 
535 | **You can run your tests in parallel to speed up the validation of your code base.**
536 | 
537 | Pytest can be extended with the [pytest-xdist plugin](https://pytest-xdist.readthedocs.io/en/stable/) for this purpose.
538 | 
539 | This package enables Pytest in its automation tasks by default.
540 | 
541 | ### [Test Fixtures](https://docs.pytest.org/en/latest/explanation/fixtures.html)
542 | 
543 | **You should define reusable objects and actions for your tests with [fixtures](https://docs.pytest.org/en/latest/explanation/fixtures.html).**
544 | 
545 | Fixtures can prepare objects for your test cases, such as dataframes, models, or files.
546 | 
547 | This package defines fixtures in `tests/conftest.py` to improve your testing experience.
548 | 
549 | ## [VS Code](https://code.visualstudio.com/)
550 | 
551 | ### [Code Workspace](https://code.visualstudio.com/docs/editor/workspaces)
552 | 
553 | **You can use a VS Code workspace to define configurations for your project.**
554 | 
555 | [Code Workspace](https://code.visualstudio.com/docs/editor/workspaces) can enable features (e.g., formatting) and set the default interpreter.
556 | 
557 | ```json
558 | {
559 |   "settings": {
560 |     "editor.formatOnSave": true,
561 |     "python.defaultInterpreterPath": ".venv/bin/python",
562 |     ...
563 |   },
564 | }
565 | ```
566 | 
567 | This package defines a workspace file that you can load from `[package].code-workspace`.
568 | 
569 | ### [GitHub Copilot](https://github.com/features/copilot)
570 | 
571 | **You can use GitHub Copilot to increase your coding productivity by 30%.**
572 | 
573 | [GitHub Copilot](https://github.com/features/copilot) has been a huge productivity boost thanks to its smart completion.
574 | 
575 | You should become familiar with the solution in less than a single coding session.
576 | 
577 | ### [VSCode VIM](https://marketplace.visualstudio.com/items?itemName=vscodevim.vim)
578 | 
579 | **You can use VIM keybindings to more efficiently navigate and modify your code.**
580 | 
581 | Learning VIM is one of the best investments for a career in IT. It can make you 30% more productive.
582 | 
583 | Compared to GitHub Copilot, VIM can take much more time to master, but you can expect a ROI in less than a month.
584 | 
585 | # Resources
586 | 
587 | This section provides resources for building packages for Python and AI/ML/MLOps.
588 | 
589 | ## Python
590 | 
591 | - https://github.com/krzjoa/awesome-python-data-science#readme
592 | - https://github.com/ml-tooling/best-of-ml-python
593 | - https://github.com/ml-tooling/best-of-python
594 | - https://github.com/ml-tooling/best-of-python-dev
595 | - https://github.com/vinta/awesome-python
596 | 
597 | ## AI/ML/MLOps
598 | 
599 | - https://github.com/josephmisiti/awesome-machine-learning
600 | - https://github.com/visenger/awesome-mlops
601 | 
602 | 
603 | 
604 | 
605 | 
606 | 
--------------------------------------------------------------------------------
/confs/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/confs/.gitkeep
--------------------------------------------------------------------------------
/confs/deployment.yaml:
--------------------------------------------------------------------------------
1 | job:
2 |   KIND: DeploymentJob
3 |   staging_alias: "champion"
4 |   production_alias: "production"
5 |   registry_model_name: "rag-chatbot-with-guardrails"
6 |   llm_confs: "/confs/rag_chain_config.yaml"
7 |   llm_model_code_path: "/src/llmops_project/models/chatbot_with_guardrails.py"
8 |   vector_store_path: "https://34beb054-2278-47fe-9731-cd7ed574320f.eu-west-2-0.aws.cloud.qdrant.io:6333" #"http://localhost:6333"
9 |   qa_dataset_path: "/data/datasets/rag_dataset.csv"
10 |   alias: "champion"
11 | 
12 |   metric_tresholds:
13 |     flesch_kincaid_grade_level_mean: 5.1 # bigger than
14 |     ari_grade_level_mean: 4.1 # bigger than
--------------------------------------------------------------------------------
/confs/feature_eng.yaml:
--------------------------------------------------------------------------------
1 | job:
2 |   KIND: FeatureEngineeringJob
3 |   embedding_model: "amazon.titan-embed-text-v1"
4 |   vector_store_path: "http://localhost:6333"
5 |   document_path: "/data/documents/"
6 |   collection_name: "hr-documents"
--------------------------------------------------------------------------------
/confs/generate_rag_dataset.yaml:
--------------------------------------------------------------------------------
1 | job:
2 |   KIND: GenerateRagDatasetJob
3 |   data_path: "/data/documents/"
4 |   qa_dataset_path_csv: "data/datasets/rag_dataset.csv" # will save a QA dataset in csv and json
5 |   qa_dataset_path_json: "data/datasets/rag_dataset.json" # will save a QA dataset in csv and json
6 |   llm_model:
"anthropic.claude-3-haiku-20240307-v1:0" # model_id to generate the QA dataset 7 | -------------------------------------------------------------------------------- /confs/monitoring.yaml: -------------------------------------------------------------------------------- 1 | job: 2 | KIND: MonitoringEvalJob 3 | vector_store_path: "/faiss_db/" 4 | registry_model_name: "rag-chatbot" 5 | 6 | trace_experiment_name: "rag_chatbot_experiment" 7 | monitoring_experiment_name: "monitoring" 8 | 9 | -------------------------------------------------------------------------------- /confs/rag_chain_config.yaml: -------------------------------------------------------------------------------- 1 | guardrail_config: 2 | model: anthropic.claude-3-haiku-20240307-v1:0 3 | topics: 4 | valid: 5 | - HR Policies 6 | - company culture 7 | - team building 8 | - leadership 9 | - management 10 | - productivity 11 | 12 | invalid: 13 | - software programming 14 | - religion 15 | - politics 16 | - sports 17 | input_example: 18 | messages: 19 | - content: What is the company's sick leave policy? 20 | role: user 21 | - content: The company's sick leave policy allows employees to take a certain number 22 | of sick days per year. Please refer to the employee handbook for specific details 23 | and eligibility criteria. 24 | role: assistant 25 | - content: How many sick days can I take per year? 26 | role: user 27 | output_example: 28 | result: "example text" 29 | sources: 30 | - "example_source_1.pdf" 31 | - "example_source_2.pdf" 32 | 33 | llm_config: 34 | llm_model: anthropic.claude-3-haiku-20240307-v1:0 35 | llm_parameters: 36 | max_tokens: 4000 37 | temperature: 0.01 38 | llm_prompt_template: "\n You are a trustful assistant for HR Policies.\ 39 | \ You are answering employee benefits, leave policies, performance management,\ 40 | \ recruitment, onboarding, and other HR-related topics. If you do not know the\ 41 | \ answer to a question, you truthfully say you do not know. Read the discussion\ 42 | \ to get the context of the previous conversation. In the chat discussion, you\ 43 | \ are referred to as \"system\". The user is referred to as \"user\".\n\n \ 44 | \ Discussion: {chat_history}\n\n Here's some context\ 45 | \ which might or might not help you answer: {context}\n\n Answer\ 46 | \ straight, do not repeat the question, do not start with something like: the\ 47 | \ answer to the question, do not add \"AI\" in front of your answer, do not say:\ 48 | \ here is the answer, do not mention the context or the question.\n\n \ 49 | \ Based on this history and context, answer this question: {question}\n\ 50 | \ " 51 | llm_refusal_fallback_answer: I cannot answer this question. 52 | query_rewriter_prompt_template: "\n Based on the chat history below,\ 53 | \ we want you to generate a query for an external data source to retrieve relevant\ 54 | \ documents so that we can better answer the question. The query should be in\ 55 | \ natual language. The external data source uses similarity search to search for\ 56 | \ relevant documents in a vector space. So the query should be similar to the\ 57 | \ relevant documents semantically. Answer with only the query. 
Do not add explanation.\n\ 58 | \n Chat history: {chat_history}\n\n Question: {question}\n\ 59 | \ " 60 | retriever_config: 61 | embedding_model: "amazon.titan-embed-text-v1" 62 | parameters: 63 | k: 10 64 | score_threshold: 0.5 65 | schema: 66 | document_uri: source 67 | vector_store_path: "http://localhost:6333" 68 | collection_name: "hr-documents" 69 | -------------------------------------------------------------------------------- /data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/data/.gitkeep -------------------------------------------------------------------------------- /data/datasets/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/data/datasets/.gitkeep -------------------------------------------------------------------------------- /data/documents/sample_hr_manual.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/data/documents/sample_hr_manual.pdf -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | # https://docs.docker.com/compose/compose-file/ 2 | 3 | services: 4 | mlflow: 5 | image: ghcr.io/mlflow/mlflow:v2.17.1 6 | ports: 7 | - 5001:5000 8 | environment: 9 | - MLFLOW_HOST=0.0.0.0 10 | command: mlflow server 11 | 12 | qdrant: 13 | image: qdrant/qdrant:latest 14 | restart: always 15 | container_name: qdrant 16 | ports: 17 | - 6333:6333 18 | - 6334:6334 19 | expose: 20 | - 6333 21 | - 6334 22 | - 6335 23 | configs: 24 | - source: qdrant_config 25 | target: /qdrant/config/production.yaml 26 | volumes: 27 | - ./qdrant_data:/qdrant/storage 28 | 29 | configs: 30 | qdrant_config: 31 | content: | 32 | log_level: INFO -------------------------------------------------------------------------------- /docs/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/docs/.gitkeep -------------------------------------------------------------------------------- /invoke.yaml: -------------------------------------------------------------------------------- 1 | # https://docs.pyinvoke.org/en/latest/index.html 2 | 3 | run: 4 | echo: true 5 | project: 6 | name: LLMOps Project 7 | package: llmops_project 8 | repository: llmops-project 9 | -------------------------------------------------------------------------------- /llmops-project.code-workspace: -------------------------------------------------------------------------------- 1 | { 2 | "folders": [ 3 | { 4 | "path": "." 
5 | } 6 | ], 7 | "settings": { 8 | "editor.formatOnSave": true, 9 | "python.defaultInterpreterPath": ".venv/bin/python", 10 | "python.testing.pytestEnabled": true, 11 | "python.testing.pytestArgs": [ 12 | "tests" 13 | ], 14 | "[python]": { 15 | "editor.codeActionsOnSave": { 16 | "source.organizeImports": "explicit" 17 | }, 18 | "editor.defaultFormatter": "charliermarsh.ruff", 19 | }, 20 | }, 21 | "extensions": { 22 | "recommendations": [ 23 | "charliermarsh.ruff", 24 | "dchanco.vsc-invoke", 25 | "ms-python.mypy-type-checker", 26 | "ms-python.python", 27 | "ms-python.vscode-pylance", 28 | "redhat.vscode-yaml", 29 | ] 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /mlruns/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/mlruns/.gitkeep -------------------------------------------------------------------------------- /notebooks/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/notebooks/.gitkeep -------------------------------------------------------------------------------- /outputs/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/outputs/.gitkeep -------------------------------------------------------------------------------- /poetry.toml: -------------------------------------------------------------------------------- 1 | # https://python-poetry.org/docs/configuration/ 2 | 3 | [virtualenvs] 4 | in-project = true 5 | prefer-active-python = true 6 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | # https://python-poetry.org/docs/pyproject/ 2 | 3 | # PROJECT 4 | 5 | [tool.poetry] 6 | name = "llmops-project" 7 | version = "0.2.0" 8 | description = "LLMOps Package Production Ready template using open source technologies." 
9 | repository = "https://github.com/callmesora/llmops-project" 10 | documentation = "https://callmesora.github.io/llmops-project/" 11 | authors = ["callmesora"] 12 | readme = "README.md" 13 | license = "MIT" 14 | packages = [{ include = "llmops_project", from = "src" }] 15 | 16 | # SCRIPTS 17 | 18 | [tool.poetry.scripts] 19 | llmops-project = "llmops_project.scripts:main" 20 | 21 | # DEPENDENCIES 22 | 23 | [tool.poetry.dependencies] 24 | python = ">=3.10,<3.13" 25 | mlflow = {extras = ["genai"], version = "^2.17.2"} 26 | setuptools = "^71.1.0" 27 | langchain = "^0.3.5" 28 | # langchain-ollama = "^0.2.0" 29 | llama-index = "^0.12.0" 30 | # langgraph = "0.2.27" 31 | #llama-index-utils-workflow = "^0.2.2" 32 | #llama-index-llms-ollama = "^0.3.4" 33 | # gensim = "^4.0.0" 34 | nltk = "^3.9.1" 35 | langchain-community = "^0.3.4" 36 | transformers = "^4.46.1" 37 | #streamlit = "^1.39.0" 38 | pypdf2 = "^3.0.1" 39 | defusedxml = "^0.7.1" 40 | #faiss-cpu = "^1.9.0" 41 | spacy = "3.7.0" 42 | textstat = "^0.7.4" 43 | torch = "^2.5.1" 44 | evaluate = "^0.4.3" 45 | loguru = "^0.7.2" 46 | omegaconf = "^2.3.0" 47 | pandera = "^0.21.0" 48 | #sagemaker-mlflow = "0.1.0" 49 | langchain-aws = "^0.2.7" 50 | langchain-openai = "^0.2.9" 51 | plotly = "5.3.1" 52 | kaleido = "0.2.1" 53 | seaborn = "^0.13.2" 54 | sagemaker = "^2.235.1" 55 | litserve = "^0.2.4" 56 | #nemoguardrails = "^0.11.0" 57 | guardrails-ai = "^0.6.0" 58 | tenacity = ">=8.2.0,<8.4.0" 59 | presidio-analyzer = "^2.2.355" 60 | presidio-anonymizer = "^2.2.355" 61 | qdrant-client = "^1.12.1" 62 | unstructured = {extras = ["pdf"], version = "^0.16.8"} 63 | libmagic = "^1.0" 64 | python-magic = "^0.4.27" 65 | langchain-qdrant = "^0.2.0" 66 | pytest = "^8.3.4" 67 | pytest-ordering = "^0.6" 68 | llama-index-llms-bedrock = "^0.3.1" 69 | 70 | 71 | 72 | [tool.poetry.group.checks.dependencies] 73 | bandit = "^1.7.9" 74 | coverage = "^7.5.4" 75 | mypy = "^1.10.1" 76 | pytest = "^8.2.2" 77 | pytest-cov = "^5.0.0" 78 | pytest-xdist = "^3.6.1" 79 | ruff = "^0.5.0" 80 | 81 | [tool.poetry.group.commits.dependencies] 82 | commitizen = "^3.27.0" 83 | pre-commit = "^3.7.1" 84 | 85 | [tool.poetry.group.dev.dependencies] 86 | invoke = "^2.2.0" 87 | 88 | [tool.poetry.group.docs.dependencies] 89 | pdoc = "^14.5.1" 90 | 91 | [tool.poetry.group.notebooks.dependencies] 92 | ipykernel = "^6.29.4" 93 | nbformat = "^5.10.4" 94 | 95 | # CONFIGURATIONS 96 | 97 | [tool.bandit] 98 | targets = ["src"] 99 | 100 | [tool.commitizen] 101 | name = "cz_conventional_commits" 102 | tag_format = "v$version" 103 | version_scheme = "pep440" 104 | version_provider = "poetry" 105 | update_changelog_on_bump = true 106 | 107 | [tool.coverage.run] 108 | branch = true 109 | source = ["src"] 110 | omit = ["__main__.py"] 111 | 112 | [tool.mypy] 113 | pretty = true 114 | strict = false 115 | python_version = "3.12" 116 | check_untyped_defs = true 117 | ignore_missing_imports = true 118 | plugins = ["pandera.mypy", "pydantic.mypy"] 119 | no_implicit_optional= false 120 | 121 | # Ignore specific error codes 122 | disable_error_code = "no-untyped-call" 123 | 124 | [tool.pytest.ini_options] 125 | addopts = "--verbosity=2" 126 | pythonpath = ["src"] 127 | filterwarnings = [ 128 | "ignore:Pydantic V1 style `@validator` validators are deprecated:DeprecationWarning", 129 | "ignore:Pydantic V1 style `@root_validator` validators are deprecated:DeprecationWarning" 130 | ] 131 | 132 | 133 | [tool.ruff] 134 | fix = true 135 | indent-width = 4 136 | line-length = 100 137 | target-version = "py312" 138 | 139 | 
[tool.ruff.format]
140 | docstring-code-format = true
141 | 
142 | [tool.ruff.lint.pydocstyle]
143 | convention = "google"
144 | 
145 | [tool.ruff.lint.per-file-ignores]
146 | "tests/*.py" = ["D100", "D103"]
147 | 
148 | # SYSTEMS
149 | 
150 | [build-system]
151 | requires = ["poetry-core"]
152 | build-backend = "poetry.core.masonry.api"
153 | 
-------------------------------------------------------------------------------- /serving_endpoint/Dockerfile: --------------------------------------------------------------------------------
1 | # Use a multi-stage build to first get uv
2 | FROM ghcr.io/astral-sh/uv:0.5.4 AS uv
3 | FROM python:3.12-slim
4 | 
5 | # Define build arguments
6 | ARG GUARDRAILS_TOKEN
7 | 
8 | # Create a virtual environment with uv inside the container
9 | RUN --mount=from=uv,source=/uv,target=./uv \
10 |     ./uv venv /opt/venv
11 | 
12 | # We need to set this environment variable so that uv knows where
13 | # the virtual environment is to install packages
14 | ENV VIRTUAL_ENV=/opt/venv
15 | 
16 | # Make sure that the virtual environment is in the PATH so
17 | # we can use the binaries of packages that we install such as pip
18 | # without needing to activate the virtual environment explicitly
19 | ENV PATH="/opt/venv/bin:$PATH"
20 | 
21 | # Copy the requirements file into the container
22 | COPY requirements.txt .
23 | 
24 | # Install the packages with uv using --mount=type=cache to cache the downloaded packages
25 | RUN --mount=type=cache,target=/root/.cache/uv \
26 |     --mount=from=uv,source=/uv,target=./uv \
27 |     ./uv pip install -r requirements.txt litserve==0.2.4
28 | 
29 | # Set the GUARDRAILS_TOKEN as an environment variable
30 | ENV GUARDRAILS_TOKEN=${GUARDRAILS_TOKEN}
31 | 
32 | RUN guardrails configure --enable-metrics --enable-remote-inferencing --token ${GUARDRAILS_TOKEN}
33 | 
34 | RUN guardrails hub install hub://tryolabs/restricttotopic
35 | 
36 | 
37 | WORKDIR /app
38 | COPY . /app
39 | 
40 | EXPOSE 8000
41 | CMD ["python", "/app/server.py"]
-------------------------------------------------------------------------------- /serving_endpoint/README.md: --------------------------------------------------------------------------------
1 | # ML Serving Endpoint with LitServe
2 | 
3 | This project sets up a FastAPI server using `litserve` that loads the latest MLflow model from the model registry and runs inference on it. The server includes input and output validation using Pydantic and scales to CPU/GPU workloads with batching support.
4 | 
5 | ![ML Serving Endpoint](./imgs/litserve.png)
6 | 
7 | ## ML Design Pattern
8 | In this example, we walk through a realistic LLMOps setup: a LangChain model is loaded from the model registry and served in a LitServe container.
9 | 
10 | ![ML Serving Endpoint](./imgs/litserve-deployment.png)
11 | 
12 | 
13 | ## MLflow Model Registry
14 | 
15 | MLflow Model Registry is a centralized repository for managing and deploying machine learning models. It provides model lineage, versioning, and lifecycle management. Models can be registered, and different versions of a model can be tracked and deployed.
16 | 
17 | ![ML Serving Endpoint](./imgs/model-registry.webp)
18 | 
19 | 
20 | ## Loading Model from Registry
21 | 
22 | The server loads the latest model from the MLflow model registry. This pattern ensures that the most recent validated model is always used for inference. The model is loaded at server startup and is ready to serve predictions.
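To make the pattern concrete, here is a minimal sketch of what that load looks like with the MLflow API. The tracking URI and model name below mirror the defaults used in `server.py`; adjust them for your environment.

```python
import mlflow
import mlflow.langchain

# Assumed defaults (same as server.py); point these at your own MLflow server.
mlflow.set_tracking_uri("http://127.0.0.1:5001")
mlflow.set_registry_uri("http://127.0.0.1:5001")

# "latest" resolves to the newest registered version, so restarting the
# server is enough to pick up a newly registered model.
chain = mlflow.langchain.load_model("models:/rag-chatbot-with-guardrails/latest")
```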
23 | 
24 | ![ML Serving Endpoint](./imgs/model-registry.webp)
25 | 
26 | 
27 | ## Server Details
28 | 
29 | The FastAPI server (`litserve`) is designed to handle various workloads, including CPU and GPU, and supports batch processing for efficient inference. Input and output data are validated with Pydantic to ensure data integrity and correctness.
30 | 
31 | ## Deployment
32 | 
33 | You can deploy this on Kubernetes, on your cloud provider, or at the edge.
34 | 
35 | ## Instructions
36 | 
37 | ### Setup ENV File
38 | Set up a `.env` file with the following variables:
39 | 
40 | ```.env
41 | # For running the model
42 | OPENAI_API_KEY= ... # or any other provider API key if needed
43 | 
44 | # If your MLflow tracking server is on AWS
45 | AWS_ACCESS_KEY_ID= ...
46 | AWS_SECRET_ACCESS_KEY= ...
47 | AWS_REGION= ...
48 | 
49 | ```
50 | ### Build the Docker Image
51 | 
52 | To build the Docker image, run the following command:
53 | 
54 | ```sh
55 | docker build --build-arg GUARDRAILS_TOKEN=$(grep GUARDRAILS_TOKEN .env | cut -d '=' -f2) -t litserve-model:latest .
56 | ```
57 | ### Run the Docker Image
58 | 
59 | To run the Docker image, use the following command:
60 | 
61 | ```sh
62 | docker run --env-file .env -p 8000:8000 litserve-model:latest
63 | ```
64 | 
65 | 
66 | 
67 | 
68 | 
-------------------------------------------------------------------------------- /serving_endpoint/client.py: --------------------------------------------------------------------------------
1 | import requests
2 | 
3 | vector_store_path = "http://localhost:6333"
4 | 
5 | 
6 | non_relevant_dialog = {  # This will test the guardrail
7 |     "messages": [
8 |         {"role": "user", "content": "What is the company's sick leave policy?"},
9 |         {
10 |             "role": "assistant",
11 |             "content": "The company's sick leave policy allows employees to take a certain number of sick days per year. Please refer to the employee handbook for specific details and eligibility criteria.",
12 |         },
13 |         {"role": "user", "content": "What is the meaning of life?"},
14 |     ],
15 |     "vector_store_path": vector_store_path,
16 | }
17 | 
18 | relevant_dialog = {  # This will test the schema
19 |     "messages": [
20 |         {"role": "user", "content": "What is discussed in the HR manual?"},
21 |     ],
22 |     "vector_store_path": vector_store_path,
23 | }
24 | 
25 | 
26 | response = requests.post("http://localhost:8000/predict", json=non_relevant_dialog)
27 | print(response.json())
28 | # print(response.headers["X-Request-Id"])  # This will print "00000"
29 | 
30 | 
31 | print("-------------------")
32 | print("Relevant Dialog")
33 | 
34 | response = requests.post("http://localhost:8000/predict", json=relevant_dialog)
35 | print(response.json())
-------------------------------------------------------------------------------- /serving_endpoint/ecs/Makefile: --------------------------------------------------------------------------------
1 | include .env
2 | 
3 | .EXPORT_ALL_VARIABLES:
4 | APP_NAME=litserve-endpoint
5 | 
6 | TAG=latest
7 | TF_VAR_app_name=${APP_NAME}
8 | REGISTRY_NAME=${APP_NAME}
9 | TF_VAR_image=${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/${REGISTRY_NAME}:${TAG}
10 | TF_VAR_region=${AWS_REGION}
11 | 
12 | 
13 | setup-ecr:
14 | 	cd infra/setup && terraform init && terraform apply -auto-approve
15 | 
16 | deploy-container:
17 | 	sh deploy-image-to-ecs.sh
18 | 
19 | deploy-service:
20 | 	cd infra/app && terraform init && terraform apply -auto-approve
21 | 
22 | destroy-service:
23 | 	cd infra/app && terraform init && terraform destroy -auto-approve
-------------------------------------------------------------------------------- /serving_endpoint/ecs/README.md: --------------------------------------------------------------------------------
1 | # How to use this Repo
2 | ## Warning
3 | - Always make sure to destroy your API Service. Forgetting to do so could incur a large AWS fee.
4 | - Never commit your AWS account ID to git. Save it in an `.env` file and ensure `.env` is added to your `.gitignore`.
5 | 
6 | ## Setup, Deploy, and Destroy
7 | 
8 | ### Setup Env Variables
9 | Add an `.env` file containing your AWS account ID and region. Example file:
10 | ```
11 | AWS_ACCOUNT_ID=1234567890
12 | AWS_REGION=ap-southeast-1
13 | ```
14 | 
15 | Create a `backend.tf` file and add it to both `/infra/setup/backend.tf` and `/infra/app/backend.tf`. Example files:
16 | ```
17 | terraform {
18 |     backend "s3" {
19 |         region = ""
20 |         bucket = ""
21 |         key    = "/terraform.tfstate"
22 |     }
23 | }
24 | ```
25 | ```
26 | terraform {
27 |     backend "s3" {
28 |         region = ""
29 |         bucket = ""
30 |         key    = "/terraform.tfstate"
31 |     }
32 | }
33 | ```
34 | Alternatively, you can skip this step and store your Terraform state locally.
35 | 
36 | 
37 | 
38 | ### Setup, Deploy, and Destroy Infrastructure/App
39 | All of the following commands are run via the Makefile.
40 | 
41 | 1. Set up your ECR repository (one time)
42 | ```
43 | make setup-ecr
44 | ```
45 | 
46 | 
47 | 48 | 2. Build and deploy your container 49 | ``` 50 | make deploy-container 51 | ``` 52 | 53 |
54 | 55 | 3. Deploy your API Service on ECS Fargate 56 | ``` 57 | make deploy-service 58 | ``` 59 | Note: The URL for your endpoint will be printed by Terraform once the above command is done executing. Example: `alb_dns_name = "-alb-123456789..elb.amazonaws.com"`. Navigate to that URL in your browser to ensure the API is working. You can also check out the API docs at the `/docs` endpoint. 60 | 61 |
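Before moving on, you can smoke-test the deployed endpoint from Python as well. A minimal sketch using `requests`, assuming the `alb_dns_name` printed by Terraform and the payload shape from `serving_endpoint/client.py` (note that `vector_store_path` must be a Qdrant URL reachable from the ECS tasks, not `localhost`):

```python
import requests

# Hypothetical ALB DNS name: substitute the alb_dns_name output from Terraform.
BASE_URL = "http://your-alb-dns-name.elb.amazonaws.com:8000"

payload = {
    "messages": [{"role": "user", "content": "What is discussed in the HR manual?"}],
    # Must be reachable from inside the ECS tasks, not localhost.
    "vector_store_path": "http://your-qdrant-host:6333",
}

response = requests.post(f"{BASE_URL}/predict", json=payload, timeout=60)
print(response.json())
```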
62 | 63 | 4. Destroy your API Service on ECS Fargate 64 | ``` 65 | make destroy-service 66 | ``` 67 | 68 | Based on: 69 | https://medium.com/aspiring-data-scientist/deploy-a-fastapi-app-on-aws-ecs-034b8b7b5ac2 70 | https://github.com/tomsharp/fastapi-on-ecs/tree/main -------------------------------------------------------------------------------- /serving_endpoint/ecs/deploy-image-to-ecs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "Logging in to ECR" 3 | aws ecr get-login-password --region $AWS_REGION | docker login --username AWS --password-stdin $AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com 4 | 5 | IMAGE_PATH="../" 6 | 7 | echo "Building image" 8 | docker build --no-cache --platform=linux/amd64 -t $REGISTRY_NAME $IMAGE_PATH 9 | 10 | echo "Tagging image" 11 | docker tag $REGISTRY_NAME:$TAG $AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com/$REGISTRY_NAME:$TAG 12 | 13 | echo "Pushing image to ECR" 14 | docker push $AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com/$REGISTRY_NAME:$TAG -------------------------------------------------------------------------------- /serving_endpoint/ecs/infra/app/ecs/main.tf: -------------------------------------------------------------------------------- 1 | # ALB 2 | resource "aws_security_group" "alb" { 3 | name = "${var.app_name}-alb-sg" 4 | vpc_id = var.vpc_id 5 | egress { 6 | from_port = 0 7 | to_port = 0 8 | protocol = "-1" 9 | cidr_blocks = ["0.0.0.0/0"] 10 | } 11 | ingress { 12 | from_port = 8000 13 | to_port = 8000 14 | protocol = "tcp" 15 | cidr_blocks = ["0.0.0.0/0"] 16 | } 17 | ingress { 18 | from_port = 443 19 | to_port = 443 20 | protocol = "tcp" 21 | cidr_blocks = ["0.0.0.0/0"] 22 | } 23 | } 24 | resource "aws_lb" "this" { 25 | name = "${var.app_name}-alb" 26 | load_balancer_type = "application" 27 | security_groups = [aws_security_group.alb.id] 28 | subnets = var.public_subnet_ids 29 | } 30 | resource "aws_lb_target_group" "this" { 31 | name = "${var.app_name}-lb-tg" 32 | vpc_id = var.vpc_id 33 | port = 8000 34 | protocol = "HTTP" 35 | target_type = "ip" 36 | health_check { 37 | port = 8000 38 | path = "/docs" 39 | interval = 30 40 | protocol = "HTTP" 41 | timeout = 5 42 | unhealthy_threshold = 2 43 | matcher = 200 44 | } 45 | } 46 | resource "aws_lb_listener" "http" { 47 | port = "8000" 48 | protocol = "HTTP" 49 | load_balancer_arn = aws_lb.this.arn 50 | default_action { 51 | target_group_arn = aws_lb_target_group.this.arn 52 | type = "forward" 53 | } 54 | depends_on = [aws_lb_target_group.this] 55 | } 56 | resource "aws_lb_listener_rule" "this" { 57 | listener_arn = aws_lb_listener.http.arn 58 | action { 59 | type = "forward" 60 | target_group_arn = aws_lb_target_group.this.arn 61 | } 62 | condition { 63 | path_pattern { 64 | values = ["*"] 65 | } 66 | } 67 | } 68 | 69 | # IAM 70 | data "aws_iam_policy_document" "ecs_assume_policy" { 71 | statement { 72 | actions = ["sts:AssumeRole"] 73 | principals { 74 | type = "Service" 75 | identifiers = ["ecs-tasks.amazonaws.com"] 76 | } 77 | } 78 | } 79 | resource "aws_iam_role" "ecs_execution_role" { 80 | name = "${var.app_name}-execution-role" 81 | assume_role_policy = data.aws_iam_policy_document.ecs_assume_policy.json 82 | } 83 | resource "aws_iam_policy" "ecs_execution_policy" { 84 | name = "${var.app_name}-ecs-execution-role-policy" 85 | policy = jsonencode({ 86 | Version = "2012-10-17" 87 | Statement = [ 88 | { 89 | Effect : "Allow", 90 | Action : [ 91 | "ecr:*", 92 | "ecs:*", 93 | "elasticloadbalancing:*", 94 | 
"cloudwatch:*", 95 | "logs:*" 96 | ], 97 | Resource : "*" 98 | } 99 | ] 100 | }) 101 | } 102 | resource "aws_iam_role_policy_attachment" "ecs_execution_role_policy_attach" { 103 | role = aws_iam_role.ecs_execution_role.name 104 | policy_arn = aws_iam_policy.ecs_execution_policy.arn 105 | } 106 | 107 | # ECS 108 | resource "aws_cloudwatch_log_group" "ecs" { 109 | name = "/aws/ecs/${var.app_name}/cluster" 110 | } 111 | resource "aws_ecs_task_definition" "api" { 112 | family = "${var.app_name}-api-task" 113 | requires_compatibilities = ["FARGATE"] 114 | network_mode = "awsvpc" 115 | execution_role_arn = aws_iam_role.ecs_execution_role.arn 116 | task_role_arn = aws_iam_role.ecs_execution_role.arn 117 | cpu = 256 118 | memory = 512 119 | container_definitions = jsonencode([ 120 | { 121 | name = "${var.app_name}-api-container" 122 | image = "${var.image}" 123 | #command = ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"] 124 | portMappings = [ 125 | { 126 | hostPort = 8000 127 | containerPort = 8000 128 | protocol = "tcp" 129 | } 130 | ], 131 | logConfiguration = { 132 | logDriver = "awslogs" 133 | options = { 134 | awslogs-group = aws_cloudwatch_log_group.ecs.name 135 | awslogs-stream-prefix = "ecs" 136 | awslogs-region = var.region 137 | } 138 | } 139 | } 140 | ]) 141 | } 142 | 143 | # Cluster 144 | resource "aws_ecs_cluster" "this" { 145 | name = "${var.app_name}-cluster" 146 | setting { 147 | name = "containerInsights" 148 | value = "enabled" 149 | } 150 | } 151 | 152 | # Security Group and Service 153 | resource "aws_security_group" "ecs" { 154 | name = "${var.app_name}-ecs-sg" 155 | vpc_id = var.vpc_id 156 | egress { 157 | from_port = 0 158 | to_port = 0 159 | protocol = "-1" 160 | cidr_blocks = ["0.0.0.0/0"] 161 | } 162 | ingress { 163 | from_port = 8000 164 | to_port = 8000 165 | protocol = "tcp" 166 | security_groups = [aws_security_group.alb.id] 167 | } 168 | } 169 | resource "aws_ecs_service" "api" { 170 | name = "${var.app_name}-ecs-service" 171 | cluster = aws_ecs_cluster.this.name 172 | launch_type = "FARGATE" 173 | desired_count = length(var.private_subnet_ids) 174 | task_definition = aws_ecs_task_definition.api.arn 175 | network_configuration { 176 | subnets = var.private_subnet_ids 177 | security_groups = [aws_security_group.ecs.id] 178 | } 179 | load_balancer { 180 | target_group_arn = aws_lb_target_group.this.arn 181 | container_name = "${var.app_name}-api-container" 182 | container_port = "8000" 183 | } 184 | lifecycle { 185 | ignore_changes = [ 186 | desired_count, 187 | ] 188 | } 189 | depends_on = [aws_lb_listener_rule.this] 190 | } -------------------------------------------------------------------------------- /serving_endpoint/ecs/infra/app/ecs/output.tf: -------------------------------------------------------------------------------- 1 | output "alb_dns_name" { 2 | value = aws_lb.this.dns_name 3 | } -------------------------------------------------------------------------------- /serving_endpoint/ecs/infra/app/ecs/variable.tf: -------------------------------------------------------------------------------- 1 | variable "app_name" { 2 | description = "Name of the app." 3 | type = string 4 | } 5 | variable "region" { 6 | description = "AWS region to deploy the network to." 7 | type = string 8 | } 9 | variable "image" { 10 | description = "Image used to start the container. Should be in repository-url/image:tag format." 11 | type = string 12 | } 13 | variable "vpc_id" { 14 | description = "ID of the VPC where the ECS will be hosted." 
15 | type = string 16 | } 17 | variable "public_subnet_ids" { 18 | description = "IDs of public subnets where the ALB will be attached to." 19 | type = list(string) 20 | } 21 | variable "private_subnet_ids" { 22 | description = "IDs of private subnets where the ECS service will be deployed to." 23 | type = list(string) 24 | } -------------------------------------------------------------------------------- /serving_endpoint/ecs/infra/app/main.tf: -------------------------------------------------------------------------------- 1 | provider "aws" { 2 | region = var.region 3 | default_tags { 4 | tags = { 5 | app = var.app_name 6 | } 7 | } 8 | } 9 | 10 | module "network" { 11 | source = "./network" 12 | app_name = var.app_name 13 | region = var.region 14 | } 15 | 16 | module "ecs" { 17 | source = "./ecs" 18 | app_name = var.app_name 19 | region = var.region 20 | image = var.image 21 | vpc_id = module.network.vpc.id 22 | public_subnet_ids = [for s in module.network.public_subnets : s.id] 23 | private_subnet_ids = [for s in module.network.private_subnets : s.id] 24 | depends_on = [module.network] 25 | } 26 | 27 | 28 | # Outputs 29 | output "alb_dns_name" { 30 | value = module.ecs.alb_dns_name 31 | } -------------------------------------------------------------------------------- /serving_endpoint/ecs/infra/app/network/main.tf: -------------------------------------------------------------------------------- 1 | # Define provider 2 | provider "aws" { 3 | region = var.region 4 | default_tags { 5 | tags = { 6 | app = var.app_name 7 | } 8 | } 9 | } 10 | 11 | # Create VPC and IGW 12 | resource "aws_vpc" "this" { 13 | cidr_block = var.vpc_cidr_block 14 | } 15 | resource "aws_internet_gateway" "this" { 16 | vpc_id = aws_vpc.this.id 17 | } 18 | 19 | # Create public subnets 20 | resource "aws_subnet" "public_subnets" { 21 | count = length(var.availability_zones) 22 | vpc_id = aws_vpc.this.id 23 | cidr_block = var.public_cidr_blocks[count.index] 24 | availability_zone = var.availability_zones[count.index] 25 | } 26 | 27 | # Create routing tables for public subnets 28 | resource "aws_route_table" "public" { 29 | vpc_id = aws_vpc.this.id 30 | route { 31 | cidr_block = "0.0.0.0/0" 32 | gateway_id = aws_internet_gateway.this.id 33 | } 34 | } 35 | resource "aws_route_table_association" "publics" { 36 | count = length(var.availability_zones) 37 | subnet_id = element(aws_subnet.public_subnets.*.id, count.index) 38 | route_table_id = aws_route_table.public.id 39 | } 40 | 41 | 42 | # Create Elastic IPs and NAT Gateways 43 | resource "aws_eip" "eips" { 44 | count = length(var.availability_zones) 45 | domain = "vpc" 46 | } 47 | resource "aws_nat_gateway" "this" { 48 | count = length(var.availability_zones) 49 | subnet_id = element(aws_subnet.public_subnets.*.id, count.index) 50 | allocation_id = element(aws_eip.eips.*.id, count.index) 51 | } 52 | 53 | # Create private subnets 54 | resource "aws_subnet" "private_subnets" { 55 | count = length(var.availability_zones) 56 | vpc_id = aws_vpc.this.id 57 | cidr_block = var.private_cidr_blocks[count.index] 58 | availability_zone = var.availability_zones[count.index] 59 | } 60 | 61 | # Create routing tables for private subnets 62 | resource "aws_route_table" "private" { 63 | count = length(var.availability_zones) 64 | vpc_id = aws_vpc.this.id 65 | route { 66 | cidr_block = "0.0.0.0/0" 67 | nat_gateway_id = element(aws_nat_gateway.this.*.id, count.index) 68 | } 69 | } 70 | resource "aws_route_table_association" "privates" { 71 | count = length(var.availability_zones) 72 | 
subnet_id = element(aws_subnet.private_subnets.*.id, count.index) 73 | route_table_id = element(aws_route_table.private.*.id, count.index) 74 | } -------------------------------------------------------------------------------- /serving_endpoint/ecs/infra/app/network/outputs.tf: -------------------------------------------------------------------------------- 1 | output "vpc" { 2 | value = aws_vpc.this 3 | } 4 | output "public_subnets" { 5 | value = aws_subnet.public_subnets 6 | } 7 | output "private_subnets" { 8 | value = aws_subnet.private_subnets 9 | } -------------------------------------------------------------------------------- /serving_endpoint/ecs/infra/app/network/variable.tf: -------------------------------------------------------------------------------- 1 | variable "app_name" { 2 | type = string 3 | } 4 | variable "region" { 5 | type = string 6 | } 7 | variable "vpc_cidr_block" { 8 | type = string 9 | default = "10.0.0.0/16" 10 | } 11 | variable "availability_zones" { 12 | type = list(string) 13 | default = ["us-east-1a", "us-east-1f"] 14 | } 15 | variable "public_cidr_blocks" { 16 | type = list(string) 17 | default = ["10.0.1.0/24", "10.0.2.0/24"] 18 | } 19 | variable "private_cidr_blocks" { 20 | type = list(string) 21 | default = ["10.0.11.0/24", "10.0.12.0/24"] 22 | } -------------------------------------------------------------------------------- /serving_endpoint/ecs/infra/app/variable.tf: -------------------------------------------------------------------------------- 1 | variable "app_name" { 2 | description = "Name of the app." 3 | type = string 4 | } 5 | variable "region" { 6 | description = "AWS region to deploy the network to." 7 | type = string 8 | } 9 | variable "image" { 10 | description = "Image used to start the container. Should be in repository-url/image:tag format." 11 | type = string 12 | } -------------------------------------------------------------------------------- /serving_endpoint/ecs/infra/setup/main.tf: -------------------------------------------------------------------------------- 1 | resource "aws_ecr_repository" "this" { 2 | name = "${var.app_name}" 3 | } -------------------------------------------------------------------------------- /serving_endpoint/ecs/infra/setup/output.tf: -------------------------------------------------------------------------------- 1 | output "ecr_repo_url" { 2 | value = aws_ecr_repository.this.repository_url 3 | } -------------------------------------------------------------------------------- /serving_endpoint/ecs/infra/setup/variable.tf: -------------------------------------------------------------------------------- 1 | variable "app_name" { 2 | description = "Name of the app." 
3 | type = string 4 | } -------------------------------------------------------------------------------- /serving_endpoint/example.env: -------------------------------------------------------------------------------- 1 | AWS_ACCESS_KEY_ID= 2 | AWS_SECRET_ACCESS_KEY= 3 | AWS_DEFAULT_REGION= 4 | GUARDRAILS_TOKEN= 5 | -------------------------------------------------------------------------------- /serving_endpoint/imgs/Model-deployment.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/serving_endpoint/imgs/Model-deployment.webp -------------------------------------------------------------------------------- /serving_endpoint/imgs/litserve-deployment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/serving_endpoint/imgs/litserve-deployment.png -------------------------------------------------------------------------------- /serving_endpoint/imgs/litserve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/serving_endpoint/imgs/litserve.png -------------------------------------------------------------------------------- /serving_endpoint/imgs/model-registry.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/serving_endpoint/imgs/model-registry.webp -------------------------------------------------------------------------------- /serving_endpoint/requirements.txt: -------------------------------------------------------------------------------- 1 | mlflow-skinny==2.17.2 2 | sagemaker-mlflow==0.1.0 3 | cloudpickle==3.1.0 4 | colorama==0.4.6 5 | langchain-aws==0.2.7 6 | langchain-community==0.3.4 7 | langchain-ollama==0.2.0 8 | langchain-openai==0.2.9 9 | langchain==0.3.7 10 | langgraph==0.2.27 11 | numpy==1.26.4 12 | psutil==6.1.0 13 | pydantic==2.9.2 14 | rich==13.9.4 15 | tornado==6.4.1 16 | litserve==0.2.4 17 | langchain-qdrant==0.2.0 18 | 19 | guardrails-ai==0.6.0 ; python_version >= "3.10" and python_version < "3.13" \ 20 | --hash=sha256:6bd634b56ef34c6027ea066ea411f895261f14204e0592bdefb446875ce68eea \ 21 | --hash=sha256:a11a0aad96ecbb412bce58533fcaaa03ca6d21872f5bad02babffe4959a13e17 22 | guardrails-api-client==0.4.0a1 ; python_version >= "3.10" and python_version < "3.13" \ 23 | --hash=sha256:102e70cd53704298cd3d71c58bdac71bc6bfa2c341bc3b336a4ec434c540e9b4 \ 24 | --hash=sha256:163352bc09b295966d206bc5e912edb29fc3cae8f7749a6ceea1a80aae816029 25 | guardrails-hub-types==0.0.4 ; python_version >= "3.10" and python_version < "3.13" \ 26 | --hash=sha256:a5ad863fd6b354e41c3d83d8c1e835f7243a0ae0ec944b6601d1e3ee5d806a51 \ 27 | --hash=sha256:ad86faecc142e853d6c6fa24ecb50723c86f834fbe1f2e6b932f2921a08b60b3 28 | pandas -------------------------------------------------------------------------------- /serving_endpoint/server.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from pathlib import Path 4 | from typing import Any, Dict, List 5 | 6 | import litserve as ls 7 | import mlflow 8 | import mlflow.langchain 9 | from pydantic import BaseModel, constr 10 | 11 | # 
==== DEFINING INPUT / OUTPUT SCHEMAS WITH EXAMPLES ==== 12 | 13 | 14 | class MessageModel(BaseModel): 15 | role: str = "user" 16 | content: str = "What is the company's sick leave policy?" 17 | 18 | 19 | # Input Format 20 | class TextRequestModel(BaseModel): 21 | messages: List[MessageModel] = [ 22 | MessageModel(role="user", content="What is the company's sick leave policy?"), 23 | MessageModel( 24 | role="assistant", 25 | content="The company's sick leave policy allows employees to take a certain number of sick days per year. Please refer to the employee handbook for specific details and eligibility criteria.", 26 | ), 27 | MessageModel(role="user", content="What is the meaning of life?"), 28 | ] 29 | vector_store_path: str = "http://host.docker.internal:6333" 30 | 31 | 32 | # Output Format 33 | class TextResponseModel(BaseModel): 34 | response: str = "The company's sick leave policy allows employees to take a certain number of sick days per year." 35 | source_documents: Any = None 36 | model_uri: str = "models:/rag-chatbot/latest" 37 | 38 | 39 | class LangchainRAGAPI(ls.LitAPI): 40 | def setup(self, device): 41 | """Initialize the model and any required resources""" 42 | # Configuration - these could be moved to environment variables 43 | self.model_uri = "models:/rag-chatbot-with-guardrails/latest" 44 | self.cache_dir = "/tmp/mlflow_cache" 45 | 46 | # server uri 47 | tracking_uri: str = "http://127.0.0.1:5001" 48 | registry_uri: str = "http://127.0.0.1:5001" 49 | 50 | mlflow.set_tracking_uri(tracking_uri) 51 | mlflow.set_registry_uri(registry_uri) 52 | 53 | # Initialize cache directory 54 | os.makedirs(self.cache_dir, exist_ok=True) 55 | 56 | # Load the chain 57 | self._initialize_chain() 58 | 59 | def _initialize_chain(self): 60 | """Load the chain with caching support""" 61 | try: 62 | cache_key = self.model_uri.replace("/", "_").replace(":", "_") 63 | cache_path = Path(self.cache_dir) / f"{cache_key}.json" 64 | 65 | if cache_path.exists(): 66 | logging.info(f"Loading chain from cache: {cache_path}") 67 | self.chain = mlflow.langchain.load_model(self.model_uri) 68 | else: 69 | logging.info(f"Loading chain from MLflow: {self.model_uri}") 70 | self.chain = mlflow.langchain.load_model(self.model_uri) 71 | 72 | except Exception as e: 73 | logging.error(f"Error loading chain: {str(e)}") 74 | raise 75 | 76 | def decode_request(self, request: TextRequestModel) -> Dict[str, Any]: 77 | """Decode and validate the incoming request""" 78 | return { 79 | "messages": [message.dict() for message in request.messages], 80 | "vector_store_path": request.vector_store_path, 81 | } 82 | 83 | def predict(self, request_data: Dict[str, Any]) -> Dict[str, Any]: 84 | """Make prediction using the loaded chain""" 85 | try: 86 | # Invoke the chain 87 | result = self.chain.invoke(request_data) 88 | 89 | # Handle different response formats 90 | if isinstance(result, dict): 91 | response = { 92 | "response": result.get("result", result.get("response", str(result))), 93 | "source_documents": result.get("sources"), 94 | } 95 | else: 96 | logging.warning(f"Mismatched response format: {type(result)} - {result}") 97 | response = {"response": str(result)} 98 | 99 | return response 100 | 101 | except Exception as e: 102 | logging.error(f"Prediction error: {str(e)}") 103 | raise 104 | 105 | def encode_response(self, prediction: Dict[str, Any]) -> TextResponseModel: 106 | """Encode the prediction result into the final response format""" 107 | return TextResponseModel( 108 | response=prediction.get("response"), 109 | 
source_documents=prediction.get("source_documents"), 110 | model_uri=self.model_uri, 111 | ) 112 | 113 | 114 | if __name__ == "__main__": 115 | # Configure logging 116 | logging.basicConfig(level=logging.INFO) 117 | 118 | # Initialize and start the server 119 | api = LangchainRAGAPI() 120 | server = ls.LitServer(api, accelerator="auto") 121 | server.run(port=8000) 122 | 123 | ############################################# Run in terminal ####################################### 124 | # litserve dockerize server.py --port 8000 --gpu 125 | -------------------------------------------------------------------------------- /src/llmops_project/__init__.py: -------------------------------------------------------------------------------- 1 | """LLMOps Package Production Ready template using open source technologies..""" 2 | -------------------------------------------------------------------------------- /src/llmops_project/__main__.py: -------------------------------------------------------------------------------- 1 | """Entry point of the package.""" 2 | 3 | # %% IMPORTS 4 | 5 | from llmops_project import scripts 6 | 7 | # %% MAIN 8 | 9 | if __name__ == "__main__": 10 | scripts.main() 11 | -------------------------------------------------------------------------------- /src/llmops_project/io/configs.py: -------------------------------------------------------------------------------- 1 | """Parse, merge, and convert config objects.""" 2 | 3 | # %% IMPORTS 4 | 5 | import typing as T 6 | 7 | import omegaconf as oc 8 | 9 | # %% TYPES 10 | 11 | Config = oc.ListConfig | oc.DictConfig 12 | 13 | # %% PARSERS 14 | 15 | 16 | def parse_file(path: str) -> Config: 17 | """Parse a config file from a path. 18 | 19 | Args: 20 | path (str): path to local config. 21 | 22 | Returns: 23 | Config: representation of the config file. 24 | """ 25 | return oc.OmegaConf.load(path) 26 | 27 | 28 | def parse_string(string: str) -> Config: 29 | """Parse the given config string. 30 | 31 | Args: 32 | string (str): content of config string. 33 | 34 | Returns: 35 | Config: representation of the config string. 36 | """ 37 | return oc.OmegaConf.create(string) 38 | 39 | 40 | # %% MERGERS 41 | 42 | 43 | def merge_configs(configs: T.Sequence[Config]) -> Config: 44 | """Merge a list of config into a single config. 45 | 46 | Args: 47 | configs (T.Sequence[Config]): list of configs. 48 | 49 | Returns: 50 | Config: representation of the merged config objects. 51 | """ 52 | return oc.OmegaConf.merge(*configs) 53 | 54 | 55 | # %% CONVERTERS 56 | 57 | 58 | def to_object(config: Config, resolve: bool = True) -> object: 59 | """Convert a config object to a python object. 60 | 61 | Args: 62 | config (Config): representation of the config. 63 | resolve (bool): resolve variables. Defaults to True. 64 | 65 | Returns: 66 | object: conversion of the config to a python object. 
67 | """ 68 | return oc.OmegaConf.to_container(config, resolve=resolve) 69 | -------------------------------------------------------------------------------- /src/llmops_project/io/services.py: -------------------------------------------------------------------------------- 1 | """Manage global context during execution.""" 2 | 3 | # %% IMPORTS 4 | 5 | from __future__ import annotations 6 | 7 | import abc 8 | import contextlib as ctx 9 | import os 10 | import sys 11 | import typing as T 12 | 13 | import dotenv 14 | import loguru 15 | import mlflow 16 | import mlflow.tracking as mt 17 | import pydantic as pdt 18 | 19 | # %% SERVICES 20 | 21 | 22 | class Service(abc.ABC, pdt.BaseModel, strict=True, frozen=True, extra="forbid"): 23 | """Base class for a global service. 24 | 25 | Use services to manage global contexts. 26 | e.g., logger object, mlflow client, spark context, ... 27 | """ 28 | 29 | @abc.abstractmethod 30 | def start(self) -> None: 31 | """Start the service.""" 32 | 33 | def stop(self) -> None: 34 | """Stop the service.""" 35 | # does nothing by default 36 | 37 | 38 | class LoggerService(Service, frozen=True): 39 | """Service for logging messages. 40 | 41 | https://loguru.readthedocs.io/en/stable/api/logger.html 42 | 43 | Parameters: 44 | sink (str): logging output. 45 | level (str): logging level. 46 | format (str): logging format. 47 | colorize (bool): colorize output. 48 | serialize (bool): convert to JSON. 49 | backtrace (bool): enable exception trace. 50 | diagnose (bool): enable variable display. 51 | catch (bool): catch errors during log handling. 52 | """ 53 | 54 | sink: str = "stderr" 55 | level: str = "DEBUG" 56 | format: str = ( 57 | "[{time:YYYY-MM-DD HH:mm:ss.SSS}]" 58 | "[{level}]" 59 | "[{name}:{function}:{line}]" 60 | " {message}" 61 | ) 62 | colorize: bool = True 63 | serialize: bool = False 64 | backtrace: bool = True 65 | diagnose: bool = False 66 | catch: bool = True 67 | 68 | @T.override 69 | def start(self) -> None: 70 | loguru.logger.remove() 71 | config = self.model_dump() 72 | # use standard sinks or keep the original 73 | sinks = {"stderr": sys.stderr, "stdout": sys.stdout} 74 | config["sink"] = sinks.get(config["sink"], config["sink"]) 75 | loguru.logger.add(**config) 76 | 77 | def logger(self) -> loguru.Logger: 78 | """Return the main logger. 79 | 80 | Returns: 81 | loguru.Logger: the main logger. 82 | """ 83 | return loguru.logger 84 | 85 | 86 | class MlflowService(Service): # type: ignore[misc] 87 | """Service for Mlflow tracking and registry. 88 | 89 | Parameters: 90 | tracking_uri (str): the URI for the Mlflow tracking server. 91 | registry_uri (str): the URI for the Mlflow model registry. 92 | experiment_name (str): the name of tracking experiment. 93 | registry_name (str): the name of model registry. 94 | autolog_disable (bool): disable autologging. 95 | autolog_disable_for_unsupported_versions (bool): disable autologging for unsupported versions. 96 | autolog_exclusive (bool): If True, enables exclusive autologging. 97 | autolog_log_input_examples (bool): If True, logs input examples during autologging. 98 | autolog_log_model_signatures (bool): If True, logs model signatures during autologging. 99 | autolog_log_models (bool): If True, enables logging of models during autologging. 100 | autolog_log_datasets (bool): If True, logs datasets used during autologging. 101 | autolog_silent (bool): If True, suppresses all Mlflow warnings during autologging. 
102 |     """
103 | 
104 |     class RunConfig(pdt.BaseModel, strict=True, frozen=True, extra="forbid"):
105 |         """Run configuration for Mlflow tracking.
106 | 
107 |         Parameters:
108 |             name (str): name of the run.
109 |             description (str | None): description of the run.
110 |             tags (dict[str, T.Any] | None): tags for the run.
111 |             log_system_metrics (bool | None): enable system metrics logging.
112 |         """
113 | 
114 |         name: str
115 |         description: str | None = None
116 |         tags: dict[str, T.Any] | None = None
117 |         log_system_metrics: bool | None = True
118 | 
119 |     dotenv.load_dotenv()
120 | 
121 |     # Fail fast with an explicit error if the AWS credentials are not set
122 |     if "AWS_ACCESS_KEY_ID" not in os.environ:
123 |         raise EnvironmentError("AWS_ACCESS_KEY_ID environment variable is not set")
124 |     if "AWS_REGION" not in os.environ:
125 |         raise EnvironmentError("AWS_REGION environment variable is not set")
126 |     if "AWS_SECRET_ACCESS_KEY" not in os.environ:
127 |         raise EnvironmentError("AWS_SECRET_ACCESS_KEY environment variable is not set")
128 |     # server uri
129 |     tracking_uri: str = "http://127.0.0.1:5001"
130 |     registry_uri: str = "http://127.0.0.1:5001"
131 |     # experiment
132 |     experiment_name: str = "rag_chatbot_experiment"
133 |     # registry
134 |     registry_name: str = "my_model_registry"
135 |     # autolog
136 |     autolog_disable: bool = False
137 |     autolog_disable_for_unsupported_versions: bool = False
138 |     autolog_exclusive: bool = False
139 |     autolog_log_input_examples: bool = True
140 |     autolog_log_model_signatures: bool = True
141 |     autolog_log_models: bool = False
142 |     autolog_log_datasets: bool = False
143 |     autolog_silent: bool = False
144 | 
145 |     @T.override
146 |     def start(self) -> None:
147 |         # server uri
148 |         mlflow.set_tracking_uri(uri=self.tracking_uri)
149 |         mlflow.set_registry_uri(uri=self.registry_uri)
150 |         # experiment
151 |         mlflow.set_experiment(experiment_name=self.experiment_name)
152 |         # autolog
153 |         mlflow.autolog(
154 |             disable=self.autolog_disable,
155 |             disable_for_unsupported_versions=self.autolog_disable_for_unsupported_versions,
156 |             exclusive=self.autolog_exclusive,
157 |             log_input_examples=self.autolog_log_input_examples,
158 |             log_model_signatures=self.autolog_log_model_signatures,
159 |             log_models=self.autolog_log_models,
160 |             log_datasets=self.autolog_log_datasets,
161 |             silent=self.autolog_silent,
162 |         )
163 | 
164 |     @ctx.contextmanager
165 |     def run_context(self, run_config: RunConfig) -> T.Generator[mlflow.ActiveRun, None, None]:
166 |         """Yield an active Mlflow run and exit it afterwards.
167 | 
168 |         Args:
169 |             run_config (RunConfig): run parameters.
170 | 
171 |         Yields:
172 |             T.Generator[mlflow.ActiveRun, None, None]: active run context. Will be closed at the end of the context.
173 |         """
174 |         with mlflow.start_run(
175 |             run_name=run_config.name,
176 |             tags=run_config.tags,
177 |             description=run_config.description,
178 |             log_system_metrics=run_config.log_system_metrics,
179 |         ) as run:
180 |             yield run
181 | 
182 |     def client(self) -> mt.MlflowClient:
183 |         """Return a new Mlflow client.
184 | 
185 |         Returns:
186 |             MlflowClient: the mlflow client.
187 | """ 188 | return mt.MlflowClient(tracking_uri=self.tracking_uri, registry_uri=self.registry_uri) 189 | -------------------------------------------------------------------------------- /src/llmops_project/io/vector_db.py: -------------------------------------------------------------------------------- 1 | # Create Abstract class to Create Delete and Ingest documents to Vector DB 2 | import os 3 | from abc import ABC, abstractmethod 4 | from typing import List, Optional 5 | 6 | # Import utility functions 7 | from langchain_aws import BedrockEmbeddings 8 | from langchain_community.docstore.in_memory import InMemoryDocstore 9 | from langchain_community.document_loaders import DirectoryLoader 10 | from langchain_community.vectorstores import FAISS 11 | from langchain_core.documents import Document 12 | from langchain_qdrant import QdrantVectorStore 13 | from langchain_text_splitters import RecursiveCharacterTextSplitter 14 | from PyPDF2 import PdfReader 15 | from qdrant_client import QdrantClient 16 | from qdrant_client.models import Distance, VectorParams 17 | 18 | 19 | class VectorDB(ABC): 20 | """Vector Database Abstract Class""" 21 | 22 | def __init__(self, embedding_model: str, embedding_model_size: int, vector_store_path: str): 23 | self.embedding_model = embedding_model 24 | self.vector_store_path = vector_store_path 25 | self.embedding_model_size = embedding_model_size 26 | 27 | @abstractmethod 28 | def create_vector_db(self): 29 | pass 30 | 31 | @abstractmethod 32 | def delete_vector_db(self): 33 | pass 34 | 35 | @abstractmethod 36 | def ingest_documents(self, document_path: str): 37 | pass 38 | 39 | 40 | # %% FAISS Vector Database Class 41 | 42 | 43 | # Implement Faiss Vector Database Class 44 | class FAISSVectorDB(VectorDB): 45 | """FAISS Vector Database Class""" 46 | 47 | def __init__( 48 | self, 49 | embedding_model: str = "amazon.titan-embed-text-v1", 50 | embedding_model_size: int = 1536, 51 | vector_store_path: str = "faiss_db/", 52 | ): 53 | super().__init__(embedding_model, embedding_model_size, vector_store_path) 54 | 55 | def create_vector_db(self): 56 | """Create an empty FAISS vector store. 57 | 58 | Args: 59 | config_path (str): Path to the chain's configuration file. 60 | vector_store_path (str): Path to save the empty vector store. 
61 |         """
62 | 
63 |         from faiss import IndexFlatL2
64 | 
65 |         # Load Bedrock embeddings with the specified model
66 |         embeddings = BedrockEmbeddings(model_id=self.embedding_model)
67 |         embedding_dimension = self.embedding_model_size
68 | 
69 |         # Create an empty FAISS vector store initialized with the embeddings dimension
70 |         index = IndexFlatL2(embedding_dimension)  # Using L2 distance for the index
71 | 
72 |         # Create a local file store for persistent document storage
73 |         docstore = InMemoryDocstore()
74 | 
75 |         # Create the FAISS vector store with the empty index and document store
76 |         vector_store = FAISS(
77 |             embedding_function=embeddings, index=index, docstore=docstore, index_to_docstore_id={}
78 |         )
79 | 
80 |         # Save the empty vector store locally
81 |         vector_store.save_local(folder_path=self.vector_store_path)
82 |         return vector_store
83 | 
84 |     def delete_vector_db(self):
85 |         # Delete the FAISS vector store folder
86 |         # (FAISS.save_local writes a directory, so remove the whole tree)
87 |         import shutil
88 | 
89 |         shutil.rmtree(self.vector_store_path, ignore_errors=True)
90 | 
91 |     def _load_pdfs_from_directory(self, directory_path: str):
92 |         documents = []
93 |         for filename in os.listdir(directory_path):
94 |             if filename.endswith(".pdf"):
95 |                 file_path = os.path.join(directory_path, filename)
96 |                 with open(file_path, "rb") as file:
97 |                     pdf_reader = PdfReader(file)
98 |                     for page_num in range(len(pdf_reader.pages)):
99 |                         page = pdf_reader.pages[page_num]
100 |                         text = page.extract_text()
101 |                         if text:
102 |                             documents.append(
103 |                                 Document(
104 |                                     page_content=text,
105 |                                     metadata={"source": filename, "page": page_num + 1},
106 |                                 )
107 |                             )
108 |         return documents
109 | 
110 |     # %% Main pipeline function for ingesting and updating the vector database
111 |     def ingest_documents(self, document_path: str) -> None:
112 |         # Load documents from the specified PDF directory
113 |         documents = self._load_pdfs_from_directory(document_path)
114 | 
115 |         # Load Bedrock embeddings
116 |         embeddings = BedrockEmbeddings(model_id=self.embedding_model)
117 | 
118 |         if documents:
119 |             # Load the existing FAISS vector store
120 |             vector_store = FAISS.load_local(
121 |                 folder_path=self.vector_store_path,
122 |                 embeddings=embeddings,
123 |                 allow_dangerous_deserialization=True,
124 |             )
125 | 
126 |             # Add documents to the vector store
127 |             vector_store.add_documents(documents)
128 | 
129 |             # Save the updated vector store locally
130 |             vector_store.save_local(folder_path=self.vector_store_path)
131 | 
132 | 
133 | class QdrantVectorDB:
134 |     def __init__(
135 |         self,
136 |         collection_name: str,
137 |         embeddings_model: BedrockEmbeddings,
138 |         url: str = "http://localhost:6333",
139 |         api_key: Optional[str] = None,
140 |         vector_size: int = 1536,
141 |         distance: Distance = Distance.COSINE,
142 |     ):
143 |         """Initialize the Qdrant client and embeddings.
144 | 
145 |         :param collection_name: Name of the Qdrant collection
146 |         :param embeddings_model: LangChain embeddings model
147 |         :param url: URL of the Qdrant server
148 |         """
149 |         # Initialize Qdrant client connected to the given URL
150 |         self.client = QdrantClient(url=url, api_key=api_key)
151 |         self.collection_name = collection_name
152 |         self.embeddings = embeddings_model
153 |         self.vector_size = vector_size
154 |         self.distance = distance
155 | 
156 |     def create_vector_db(self):
157 |         """
158 |         Create a new collection in the Qdrant database.
159 | 
160 |         Uses the collection name, vector size, and distance metric
161 |         configured on this instance.
162 |         """
163 |         self.client.create_collection(
164 |             collection_name=self.collection_name,
165 |             vectors_config=VectorParams(size=self.vector_size, distance=self.distance),
166 |         )
167 | 
168 |     def ingest_documents(self, folder_path: str, chunk_size=500, chunk_overlap=50) -> List[dict]:
169 |         """
170 |         Load documents from a folder, split them into chunks, embed them, and upsert them into Qdrant
171 | 
172 |         :param folder_path: Path to the folder containing documents
173 |         :return: List of points upserted into the collection
174 |         """
175 |         # Load documents from directory
176 |         loader = DirectoryLoader(folder_path)
177 |         documents = loader.load()
178 | 
179 |         # Split documents into chunks
180 |         text_splitter = RecursiveCharacterTextSplitter(
181 |             chunk_size=chunk_size,
182 |             chunk_overlap=chunk_overlap,
183 |         )
184 |         split_docs = text_splitter.split_documents(documents)
185 | 
186 |         # Generate embeddings and prepare for Qdrant
187 |         points = []
188 |         for idx, doc in enumerate(split_docs):
189 |             # Generate embedding
190 |             embedding = self.embeddings.embed_query(doc.page_content)
191 | 
192 |             points.append(
193 |                 {
194 |                     "id": idx,
195 |                     "vector": embedding,
196 |                     "payload": {"page_content": doc.page_content, "metadata": doc.metadata},
197 |                 }
198 |             )
199 | 
200 |         # Upsert points into Qdrant
201 |         self.client.upsert(collection_name=self.collection_name, points=points)
202 | 
203 |         return points
204 | 
205 |     def load_documents_via_langchain(self, folder_path) -> QdrantVectorStore:
206 |         """
207 |         Load documents from a folder into Qdrant via LangChain's QdrantVectorStore
208 | 
209 |         :param folder_path: Path to the folder containing documents
210 |         :return: The populated QdrantVectorStore
211 |         """
212 | 
213 |         # Load documents from directory
214 |         loader = DirectoryLoader(folder_path)
215 |         documents = loader.load()
216 | 
217 |         doc_store = QdrantVectorStore.from_documents(
218 |             documents,
219 |             self.embeddings,
220 |             url="http://localhost:6333",  # NOTE: hardcoded; should match the URL passed to __init__
221 |             collection_name=self.collection_name,
222 |         )
223 |         return doc_store
224 | 
225 |     def query_database(self, query: str, top_k: int = 5) -> List[dict]:
226 |         """
227 |         Query the vector database
228 | 
229 |         :param query: Search query string
230 |         :param top_k: Number of top results to return
231 |         :return: List of top matching documents
232 |         """
233 |         # Generate embedding for the query
234 |         query_embedding = self.embeddings.embed_query(query)
235 | 
236 |         # Perform search
237 |         search_result = self.client.search(
238 |             collection_name=self.collection_name, query_vector=query_embedding, limit=top_k
239 |         )
240 | 
241 |         if not search_result:
242 |             return [{"score": 0.0, "text": "", "source": ""}]
243 | 
244 |         return [
245 |             {
246 |                 "score": result.score,
247 |                 # Payloads are stored with "page_content" and "metadata" keys (see ingest_documents)
248 |                 "text": result.payload.get("page_content", "") if result.payload else "",
249 |                 "source": result.payload.get("metadata", {}).get("source", "") if result.payload else "",
250 |             }
251 |             for result in search_result
252 |         ]
-------------------------------------------------------------------------------- /src/llmops_project/models/rag_chatbot.py: --------------------------------------------------------------------------------
1 | # mypy: ignore-errors
2 | import os
3 | from operator import itemgetter
4 | from pathlib import Path
5 | from typing import Any, Dict, List
6 | 
7 | import mlflow
8 | from langchain.prompts import PromptTemplate
9 | from langchain.schema.output_parser import StrOutputParser
10 | from langchain.schema.runnable import RunnableBranch, RunnableLambda, RunnablePassthrough
11 | from langchain_aws import BedrockEmbeddings, ChatBedrock
12 | from langchain_community.vectorstores import FAISS
13 | from langchain_core.documents import Document
14 | 
15 | # Get the current working directory
16 | script_dir = 
Path(os.getcwd()) 17 | 18 | # Navigate up to the parent folder (you can use .parent to go up one level) 19 | parent_dir = script_dir.parent 20 | grandparent_dir = parent_dir.parent # Go up one more level 21 | 22 | # Combine the path to reach the config directory 23 | config_path = "rag_chain_config.yaml" 24 | 25 | ## Enable MLflow Tracing 26 | mlflow.langchain.autolog() 27 | 28 | print("CONFIG PATH", config_path) 29 | model_config = mlflow.models.ModelConfig(development_config=config_path) 30 | 31 | guardrail_config = model_config.get("guardrail_config") 32 | llm_config = model_config.get("llm_config") 33 | retriever_config = model_config.get("retriever_config") 34 | 35 | 36 | # The question is the last entry of the history 37 | def extract_question(input: List[Dict[str, Any]]) -> str: 38 | """ 39 | Extract the question from the input. 40 | 41 | Args: 42 | input (list[dict]): The input containing chat messages. 43 | 44 | Returns: 45 | str: The extracted question. 46 | """ 47 | return input[-1]["content"] 48 | 49 | 50 | # The history is everything before the last question 51 | def extract_history(input: List[Dict[str, str]]) -> List[Dict[str, str]]: 52 | """ 53 | Extract the chat history from the input. 54 | 55 | Args: 56 | input (list[dict]): The input containing chat messages. 57 | 58 | Returns: 59 | list[dict]: The extracted chat history. 60 | """ 61 | return input[:-1] 62 | 63 | 64 | # TODO: Convert to Few Shot Prompt 65 | guardrail_prompt = PromptTemplate( 66 | input_variables=["chat_history", "question"], 67 | template=guardrail_config["prompt"], 68 | ) 69 | 70 | guardrail_model = ChatBedrock( 71 | model_id=guardrail_config["model"], 72 | model_kwargs=dict(temperature=0.01), 73 | ) 74 | 75 | chat_model = ChatBedrock( 76 | model_id=llm_config["llm_model"], 77 | model_kwargs=dict(temperature=0.01), 78 | ) 79 | 80 | 81 | guardrail_chain = ( 82 | { 83 | "question": itemgetter("messages") | RunnableLambda(extract_question), 84 | "chat_history": itemgetter("messages") | RunnableLambda(extract_history), 85 | } 86 | | guardrail_prompt 87 | | guardrail_model 88 | | StrOutputParser() 89 | ) 90 | 91 | 92 | def get_retriever(path: str): 93 | """ 94 | Get the FAISS retriever. 95 | 96 | Args: 97 | path (str, optional): The path to the vector store. Defaults to None. 98 | 99 | Returns: 100 | FAISS: The FAISS retriever. 
101 | """ 102 | # Ensure the config path is relative to this script's location 103 | # Load Vector Store 104 | # Get the FAISS retriever 105 | embeddings = BedrockEmbeddings() 106 | vector_store = FAISS.load_local( 107 | embeddings=embeddings, 108 | folder_path=path, 109 | allow_dangerous_deserialization=True, 110 | ) 111 | 112 | # configure document retrieval 113 | retriever = vector_store.as_retriever( 114 | search_kwargs={"k": retriever_config.get("parameters")["k"]} 115 | ) 116 | return retriever 117 | 118 | 119 | # Setup Prompt to re-write query from chat history context 120 | generate_query_to_retrieve_context_prompt = PromptTemplate( 121 | input_variables=["chat_history", "question"], 122 | template=llm_config["query_rewriter_prompt_template"], 123 | ) 124 | 125 | 126 | # Setup query rewriter chain 127 | generate_query_to_retrieve_context_chain = { 128 | "question": itemgetter("messages") | RunnableLambda(extract_question), 129 | "chat_history": itemgetter("messages") | RunnableLambda(extract_history), 130 | } | RunnableBranch( # Augment query only when there is a chat history 131 | ( 132 | lambda x: x["chat_history"], 133 | generate_query_to_retrieve_context_prompt | chat_model | StrOutputParser(), 134 | ), 135 | (lambda x: not x["chat_history"], RunnableLambda(lambda x: x["question"])), 136 | RunnableLambda(lambda x: x["question"]), 137 | ) # type: ignore 138 | 139 | 140 | question_with_history_and_context_prompt = PromptTemplate( 141 | input_variables=["chat_history", "context", "question"], 142 | template=llm_config.get("llm_prompt_template"), # Add Question with History and Context Prompt 143 | ) 144 | 145 | 146 | def format_context(docs: List[Document]) -> str: 147 | """ 148 | Format the context from a list of documents. 149 | 150 | Args: 151 | docs (list[Document]): A list of documents. 152 | 153 | Returns: 154 | str: A formatted string containing the content of the documents. 155 | """ 156 | return "\n\n".join([d.page_content for d in docs]) 157 | 158 | 159 | def extract_source_urls(docs: List[Document]) -> List[str]: 160 | """ 161 | Extract source URLs from a list of documents. 162 | 163 | Args: 164 | docs (list[Document]): A list of documents. 165 | 166 | Returns: 167 | list[str]: A list of source URLs extracted from the documents' metadata. 
168 | """ 169 | return [d.metadata[retriever_config.get("schema")["document_uri"]] for d in docs] 170 | 171 | 172 | relevant_question_chain = ( 173 | RunnablePassthrough() # type: ignore 174 | | { 175 | "relevant_docs": generate_query_to_retrieve_context_prompt | chat_model | StrOutputParser(), 176 | "chat_history": itemgetter("chat_history"), 177 | "question": itemgetter("question"), 178 | "vector_store_path": itemgetter("vector_store_path"), 179 | } 180 | | { 181 | "relevant_docs": itemgetter("relevant_docs"), 182 | "chat_history": itemgetter("chat_history"), 183 | "question": itemgetter("question"), 184 | "vector_store_path": itemgetter("vector_store_path"), 185 | } 186 | | RunnableLambda( 187 | lambda x: { 188 | "relevant_docs": get_retriever(x["vector_store_path"]).get_relevant_documents( 189 | x["relevant_docs"] 190 | ), 191 | "chat_history": x["chat_history"], 192 | "question": x["question"], 193 | "vector_store_path": x["vector_store_path"], 194 | } 195 | ) 196 | | { 197 | "context": itemgetter("relevant_docs") | RunnableLambda(format_context), 198 | "sources": itemgetter("relevant_docs") | RunnableLambda(extract_source_urls), 199 | "chat_history": itemgetter("chat_history"), 200 | "question": itemgetter("question"), 201 | } 202 | | {"prompt": question_with_history_and_context_prompt, "sources": itemgetter("sources")} 203 | | { 204 | "result": itemgetter("prompt") | chat_model | StrOutputParser(), 205 | "sources": itemgetter("sources"), 206 | } 207 | ) 208 | 209 | 210 | irrelevant_question_chain = RunnableLambda( 211 | lambda x: {"result": llm_config.get("llm_refusal_fallback_answer"), "sources": []} 212 | ) 213 | 214 | branch_node = RunnableBranch( 215 | (lambda x: "yes" in x["question_is_relevant"].lower(), relevant_question_chain), 216 | (lambda x: "no" in x["question_is_relevant"].lower(), irrelevant_question_chain), 217 | irrelevant_question_chain, 218 | ) # type: ignore 219 | 220 | full_chain = { 221 | "question_is_relevant": guardrail_chain, 222 | "question": itemgetter("messages") | RunnableLambda(extract_question), 223 | "chat_history": itemgetter("messages") | RunnableLambda(extract_history), 224 | "vector_store_path": itemgetter("vector_store_path"), 225 | } | branch_node # type: ignore 226 | 227 | 228 | ## Tell MLflow logging where to find your chain. 
229 | mlflow.models.set_model(model=full_chain) # type: ignore 230 | -------------------------------------------------------------------------------- /src/llmops_project/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | """High-level pipelines of the project.""" 2 | 3 | # %% IMPORTS 4 | 5 | from llmops_project.pipelines.deployment.deploy_model import DeployModelJob 6 | from llmops_project.pipelines.deployment.register_model import LogAndRegisterModelJob 7 | from llmops_project.pipelines.feature_engineering.create_vector_db import CreateVectorDBJob 8 | from llmops_project.pipelines.feature_engineering.ingest_documents import IngestAndUpdateVectorDBJob 9 | from llmops_project.pipelines.managers.deployment_manager import DeploymentJob 10 | from llmops_project.pipelines.managers.feature_engineering_manager import FeatureEngineeringJob 11 | from llmops_project.pipelines.managers.monitoring_manager import MonitoringJob 12 | from llmops_project.pipelines.monitoring.generate_rag_dataset import GenerateRagDatasetJob 13 | from llmops_project.pipelines.monitoring.post_deploy_eval import MonitoringEvalJob 14 | from llmops_project.pipelines.monitoring.pre_deploy_eval import EvaluateModelJob 15 | 16 | # %% TYPES 17 | 18 | JobKind = ( 19 | DeploymentJob 20 | | FeatureEngineeringJob 21 | | GenerateRagDatasetJob 22 | | EvaluateModelJob 23 | | CreateVectorDBJob 24 | | IngestAndUpdateVectorDBJob 25 | | DeployModelJob 26 | | LogAndRegisterModelJob 27 | | MonitoringEvalJob 28 | | MonitoringJob 29 | ) 30 | 31 | # %% EXPORTS 32 | 33 | __all__ = [ 34 | "DeploymentJob", 35 | "FeatureEngineeringJob", 36 | "GenerateRagDatasetJob", 37 | "EvaluateModelJob", 38 | "CreateVectorDBJob", 39 | "IngestAndUpdateVectorDBJob", 40 | "DeployModelJob", 41 | "LogAndRegisterModelJob", 42 | "MonitoringEvalJob", 43 | "MonitoringJob", 44 | "JobKind", 45 | ] 46 | -------------------------------------------------------------------------------- /src/llmops_project/pipelines/base.py: -------------------------------------------------------------------------------- 1 | """Base for high-level project jobs.""" 2 | 3 | # %% IMPORTS 4 | 5 | import abc 6 | import types as TS 7 | import typing as T 8 | 9 | import pydantic as pdt 10 | 11 | from llmops_project.io import services 12 | 13 | # %% TYPES 14 | 15 | # Local job variables 16 | Locals = T.Dict[str, T.Any] 17 | 18 | # %% JOBS 19 | 20 | 21 | class Job(abc.ABC, pdt.BaseModel, strict=True, frozen=True): 22 | """Base class for a job. 23 | 24 | Use a job to execute runs in context, 25 | e.g., to define common services like the logger. 26 | 27 | Parameters: 28 | logger_service (services.LoggerService): manage the logger system. 30 | mlflow_service (services.MlflowService): manage the mlflow system. 31 | """ 32 | 33 | KIND: str 34 | 35 | logger_service: services.LoggerService = services.LoggerService() 36 | mlflow_service: services.MlflowService = services.MlflowService() 37 | 38 | def __enter__(self) -> T.Self: 39 | """Enter the job context. 40 | 41 | Returns: 42 | T.Self: return the current object.
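        Example (a minimal sketch, mirroring how the pipeline modules'
        __main__ blocks drive jobs as context managers):

            with setting.job as runner:
                runner.run()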
43 | """ 44 | self.logger_service.start() 45 | logger = self.logger_service.logger() 46 | logger.debug("[START] Logger service: {}", self.logger_service) 47 | logger.debug("[START] Mlflow service: {}", self.mlflow_service) 48 | self.mlflow_service.start() 49 | return self 50 | 51 | def __exit__( 52 | self, 53 | exc_type: T.Type[BaseException] | None, 54 | exc_value: BaseException | None, 55 | exc_traceback: TS.TracebackType | None, 56 | ) -> T.Literal[False]: 57 | """Exit the job context. 58 | 59 | Args: 60 | exc_type (T.Type[BaseException] | None): ignored. 61 | exc_value (BaseException | None): ignored. 62 | exc_traceback (TS.TracebackType | None): ignored. 63 | 64 | Returns: 65 | T.Literal[False]: always propagate exceptions. 66 | """ 67 | logger = self.logger_service.logger() 68 | logger.debug("[STOP] Mlflow service: {}", self.mlflow_service) 69 | self.mlflow_service.stop() 70 | logger.debug("[STOP] Logger service: {}", self.logger_service) 71 | self.logger_service.stop() 72 | return False # re-raise 73 | 74 | @abc.abstractmethod 75 | def run(self) -> Locals: 76 | """Run the job in context. 77 | 78 | Returns: 79 | Locals: local job variables. 80 | """ 81 | -------------------------------------------------------------------------------- /src/llmops_project/pipelines/deployment/deploy_model.py: -------------------------------------------------------------------------------- 1 | import typing as T 2 | 3 | from llmops_project.pipelines import base 4 | 5 | 6 | # %% Job class for logging and registering the RAG model 7 | class DeployModelJob(base.Job): # type: ignore[misc] 8 | """Job to log and register the RAG model in MLflow. 9 | 10 | Parameters: 11 | run_config (services.MlflowService.RunConfig): mlflow run config. 12 | """ 13 | 14 | KIND: T.Literal["DeployModelJob"] = "DeployModelJob" 15 | 16 | staging_alias: str = "champion" 17 | production_alias: str = "production" 18 | registry_model_name: str 19 | 20 | def promote_model_to_alias( 21 | self, client, model_name, current_alias: str = "champion", new_alias: str = "production" 22 | ) -> None: 23 | logger = self.logger_service.logger() 24 | 25 | # Retrieve the model version using the current alias 26 | model_version = client.get_model_version_by_alias(name=model_name, alias=current_alias) 27 | 28 | # Access and print the tags of the model version 29 | if model_version.tags["passed_tests"] == "True": 30 | logger.success("Model version passed tests, promoting to production") 31 | # Set the new alias to the retrieved model version 32 | client.set_registered_model_alias( 33 | name=model_name, alias=new_alias, version=model_version.version 34 | ) 35 | 36 | else: 37 | logger.warning("Model version did not pass tests, archiving model") 38 | client.delete_registered_model_alias(name=model_name, alias=current_alias) 39 | 40 | @T.override 41 | def run(self) -> base.Locals: 42 | # services 43 | # - logger 44 | logger = self.logger_service.logger() 45 | 46 | # - mlflow 47 | client = self.mlflow_service.client() 48 | logger.info("With client: {}", client.tracking_uri) 49 | 50 | logger.info( 51 | f"Deploying Model Named {self.registry_model_name} from {self.staging_alias} to {self.production_alias}" 52 | ) 53 | self.promote_model_to_alias( 54 | client=client, 55 | model_name=self.registry_model_name, 56 | current_alias=self.staging_alias, 57 | new_alias=self.production_alias, 58 | ) 59 | 60 | logger.success("Model deployment complete") 61 | 62 | return locals() 63 | 64 | 65 | if __name__ == "__main__": 66 | from pathlib import Path 67 | 68 | from 
llmops_project import settings 69 | from llmops_project.io import configs 70 | 71 | script_dir = str(Path(__file__).parent.parent.parent.parent.parent) 72 | config_files = ["/deployment.yaml"] 73 | 74 | file_paths = [script_dir + "/confs/" + file for file in config_files] 75 | 76 | files = [configs.parse_file(file) for file in file_paths] 77 | 78 | config = configs.merge_configs([*files]) # type: ignore 79 | config["job"]["KIND"] = "DeployModelJob" # type: ignore 80 | 81 | object_ = configs.to_object(config) # python object 82 | 83 | setting = settings.MainSettings.model_validate(object_) 84 | 85 | with setting.job as runner: 86 | runner.run() 87 | -------------------------------------------------------------------------------- /src/llmops_project/pipelines/deployment/register_model.py: -------------------------------------------------------------------------------- 1 | # %% IMPORTS 2 | import typing as T 3 | from pathlib import Path 4 | from typing import Any, Dict, List 5 | 6 | import llmops_project.io.services as services 7 | import mlflow 8 | import mlflow.pyfunc 9 | from llmops_project.pipelines import base 10 | from mlflow import MlflowClient 11 | from pydantic import BaseModel, ValidationError 12 | 13 | logger = services.LoggerService().logger() 14 | 15 | 16 | # %% Function to log the model to MLflow 17 | def log_rag_model( 18 | model_path: str, config_path: str, input_example: T.Optional[Dict[str, Any]] = None 19 | ) -> str: 20 | # Load model configuration from the config file 21 | # Start an MLflow run and log the model 22 | logger.debug("Using model config at {}", config_path) 23 | with mlflow.start_run(run_name="rag_with_guardrails") as run: 24 | mlflow.langchain.log_model( 25 | lc_model=model_path, # Path to the chain code file 26 | model_config=config_path, # Path to the chain configuration file 27 | artifact_path="chain", # Required by MLflow 28 | code_paths=[ 29 | config_path 30 | ], # dependency definition included for the model to successfully import the implementation 31 | input_example=input_example, # Input example for schema logging 32 | example_no_conversion=True, # Use input_example directly as the chain's schema 33 | ) 34 | return run.info.run_id # Return the run ID for model registration 35 | 36 | 37 | # %% Function to register the model in the MLflow model registry 38 | def register_model(client: MlflowClient, run_id: str, model_name: str): 39 | model_uri = f"runs:/{run_id}/chain" 40 | result = mlflow.register_model(model_uri=model_uri, name=model_name) 41 | logger.success( 42 | f"Model registered successfully with name: {model_name}, version {result.version}" 43 | ) 44 | 45 | client.set_registered_model_tag(name=model_name, key="model", value="claude3-haiku") 46 | 47 | 48 | def load_model_by_alias(model_name, alias=None): 49 | # Construct the model URI using the alias if provided 50 | if alias: 51 | model_uri = f"models:/{model_name}@{alias}" 52 | else: 53 | model_uri = f"models:/{model_name}/latest" 54 | 55 | # Load the model 56 | model = mlflow.langchain.load_model(model_uri) 57 | 58 | return model 59 | 60 | 61 | # Step 1: Define the expected output schema using Pydantic 62 | class OutputSchema(BaseModel): 63 | result: str 64 | sources: List[Any] 65 | 66 | 67 | # Step 2: Create a function to validate the output against the schema 68 | def validate_output_schema(output: Dict[str, Any], schema: OutputSchema) -> bool: 69 | try: 70 | # Step 3: Validate the output against the schema model 71 | schema.model_validate(output) 72 | return True 73 | except ValidationError as e:
print(f"Validation error: {e}") 75 | return False 76 | 77 | 78 | def validate_model_signature( 79 | client: MlflowClient, 80 | model_name: str, 81 | vector_store_path: str, 82 | alias=None, 83 | ): 84 | """ 85 | Validates the model signature by testing it against relevant and non-relevant dialogs. 86 | 87 | Args: 88 | model_name (str): The name of the model to validate. 89 | alias (str, optional): An alias for the model. Defaults to None. 90 | 91 | Returns: 92 | None 93 | 94 | Raises: 95 | ValueError: If the model fails the schema or guardrail tests. 96 | 97 | This function performs the following steps: 98 | 1. Loads the model using the provided name and alias. 99 | 2. Invokes the model with relevant and non-relevant dialogs. 100 | 3. Validates the model's output against a predefined schema. 101 | 4. Checks if the model's output passes guardrail tests. 102 | 5. Updates the model's tags in MLflow with the test results. 103 | """ 104 | # script_dir = Path(__file__).resolve().parent.parent 105 | # project_root = script_dir.parent.parent.parent # Adjusted to get to the project root as needed 106 | 107 | # vector_store_path = project_root / "faiss_db/" 108 | 109 | # Load Relevant Dialog and Non Relevant Dialog 110 | non_relevant_dialog = { # This will test Guardrail 111 | "messages": [ 112 | {"role": "user", "content": "What is the company's sick leave policy?"}, 113 | { 114 | "role": "assistant", 115 | "content": "The company's sick leave policy allows employees to take a certain number of sick days per year. Please refer to the employee handbook for specific details and eligibility criteria.", 116 | }, 117 | {"role": "user", "content": "What is the meaning of life?"}, 118 | ], 119 | "vector_store_path": vector_store_path, 120 | } 121 | 122 | relevant_dialog = { # This will test schema 123 | "messages": [ 124 | {"role": "user", "content": "What is the company sick leave policy?"}, 125 | ], 126 | "vector_store_path": vector_store_path, 127 | } 128 | 129 | model = load_model_by_alias(model_name, alias) 130 | 131 | non_relevant_result = model.invoke(non_relevant_dialog) 132 | relevant_result = model.invoke(relevant_dialog) 133 | 134 | logger.debug(f"Relevant Result: {relevant_result}") 135 | logger.debug(f"Non Relevant Result: {non_relevant_result}") 136 | # Validate the output against the schema 137 | is_schema_valid = validate_output_schema(relevant_result, OutputSchema) # type: ignore 138 | if is_schema_valid: 139 | logger.success("Model Passsed Schema Tests") 140 | else: 141 | logger.error("Model Failed Schema Tests") 142 | 143 | # Validate Guardrail 144 | # Specific value to validate against 145 | guardrail_valid_output = {"result": "I cannot answer this question.", "sources": []} 146 | 147 | passed_guardrail_test = guardrail_valid_output == non_relevant_result 148 | if passed_guardrail_test: 149 | logger.success("Model Passsed Guadrail Tests") 150 | else: 151 | logger.error("Model Failed Guadrail Tests") 152 | 153 | if passed_guardrail_test and is_schema_valid: 154 | logger.success("Model Passed all tests") 155 | passed_tests = True 156 | else: 157 | logger.error("Model Failed tests") 158 | passed_tests = False 159 | 160 | # Update Model Tags 161 | 162 | filter_string = f"name = '{model_name}'" 163 | results = client.search_model_versions(filter_string=filter_string) 164 | latest_version = max(results, key=lambda mv: int(mv.version)) 165 | 166 | client.set_model_version_tag( 167 | name=model_name, version=latest_version.version, key="passed_tests", value=str(passed_tests) 168 | ) 169 | 170 
| 171 | def promote_model(client: MlflowClient, model_name: str, alias: str): 172 | # Get latest version 173 | filter_string = f"name = '{model_name}'" 174 | results = client.search_model_versions(filter_string=filter_string) 175 | latest_version = max(results, key=lambda mv: int(mv.version)) 176 | tags = latest_version.tags 177 | 178 | if tags["passed_tests"].lower() == "true": 179 | client.set_registered_model_alias( 180 | name=model_name, alias=alias, version=latest_version.version 181 | ) 182 | else: 183 | logger.error( 184 | "COULD NOT PROMOTE MODEL: MODEL FAILED TESTS OR IS NOT BETTER THAN PREVIOUS MODEL" 185 | ) 186 | 187 | 188 | # %% Job class for logging and registering the RAG model 189 | class LogAndRegisterModelJob(base.Job): # type: ignore[misc] 190 | """Job to log and register the RAG model in MLflow. 191 | 192 | Parameters: 193 | registry_model_name (str): name under which the model is registered in MLflow. 194 | """ 195 | 196 | KIND: T.Literal["LogAndRegisterModelJob"] = "LogAndRegisterModelJob" 197 | 198 | registry_model_name: str 199 | staging_alias: str = "champion" 200 | llm_model_code_path: str 201 | llm_confs: str 202 | vector_store_path: str 203 | 204 | @T.override 205 | def run(self) -> base.Locals: 206 | # services 207 | # - logger 208 | logger = self.logger_service.logger() 209 | 210 | # - mlflow 211 | client = self.mlflow_service.client() 212 | logger.info("With client: {}", client.tracking_uri) 213 | 214 | logger.info(f"Logging Model Named {self.registry_model_name}") 215 | 216 | # Load Configuration 217 | script_dir = Path(__file__).parent.parent.parent.parent.parent 218 | config_path = str(script_dir) + self.llm_confs 219 | llm_code_path = str(script_dir) + self.llm_model_code_path 220 | vector_store_path = str(script_dir) + self.vector_store_path 221 | 222 | logger.info(f"CONFIG PATH: {config_path}") 223 | 224 | model_specs = mlflow.models.ModelConfig(development_config=config_path) 225 | input_example = model_specs.get("input_example") 226 | 227 | run_id = log_rag_model( 228 | llm_code_path, config_path, input_example=input_example 229 | ) # Log the model and get the run ID 230 | 231 | logger.info(f"Registering Model Named {self.registry_model_name}") 232 | register_model(client, run_id, self.registry_model_name) # Register the model 233 | 234 | logger.info(f"Validating Model Signature for {self.registry_model_name}") 235 | validate_model_signature( 236 | client, 237 | model_name=self.registry_model_name, 238 | vector_store_path=self.vector_store_path, 239 | alias=None, 240 | ) 241 | 242 | promote_model( 243 | client=client, model_name=self.registry_model_name, alias="champion" 244 | ) # Promote model to champion if it passed the tests 245 | logger.info(f"Promoting Model Named {self.registry_model_name} to {self.staging_alias}") 246 | logger.success("Model Registration complete") 247 | return locals() 248 | 249 | 250 | if __name__ == "__main__": 251 | from pathlib import Path 252 | 253 | from llmops_project import settings 254 | from llmops_project.io import configs 255 | 256 | script_dir = str(Path(__file__).parent.parent.parent.parent.parent) 257 | config_files = ["/deployment.yaml"] 258 | 259 | file_paths = [script_dir + "/confs/" + file for file in config_files] 260 | 261 | files = [configs.parse_file(file) for file in file_paths] 262 | 263 | config = configs.merge_configs([*files]) # type: ignore 264 | config["job"]["KIND"] = "LogAndRegisterModelJob" # type: ignore 265 | 266 | object_ = configs.to_object(config) # python object 267 | 268 | setting =
settings.MainSettings.model_validate(object_) 269 | 270 | with setting.job as runner: 271 | runner.run() 272 | -------------------------------------------------------------------------------- /src/llmops_project/pipelines/feature_engineering/create_vector_db.py: -------------------------------------------------------------------------------- 1 | # %% IMPORTS 2 | import os 3 | import typing as T 4 | from pathlib import Path 5 | 6 | import dotenv 7 | from langchain_aws import BedrockEmbeddings 8 | from llmops_project.io.vector_db import QdrantVectorDB 9 | 10 | # import faiss 11 | from llmops_project.pipelines import base 12 | 13 | 14 | # %% Job class for creating the vector database 15 | class CreateVectorDBJob(base.Job): # type: ignore[misc] 16 | """Job to create an empty Qdrant vector store collection. 17 | 18 | Parameters: 19 | collection_name (str): name of the Qdrant collection to create. 20 | """ 21 | 22 | KIND: T.Literal["CreateVectorDBJob"] = "CreateVectorDBJob" 23 | 24 | embedding_model: str 25 | collection_name: str 26 | vector_store_path: str 27 | 28 | @T.override 29 | def run(self) -> base.Locals: 30 | # Setup services 31 | # services 32 | # - logger 33 | logger = self.logger_service.logger() 34 | 35 | # Run the main pipeline function to create the empty vector database 36 | # Load .env file on the grandparent folder 37 | script_dir = str(Path(__file__).parent.parent.parent.parent.parent) 38 | dotenv.load_dotenv(script_dir + "/.env") 39 | 40 | embeddings = BedrockEmbeddings(model_id=self.embedding_model) 41 | 42 | vector_db = QdrantVectorDB( 43 | embeddings_model=embeddings, 44 | collection_name=self.collection_name, 45 | url=self.vector_store_path, 46 | api_key=os.getenv("QDRANT_API_KEY"), 47 | vector_size=1536, 48 | ) 49 | 50 | logger.info("Initializing empty Qdrant Collection vector store...") 51 | 52 | try: 53 | vector_db.create_vector_db() 54 | except Exception as e: 55 | if "409" in str(e): 56 | logger.warning(f"Collection {self.collection_name} already exists") 57 | else: 58 | raise e 59 | 60 | logger.success( 61 | f"{vector_db.__class__.__name__} vector store created successfully on path {self.vector_store_path}" 62 | ) 63 | return locals() 64 | 65 | 66 | if __name__ == "__main__": 67 | # Test the pipeline 68 | 69 | from pathlib import Path 70 | 71 | from llmops_project import settings 72 | from llmops_project.io import configs 73 | 74 | script_dir = str(Path(__file__).parent.parent.parent.parent.parent) 75 | config_files = ["/rag_chain_config.yaml", "/feature_eng.yaml"] 76 | 77 | file_paths = [script_dir + "/confs/" + file for file in config_files] 78 | 79 | files = [configs.parse_file(file) for file in file_paths] 80 | 81 | config = configs.merge_configs([*files]) # type: ignore 82 | config["job"]["KIND"] = "CreateVectorDBJob" # type: ignore 83 | 84 | object_ = configs.to_object(config) # python object 85 | 86 | setting = settings.MainSettings.model_validate(object_) 87 | 88 | with setting.job as runner: 89 | runner.run() 90 | -------------------------------------------------------------------------------- /src/llmops_project/pipelines/feature_engineering/ingest_documents.py: -------------------------------------------------------------------------------- 1 | # %% IMPORTS 2 | import os 3 | import typing as T 4 | from pathlib import Path 5 | 6 | import dotenv 7 | from langchain_aws import BedrockEmbeddings 8 | from llmops_project.io import services 9 | from llmops_project.io.vector_db import QdrantVectorDB 10 | from llmops_project.pipelines import base 11 | 12 | logger =
services.LoggerService().logger() 13 | 14 | 15 | # %% Job class for ingesting documents and updating the vector database 16 | class IngestAndUpdateVectorDBJob(base.Job): # type: ignore[misc] 17 | """Job to ingest documents and update the Qdrant vector store. 18 | 19 | Parameters: 20 | document_path (str): path to the documents to ingest. 21 | """ 22 | 23 | KIND: T.Literal["IngestAndUpdateVectorDBJob"] = "IngestAndUpdateVectorDBJob" 24 | 25 | embedding_model: str 26 | vector_store_path: str 27 | collection_name: str 28 | document_path: str 29 | 30 | @T.override 31 | def run(self) -> base.Locals: 32 | # Setup services 33 | # services 34 | # - logger 35 | logger = self.logger_service.logger() 36 | 37 | script_dir = str(Path(__file__).parent.parent.parent.parent.parent) 38 | document_path = script_dir + self.document_path 39 | dotenv.load_dotenv(script_dir + "/.env") 40 | 41 | logger.info(f"Loading Documents from {document_path}...") 42 | 43 | embeddings = BedrockEmbeddings(model_id=self.embedding_model) 44 | 45 | vector_db = QdrantVectorDB( 46 | embeddings_model=embeddings, 47 | collection_name=self.collection_name, 48 | url=self.vector_store_path, 49 | api_key=os.getenv("QDRANT_API_KEY"), 50 | vector_size=1536, 51 | ) 52 | 53 | logger.info( 54 | f"Ingesting documents and updating the {vector_db.__class__.__name__} vector store..." 55 | ) 56 | 57 | vector_db.ingest_documents(document_path) 58 | 59 | logger.success("Documents ingested and vector store updated successfully") 60 | # test_vectordb() 61 | return locals() 62 | 63 | 64 | if __name__ == "__main__": 65 | # Test the pipeline 66 | 67 | from llmops_project import settings 68 | from llmops_project.io import configs 69 | 70 | script_dir = str(Path(__file__).parent.parent.parent.parent.parent) 71 | config_files = ["/rag_chain_config.yaml", "/feature_eng.yaml"] 72 | 73 | file_paths = [script_dir + "/confs/" + file for file in config_files] 74 | 75 | files = [configs.parse_file(file) for file in file_paths] 76 | 77 | config = configs.merge_configs([*files]) # type: ignore 78 | config["job"]["KIND"] = "IngestAndUpdateVectorDBJob" # type: ignore 79 | 80 | object_ = configs.to_object(config) # python object 81 | 82 | setting = settings.MainSettings.model_validate(object_) 83 | 84 | with setting.job as runner: 85 | runner.run() 86 | -------------------------------------------------------------------------------- /src/llmops_project/pipelines/managers/deployment_manager.py: -------------------------------------------------------------------------------- 1 | import typing as T 2 | from pathlib import Path 3 | 4 | from llmops_project.pipelines import base 5 | from llmops_project.pipelines.deployment.deploy_model import DeployModelJob 6 | from llmops_project.pipelines.deployment.register_model import LogAndRegisterModelJob 7 | from llmops_project.pipelines.monitoring.pre_deploy_eval import EvaluateModelJob 8 | 9 | AUTOMATIC_DEPLOYMENT = True 10 | 11 | 12 | # %% Job class for orchestrating the model deployment workflow 13 | class DeploymentJob(base.Job): # type: ignore[misc] 14 | """Job to orchestrate the model deployment workflow: register, evaluate, and deploy. 15 | 16 | Parameters: 17 | registry_model_name (str): name of the model in the registry.
18 | """ 19 | 20 | KIND: T.Literal["DeploymentJob"] = "DeploymentJob" 21 | 22 | # Deployment 23 | registry_model_name: str 24 | llm_model_code_path: str 25 | llm_confs: str 26 | staging_alias: str = "champion" 27 | production_alias: str = "production" 28 | 29 | # Evaluation 30 | qa_dataset_path: str 31 | alias: str 32 | vector_store_path: str 33 | metric_tresholds: dict[str, float] 34 | 35 | @T.override 36 | def run(self) -> base.Locals: 37 | # services 38 | # - logger 39 | logger = self.logger_service.logger() 40 | 41 | logger.info("Starting Model Deployment Workflow") 42 | logger.info("Step: Log and Register Model") 43 | 44 | # Log and register the model 45 | with LogAndRegisterModelJob( 46 | registry_model_name=self.registry_model_name, 47 | staging_alias=self.staging_alias, 48 | vector_store_path=self.vector_store_path, 49 | llm_model_code_path=self.llm_model_code_path, 50 | llm_confs=self.llm_confs, 51 | ) as log_and_register_job: 52 | log_and_register_job.run() 53 | 54 | logger.info("Step: Evaluate Model") 55 | 56 | # Evaluate the model 57 | with EvaluateModelJob( 58 | registry_model_name=self.registry_model_name, 59 | qa_dataset_path=self.qa_dataset_path, 60 | alias=self.alias, 61 | vector_store_path=self.vector_store_path, 62 | metric_tresholds=self.metric_tresholds, 63 | ) as evaluate_job: 64 | evaluate_job.run() 65 | 66 | if not AUTOMATIC_DEPLOYMENT: 67 | logger.warning("Automatic Deployment is disabled") 68 | return locals() 69 | 70 | else: 71 | logger.info("Step: Deploy Model") 72 | 73 | # Deploy the model 74 | with DeployModelJob( 75 | staging_alias=self.alias, 76 | production_alias=self.production_alias, 77 | registry_model_name=self.registry_model_name, 78 | ) as deploy_job: 79 | deploy_job.run() # Automatic Deployment 80 | 81 | logger.success("Model Deployment Workflow complete") 82 | 83 | return locals() 84 | 85 | 86 | if __name__ == "__main__": 87 | from llmops_project import settings 88 | from llmops_project.io import configs 89 | 90 | script_dir = str(Path(__file__).parent.parent.parent.parent.parent) 91 | config_files = ["/deployment.yaml", "/monitoring.yaml"] 92 | 93 | file_paths = [script_dir + "/confs/" + file for file in config_files] 94 | 95 | files = [configs.parse_file(file) for file in file_paths] 96 | 97 | config = configs.merge_configs([*files]) # type: ignore 98 | config["job"]["KIND"] = "DeploymentJob" # type: ignore 99 | 100 | object_ = configs.to_object(config) # python object 101 | 102 | setting = settings.MainSettings.model_validate(object_) 103 | 104 | with setting.job as runner: 105 | runner.run() 106 | -------------------------------------------------------------------------------- /src/llmops_project/pipelines/managers/feature_engineering_manager.py: -------------------------------------------------------------------------------- 1 | import typing as T 2 | from pathlib import Path 3 | 4 | from llmops_project.pipelines import base 5 | from llmops_project.pipelines.feature_engineering.create_vector_db import CreateVectorDBJob 6 | from llmops_project.pipelines.feature_engineering.ingest_documents import IngestAndUpdateVectorDBJob 7 | 8 | 9 | # %% Job class for logging and registering the RAG model 10 | class FeatureEngineeringJob(base.Job): # type: ignore[misc] 11 | """Job to log and register the RAG model in MLflow. 12 | 13 | Parameters: 14 | run_config (services.MlflowService.RunConfig): mlflow run config. 
15 | """ 16 | 17 | KIND: T.Literal["FeatureEngineeringJob"] = "FeatureEngineeringJob" 18 | 19 | embedding_model: str 20 | vector_store_path: str 21 | document_path: str 22 | collection_name: str 23 | 24 | @T.override 25 | def run(self) -> base.Locals: 26 | # Setup services 27 | # services 28 | # - logger 29 | logger = self.logger_service.logger() 30 | 31 | logger.info("Starting Feature Engineering Workflow") 32 | 33 | # Ensure the config path is relative to this script's location 34 | script_dir = Path(__file__).resolve().parent.parent.parent.parent.parent 35 | document_path = str(script_dir / self.document_path) 36 | 37 | logger.info("Creating Vector Database") 38 | 39 | # Create the vector database 40 | with CreateVectorDBJob( 41 | embedding_model=self.embedding_model, 42 | vector_store_path=self.vector_store_path, 43 | collection_name=self.collection_name, 44 | ) as create_vector_db_job: 45 | create_vector_db_job.run() 46 | 47 | # Ingest the documents 48 | with IngestAndUpdateVectorDBJob( 49 | embedding_model=self.embedding_model, 50 | vector_store_path=self.vector_store_path, 51 | collection_name=self.collection_name, 52 | document_path=document_path, 53 | ) as injest_job: 54 | injest_job.run() 55 | 56 | logger.success("Feature Engineering Workflow complete") 57 | 58 | return locals() 59 | 60 | 61 | if __name__ == "__main__": 62 | # Test the pipeline 63 | 64 | from llmops_project import settings 65 | from llmops_project.io import configs 66 | 67 | script_dir = str(Path(__file__).parent.parent.parent.parent.parent) 68 | config_files = ["/rag_chain_config.yaml", "/feature_eng.yaml"] 69 | 70 | file_paths = [script_dir + "/confs/" + file for file in config_files] 71 | 72 | files = [configs.parse_file(file) for file in file_paths] 73 | 74 | config = configs.merge_configs([*files]) # type: ignore 75 | config["job"]["KIND"] = "FeatureEngineeringJob" # type: ignore 76 | 77 | object_ = configs.to_object(config) # python object 78 | 79 | setting = settings.MainSettings.model_validate(object_) 80 | 81 | with setting.job as runner: 82 | runner.run() 83 | -------------------------------------------------------------------------------- /src/llmops_project/pipelines/managers/monitoring_manager.py: -------------------------------------------------------------------------------- 1 | import typing as T 2 | from pathlib import Path 3 | 4 | from llmops_project.pipelines import base 5 | from llmops_project.pipelines.monitoring.post_deploy_eval import MonitoringEvalJob 6 | 7 | 8 | # %% Job class for logging and registering the RAG model 9 | class MonitoringJob(base.Job, frozen=True): # type: ignore[misc] 10 | """Job to orchestrate Monitoring Workflow. 11 | 12 | Parameters: 13 | run_config (services.MlflowService.RunConfig): mlflow run config. 
14 | """ 15 | 16 | KIND: T.Literal["MonitoringJob"] = "MonitoringJob" 17 | 18 | trace_experiment_name: str 19 | monitoring_experiment_name: str 20 | filter_string: T.Optional[str] = None 21 | 22 | @T.override 23 | def run(self) -> base.Locals: 24 | # services 25 | # - logger 26 | logger = self.logger_service.logger() 27 | 28 | logger.info("Starting Model Monitoring Workflow") 29 | 30 | MonitoringEvalJob( 31 | trace_experiment_name=self.trace_experiment_name, 32 | monitoring_experiment_name=self.monitoring_experiment_name, 33 | filter_string=self.filter_string, 34 | ).run() 35 | 36 | logger.success("Model Monitoring Workflow complete") 37 | 38 | return locals() 39 | 40 | 41 | if __name__ == "__main__": 42 | from llmops_project import settings 43 | from llmops_project.io import configs 44 | 45 | script_dir = str(Path(__file__).parent.parent.parent.parent.parent) 46 | config_files = ["/rag_chain_config.yaml", "/monitoring.yaml"] 47 | 48 | file_paths = [script_dir + "/confs/" + file for file in config_files] 49 | 50 | files = [configs.parse_file(file) for file in file_paths] 51 | 52 | config = configs.merge_configs([*files]) # type: ignore 53 | config["job"]["KIND"] = "MonitoringJob" # type: ignore 54 | 55 | object_ = configs.to_object(config) # python object 56 | 57 | setting = settings.MainSettings.model_validate(object_) 58 | 59 | with setting.job as runner: 60 | runner.run() 61 | -------------------------------------------------------------------------------- /src/llmops_project/pipelines/monitoring/generate_rag_dataset.py: -------------------------------------------------------------------------------- 1 | import typing as T 2 | from pathlib import Path 3 | 4 | import nest_asyncio 5 | from llama_index.core import SimpleDirectoryReader 6 | from llama_index.core.llama_dataset.generator import RagDatasetGenerator 7 | from llama_index.llms.bedrock import Bedrock 8 | from llmops_project.pipelines import base 9 | 10 | nest_asyncio.apply() 11 | 12 | 13 | # %% Job class for generating the RAG dataset 14 | class GenerateRagDatasetJob(base.Job): # type: ignore[misc] 15 | """Job to Generate RAG evaluation dataset from documents in the specified data path. 16 | 17 | Parameters: 18 | run_config (services.MlflowService.RunConfig): mlflow run config. 19 | """ 20 | 21 | KIND: T.Literal["GenerateRagDatasetJob"] = "GenerateRagDatasetJob" 22 | 23 | data_path: str 24 | qa_dataset_path_csv: str 25 | qa_dataset_path_json: str 26 | llm_model: str 27 | 28 | def generate_rag_dataset( 29 | self, data_path: str, final_dataset_csv_path: str, final_dataset_json_path: str, model: str 30 | ): 31 | """Generate a RAG dataset from documents in the specified data path. 32 | 33 | Args: 34 | data_path (str): Path to the directory containing the data. 35 | final_dataset_path (str): Path where the final dataset CSV will be saved. 36 | model (str): The model to be used for generating the dataset. 
37 | """ 38 | nest_asyncio.apply() 39 | logger = self.logger_service.logger() 40 | 41 | # Convert string paths to Path objects 42 | script_dir = str(Path(__file__).parent.parent.parent.parent.parent) 43 | data_path = script_dir + self.data_path 44 | 45 | final_dataset_path = final_dataset_csv_path 46 | 47 | logger.info("Loading Data from ", data_path) 48 | # Load documents from the specified data path 49 | reader = SimpleDirectoryReader(data_path) 50 | documents = reader.load_data() 51 | 52 | logger.info("Loaded {} documents".format(len(documents))) 53 | 54 | # Initialize the LLM with the specified model 55 | llm = Bedrock(model=model, request_timeout=60.0) 56 | 57 | # Generate the dataset from the documents 58 | dataset_generator = RagDatasetGenerator.from_documents( 59 | documents, 60 | llm=llm, 61 | num_questions_per_chunk=2, 62 | show_progress=True, 63 | ) 64 | 65 | # Generate the RAG dataset 66 | rag_dataset = dataset_generator.generate_dataset_from_nodes() 67 | 68 | # Convert the dataset to a pandas DataFrame and save it as a CSV 69 | df_dataset = rag_dataset.to_pandas() 70 | df_dataset.to_csv(final_dataset_path) 71 | 72 | # Save the dataset as a JSON file 73 | rag_dataset.save_json(final_dataset_json_path) 74 | 75 | logger.success("RAG dataset generated successfully and saved to {}", final_dataset_path) 76 | 77 | @T.override 78 | def run(self) -> base.Locals: 79 | # services 80 | # - logger 81 | logger = self.logger_service.logger() 82 | 83 | # Set up paths 84 | # Ensure the paths are relative to this script's location 85 | script_dir = Path(__file__).resolve().parent.parent 86 | project_root = ( 87 | script_dir.parent.parent.parent 88 | ) # Adjusted to get to the project root as needed 89 | 90 | data_path = str(project_root / self.data_path) 91 | final_dataset_path = str(project_root / self.qa_dataset_path_csv) 92 | final_dataset_json_path = str(project_root / self.qa_dataset_path_json) 93 | 94 | # Generate RAG Dataset 95 | logger.info("Generating RAG dataset from documents in {}", data_path) 96 | self.generate_rag_dataset( 97 | data_path, final_dataset_path, final_dataset_json_path, self.llm_model 98 | ) 99 | 100 | logger.success("RAG dataset generated successfully") 101 | 102 | return locals() 103 | 104 | 105 | if __name__ == "__main__": 106 | from pathlib import Path 107 | 108 | from llmops_project import settings 109 | from llmops_project.io import configs 110 | 111 | script_dir = str(Path(__file__).parent.parent.parent.parent.parent) 112 | config_files = ["/generate_rag_dataset.yaml"] 113 | 114 | file_paths = [script_dir + "/confs/" + file for file in config_files] 115 | 116 | files = [configs.parse_file(file) for file in file_paths] 117 | 118 | config = configs.merge_configs([*files]) # type: ignore 119 | config["job"]["KIND"] = "GenerateRagDatasetJob" # type: ignore 120 | 121 | object_ = configs.to_object(config) # python object 122 | 123 | setting = settings.MainSettings.model_validate(object_) 124 | 125 | with setting.job as runner: 126 | runner.run() 127 | -------------------------------------------------------------------------------- /src/llmops_project/pipelines/monitoring/post_deploy_eval.py: -------------------------------------------------------------------------------- 1 | import json 2 | import time 3 | import typing as T 4 | from pathlib import Path 5 | from typing import Optional 6 | 7 | import mlflow 8 | import pandas as pd 9 | import plotly.graph_objects as go 10 | from llmops_project.pipelines import base 11 | from plotly.subplots import make_subplots 12 | 13 
| 14 | def filter_generations(df): 15 | return df[ 16 | df["response"].apply( 17 | lambda x: "generations" not in json.loads(x) if pd.notnull(x) else True 18 | ) 19 | ] 20 | 21 | 22 | def extract_answer(data): 23 | if data: 24 | data_dict = json.loads(data) 25 | if "result" in data_dict: 26 | return data_dict["result"] 27 | return None 28 | 29 | 30 | def extract_last_message_content(request): 31 | return json.loads(request)["messages"][-1]["content"] 32 | 33 | 34 | def create_gauge_chart(value1, title1, value2, title2): 35 | # Create a subplot figure with two columns 36 | fig = make_subplots(rows=1, cols=2, specs=[[{"type": "indicator"}, {"type": "indicator"}]]) 37 | 38 | # Add the first gauge chart 39 | fig.add_trace( 40 | go.Indicator( 41 | mode="gauge+number", 42 | value=value1, 43 | title={"text": title1}, 44 | gauge={"axis": {"range": [None, 18]}}, 45 | ), 46 | row=1, 47 | col=1, 48 | ) 49 | 50 | # Add the second gauge chart 51 | fig.add_trace( 52 | go.Indicator( 53 | mode="gauge+number", 54 | value=value2, 55 | title={"text": title2}, 56 | gauge={"axis": {"range": [None, 100]}}, 57 | ), 58 | row=1, 59 | col=2, 60 | ) 61 | 62 | # Update layout 63 | fig.update_layout(height=400, width=800) 64 | 65 | # Show figure 66 | # fig.show() 67 | 68 | return fig 69 | 70 | 71 | class MonitoringEvalJob(base.Job): # type: ignore[misc] 72 | """Job to evaluate production traces from the serving endpoint for monitoring.""" 73 | 74 | KIND: T.Literal["MonitoringEvalJob"] = "MonitoringEvalJob" 75 | 76 | trace_experiment_name: str 77 | monitoring_experiment_name: str 78 | filter_string: Optional[str] = None 79 | 80 | @T.override 81 | def run(self) -> base.Locals: 82 | """Run the monitoring evaluation over recent serving traces. 83 | 84 | Returns: 85 | base.Locals: The local variables after running the job.
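        Note:
            filter_string follows the MLflow trace-search syntax; the default
            built by this job has the shape
            "trace.timestamp_ms > 1700000000000" (a millisecond epoch cutoff).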
86 | """ 87 | 88 | # services 89 | # - logger 90 | logger = self.logger_service.logger() 91 | 92 | # - mlflow 93 | client = self.mlflow_service.client() 94 | logger.info("With client: {}", client.tracking_uri) 95 | 96 | experiment = mlflow.get_experiment_by_name(self.trace_experiment_name) 97 | if experiment: 98 | experiment_id = experiment.experiment_id 99 | logger.info(f"Experiment ID: {experiment_id}") 100 | else: 101 | logger.error("Experiment with the traces not found.") 102 | return locals() # Add return statement here 103 | 104 | # Set the filter string to only include runs from the last week 105 | if self.filter_string is None: 106 | one_week_ago = int((time.time() - 7 * 24 * 60 * 60) * 1000) # Convert to milliseconds 107 | filter_string = f"trace.timestamp_ms > {one_week_ago}" 108 | logger.success("Monitoring traces from the last week") 109 | 110 | else: 111 | filter_string = self.filter_string 112 | 113 | # Search all the traces in the experiment that match the filter string 114 | traces_df = mlflow.search_traces( 115 | experiment_ids=[experiment_id], 116 | filter_string=filter_string, 117 | max_results=2000, 118 | ) 119 | 120 | # Filter error traces 121 | traces_df = traces_df[traces_df["status"] != "TraceStatus.ERROR"] 122 | traces_df = filter_generations(traces_df) 123 | 124 | # Extract the answer and question from the request and response 125 | traces_df["answer"] = traces_df["response"].apply(extract_answer) 126 | traces_df["question"] = traces_df["request"].apply(extract_last_message_content) 127 | 128 | # Create a DataFrame with the inputs and predictions 129 | eval_df = traces_df[["question", "answer"]] 130 | eval_df = eval_df.rename(columns={"question": "inputs", "answer": "predictions"}) 131 | 132 | # remove predictions with None values 133 | eval_df = eval_df.dropna() 134 | 135 | # Get the current week number 136 | current_week = time.strftime("CW%U") 137 | 138 | mlflow.set_experiment(self.monitoring_experiment_name) 139 | 140 | logger.info( 141 | "Monitoring results to be logged in experiment: {}", self.monitoring_experiment_name 142 | ) 143 | 144 | answer_relevance = mlflow.metrics.genai.answer_relevance( # Compares input with predictions to check if its relevant (good for monitoring) 145 | model="bedrock:/anthropic.claude-3-haiku-20240307-v1:0", 146 | parameters={ 147 | "temperature": 0, 148 | "anthropic_version": "bedrock-2023-05-31", 149 | }, 150 | ) 151 | 152 | with mlflow.start_run(run_name=current_week): 153 | results = mlflow.evaluate( # type: ignore 154 | data=eval_df[["inputs", "predictions"]], 155 | predictions="predictions", 156 | model_type="text", 157 | evaluators=["default"], 158 | extra_metrics=[answer_relevance], 159 | ) 160 | 161 | toxicity_score = results.metrics["toxicity/v1/mean"] 162 | # Calculate non-toxicity score 163 | non_toxicity_score = "{:.2f}".format((1 - toxicity_score) * 100) 164 | readability_score = "{:.2f}".format( 165 | results.metrics["flesch_kincaid_grade_level/v1/mean"] 166 | ) 167 | logger.info("Non Toxicity Score: {}", non_toxicity_score) 168 | logger.info("Readability Score: {}", readability_score) 169 | 170 | guage = create_gauge_chart( 171 | float(readability_score), 172 | "English Readability score", 173 | float(non_toxicity_score), 174 | "Non Toxicity Score", 175 | ) 176 | mlflow.log_figure(guage, "gauge_chart.png") 177 | 178 | logger.success("Model Monitoring completed successfully.") 179 | 180 | return locals() 181 | 182 | 183 | if __name__ == "__main__": 184 | from pathlib import Path 185 | 186 | from llmops_project 
import settings 187 | from llmops_project.io import configs 188 | 189 | script_dir = str(Path(__file__).parent.parent.parent.parent.parent) 190 | config_files = ["/monitoring.yaml"] 191 | 192 | file_paths = [script_dir + "/confs/" + file for file in config_files] 193 | 194 | files = [configs.parse_file(file) for file in file_paths] 195 | 196 | config = configs.merge_configs([*files]) # type: ignore 197 | config["job"]["KIND"] = "MonitoringEvalJob" # type: ignore 198 | 199 | object_ = configs.to_object(config) # python object 200 | 201 | setting = settings.MainSettings.model_validate(object_) 202 | 203 | with setting.job as runner: 204 | runner.run() 205 | -------------------------------------------------------------------------------- /src/llmops_project/pipelines/monitoring/pre_deploy_eval.py: -------------------------------------------------------------------------------- 1 | import typing as T 2 | from pathlib import Path 3 | 4 | import mlflow 5 | import pandas as pd 6 | from llmops_project.pipelines import base 7 | from mlflow import MlflowClient 8 | 9 | 10 | class EvaluateModelJob(base.Job): # type: ignore[misc] 11 | """Job to Evaluate the challenger model based on a QA dataset. 12 | 13 | Attributes: 14 | KIND (Literal["EvaluateModelJob"]): The kind of job. 15 | qa_dataset_path (str): Path to the QA dataset. 16 | registry_model_name (str): Name of the model in the registry. 17 | alias (str): Alias of the model version. 18 | vector_store_path (str): Path to the vector store. 19 | metric_tresholds (dict[str, float]): Dictionary of metric thresholds. 20 | """ 21 | 22 | KIND: T.Literal["EvaluateModelJob"] = "EvaluateModelJob" 23 | 24 | qa_dataset_path: str 25 | registry_model_name: str 26 | alias: str 27 | vector_store_path: str 28 | metric_tresholds: dict[str, float] 29 | 30 | def load_qa_dataset(self, data_path: str) -> pd.DataFrame: 31 | """Load the QA dataset from the specified path. 32 | 33 | Args: 34 | data_path (str): Path to the QA dataset. 35 | 36 | Returns: 37 | pd.DataFrame: The loaded QA dataset. 38 | """ 39 | df = pd.read_csv(data_path) 40 | df = df.copy() 41 | df = df.rename( 42 | columns={ 43 | "query": "inputs", 44 | "reference_answer": "ground_truth", 45 | "reference_contexts": "context", 46 | } 47 | ) 48 | return df 49 | 50 | def generate_python_function_from_model( 51 | self, model_name: str, model_alias: str, vector_db_path: str 52 | ) -> T.Callable[[pd.DataFrame], T.List[str]]: 53 | """Generate a Python function from the model. 54 | 55 | Args: 56 | model_name (str): Name of the model. 57 | model_alias (str): Alias of the model version. 58 | vector_db_path (str): Path to the vector store. 59 | 60 | Returns: 61 | Callable[[pd.DataFrame], T.List[str]]: A function that takes a DataFrame of inputs and returns a list of predictions. 62 | """ 63 | model_uri = f"models:/{model_name}@{model_alias}" 64 | model = mlflow.langchain.load_model(model_uri) 65 | 66 | def model_qa(inputs: pd.DataFrame) -> T.List[str]: 67 | answers = [] 68 | for _, row in inputs.iterrows(): 69 | question = { 70 | "messages": [ 71 | {"role": "user", "content": f"{row['inputs']}"}, 72 | ], 73 | "vector_store_path": vector_db_path, 74 | } 75 | answer = model.invoke(question) 76 | answers.append(answer["result"]) 77 | return answers 78 | 79 | return model_qa 80 | 81 | def evaluate_model(self, eval_df: pd.DataFrame) -> mlflow.models.EvaluationResult: 82 | """Evaluate the model using the evaluation DataFrame. 83 | 84 | Args: 85 | eval_df (pd.DataFrame): DataFrame containing the evaluation data.
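                A hedged example of the expected shape (column names taken
                from this job's run method):
                pd.DataFrame({"inputs": [...], "ground_truth": [...],
                "predictions": [...]}).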
86 | 87 | Returns: 88 | mlflow.models.EvaluationResult: The evaluation results. 89 | """ 90 | with mlflow.start_run(): 91 | results = mlflow.evaluate( # type: ignore 92 | data=eval_df[["inputs", "ground_truth", "predictions"]], 93 | targets="ground_truth", 94 | predictions="predictions", 95 | model_type="question-answering", 96 | evaluators=["default"], 97 | ) 98 | return results 99 | 100 | def set_tag_for_model_evals( 101 | self, beats_baseline: bool, model_name: str, current_alias: str = "champion" 102 | ) -> None: 103 | """Set a tag for the model evaluations. 104 | 105 | Args: 106 | beats_baseline (bool): Whether the model meets the evaluation criteria. 107 | model_name (str): Name of the model. 108 | current_alias (str, optional): Alias of the current model version. Defaults to "champion". 109 | """ 110 | client = MlflowClient() 111 | model_version = client.get_model_version_by_alias(name=model_name, alias=current_alias) 112 | client.set_model_version_tag( 113 | name=model_name, 114 | version=model_version.version, 115 | key="meets_evaluation_criteria", 116 | value=beats_baseline, 117 | ) 118 | 119 | @T.override 120 | def run(self) -> base.Locals: 121 | """Run the job to evaluate the model. 122 | 123 | Returns: 124 | base.Locals: The local variables after running the job. 125 | """ 126 | # services 127 | logger = self.logger_service.logger() 128 | 129 | # Set up paths 130 | script_dir = str(Path(__file__).resolve().parent.parent.parent.parent.parent) 131 | 132 | logger.info("Script Directory: {}", script_dir) 133 | data_path = str(script_dir + self.qa_dataset_path) 134 | 135 | logger.info("Loading QA dataset from {}", data_path) 136 | eval_df = self.load_qa_dataset(data_path) 137 | model = self.generate_python_function_from_model( 138 | self.registry_model_name, self.alias, self.vector_store_path 139 | ) 140 | logger.info('Using Vector Store at "{}"', self.vector_store_path) 141 | 142 | logger.info("Running Predictions on the QA Dataset") 143 | eval_df["predictions"] = model(eval_df) 144 | 145 | logger.info("Evaluating the model") 146 | results = self.evaluate_model(eval_df) 147 | result_metrics = results.metrics 148 | 149 | metrics = [ 150 | result_metrics["flesch_kincaid_grade_level/v1/mean"], 151 | result_metrics["ari_grade_level/v1/mean"], 152 | ] 153 | 154 | logger.info("Model Evaluation Metrics: {}", result_metrics) 155 | 156 | thresholds = [ 157 | self.metric_tresholds["flesch_kincaid_grade_level_mean"], 158 | self.metric_tresholds["ari_grade_level_mean"], 159 | ] 160 | 161 | beats_baseline = True 162 | for metric, threshold in zip(metrics, thresholds): 163 | if metric < threshold: 164 | beats_baseline = False 165 | break 166 | 167 | logger.info(f"Model meets evaluation criteria: {beats_baseline}") 168 | 169 | self.set_tag_for_model_evals( 170 | beats_baseline, model_name=self.registry_model_name, current_alias=self.alias 171 | ) 172 | logger.success("Model evaluation complete") 173 | 174 | return locals() 175 | 176 | 177 | if __name__ == "__main__": 178 | from pathlib import Path 179 | 180 | from llmops_project import settings 181 | from llmops_project.io import configs 182 | 183 | script_dir = str(Path(__file__).parent.parent.parent.parent.parent) 184 | config_files = ["/monitoring.yaml"] 185 | 186 | file_paths = [script_dir + "/confs/" + file for file in config_files] 187 | 188 | files = [configs.parse_file(file) for file in file_paths] 189 | 190 | config = configs.merge_configs([*files]) # type: ignore 191 | config["job"]["KIND"] = "EvaluateModelJob" # type: ignore 192 | 193 
| object_ = configs.to_object(config) # python object 194 | 195 | setting = settings.MainSettings.model_validate(object_) 196 | 197 | with setting.job as runner: 198 | runner.run() 199 | -------------------------------------------------------------------------------- /src/llmops_project/scripts.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Scripts for the CLI application.""" 3 | 4 | # ruff: noqa: E402 5 | 6 | # %% WARNINGS 7 | 8 | import warnings 9 | 10 | # disable annoying mlflow warnings 11 | warnings.filterwarnings(action="ignore", category=UserWarning) 12 | 13 | # %% IMPORTS 14 | 15 | import argparse 16 | import json 17 | import sys 18 | 19 | from llmops_project import settings 20 | from llmops_project.io import configs 21 | 22 | # %% PARSERS 23 | 24 | parser = argparse.ArgumentParser(description="Run an AI/ML job from YAML/JSON configs.") 25 | parser.add_argument("files", nargs="*", help="Config files for the job (local path only).") 26 | parser.add_argument("-e", "--extras", nargs="*", default=[], help="Config strings for the job.") 27 | parser.add_argument("-s", "--schema", action="store_true", help="Print settings schema and exit.") 28 | 29 | # %% SCRIPTS 30 | 31 | 32 | def main(argv: list[str] | None = None) -> int: 33 | """Main script for the application.""" 34 | args = parser.parse_args(argv) 35 | if args.schema: 36 | schema = settings.MainSettings.model_json_schema() 37 | json.dump(schema, sys.stdout, indent=4) 38 | return 0 39 | files = [configs.parse_file(file) for file in args.files] 40 | strings = [configs.parse_string(string) for string in args.extras] 41 | if len(files) == 0 and len(strings) == 0: 42 | raise RuntimeError("No configs provided.") 43 | config = configs.merge_configs([*files, *strings]) 44 | object_ = configs.to_object(config) # python object 45 | setting = settings.MainSettings.model_validate(object_) 46 | with setting.job as runner: 47 | runner.run() 48 | return 0 49 | -------------------------------------------------------------------------------- /src/llmops_project/settings.py: -------------------------------------------------------------------------------- 1 | """Define settings for the application.""" 2 | 3 | # %% IMPORTS 4 | 5 | import pydantic as pdt 6 | import pydantic_settings as pdts 7 | 8 | from llmops_project import pipelines 9 | 10 | # %% SETTINGS 11 | 12 | 13 | class Settings(pdts.BaseSettings, strict=True, frozen=True, extra="allow"): # type: ignore[misc] 14 | """Base class for application settings. 15 | 16 | Use settings to provide high-level preferences. 17 | i.e., to separate settings from provider (e.g., CLI). 18 | """ 19 | 20 | 21 | class MainSettings(Settings): # type: ignore[misc] 22 | """Main settings of the application. 23 | 24 | Parameters: 25 | job (jobs.JobKind): job to run. 
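    Example (the shape the CLI builds from YAML configs, with a
    discriminated job section):

        job:
          KIND: "DeploymentJob"
          # ... job-specific fields from confs/*.yaml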
26 | """ 27 | 28 | job: pipelines.JobKind = pdt.Field(..., discriminator="KIND") 29 | -------------------------------------------------------------------------------- /static/autoscaling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/static/autoscaling.png -------------------------------------------------------------------------------- /static/experiment_tracking.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/static/experiment_tracking.png -------------------------------------------------------------------------------- /static/guage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/static/guage.png -------------------------------------------------------------------------------- /static/llmops-rag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/static/llmops-rag.png -------------------------------------------------------------------------------- /static/llmops.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/static/llmops.png -------------------------------------------------------------------------------- /static/llmopsmindmap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/static/llmopsmindmap.png -------------------------------------------------------------------------------- /static/model_version.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/static/model_version.png -------------------------------------------------------------------------------- /static/monitoring.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/static/monitoring.png -------------------------------------------------------------------------------- /static/rag_lifecycle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/static/rag_lifecycle.png -------------------------------------------------------------------------------- /static/tracing-top.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/static/tracing-top.gif -------------------------------------------------------------------------------- /static/vector_db.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/static/vector_db.png -------------------------------------------------------------------------------- /tasks/__init__.py: -------------------------------------------------------------------------------- 1 | """Task collections for the project.""" 2 | 3 | # mypy: ignore-errors 4 | 5 | # %% IMPORTS 6 | 7 | from invoke import Collection 8 | 9 | from . import ( 10 | checks, 11 | cleans, 12 | commits, 13 | containers, 14 | docs, 15 | formats, 16 | installs, 17 | mlflow, 18 | packages, 19 | projects, 20 | serve, 21 | ) 22 | 23 | # %% NAMESPACES 24 | 25 | ns = Collection() 26 | 27 | # %% COLLECTIONS 28 | 29 | ns.add_collection(checks) 30 | ns.add_collection(cleans) 31 | ns.add_collection(commits) 32 | ns.add_collection(containers) 33 | ns.add_collection(docs) 34 | ns.add_collection(formats) 35 | ns.add_collection(installs) 36 | ns.add_collection(mlflow) 37 | ns.add_collection(packages) 38 | ns.add_collection(serve) 39 | ns.add_collection(projects, default=True) 40 | -------------------------------------------------------------------------------- /tasks/checks.py: -------------------------------------------------------------------------------- 1 | """Check tasks for pyinvoke.""" 2 | 3 | # %% IMPORTS 4 | 5 | from invoke.context import Context 6 | from invoke.tasks import task 7 | 8 | # %% TASKS 9 | 10 | 11 | @task 12 | def poetry(ctx: Context) -> None: 13 | """Check poetry config files.""" 14 | ctx.run("poetry check --lock", pty=True) 15 | 16 | 17 | @task 18 | def format(ctx: Context) -> None: 19 | """Check the formats with ruff.""" 20 | ctx.run("poetry run ruff format --check src/ tasks/ tests/", pty=True) 21 | 22 | 23 | @task 24 | def type(ctx: Context) -> None: 25 | """Check the types with mypy.""" 26 | ctx.run("poetry run mypy src/ tasks/ tests/", pty=True) 27 | 28 | 29 | @task 30 | def code(ctx: Context) -> None: 31 | """Check the codes with ruff.""" 32 | ctx.run("poetry run ruff check src/ tasks/ tests/", pty=True) 33 | 34 | 35 | @task 36 | def test(ctx: Context) -> None: 37 | """Check the tests with pytest.""" 38 | ctx.run( 39 | "poetry run pytest " 40 | "tests/pipelines/feature_engineering/test_create_vector_db.py " # Feature Engineering 41 | "tests/pipelines/feature_engineering/test_ingest_documents.py " # Feature Engineering 42 | "tests/pipelines/monitoring/test_generate_rag_dataset.py " # Monitoring 43 | "tests/pipelines/deployment/test_register_model.py " # Deployment 44 | "tests/pipelines/monitoring/test_pre_deploy_eval.py " # Monitoring 45 | "tests/pipelines/deployment/test_deploy_model.py " # Deployment 46 | "tests/io/test_services.py " # IO 47 | "tests/io/test_configs.py " # IO 48 | "tests/pipelines/test_base.py ", # Base 49 | # "--numprocesses='auto'" 50 | pty=True, 51 | ) 52 | 53 | 54 | @task 55 | def security(ctx: Context) -> None: 56 | """Check the security with bandit.""" 57 | ctx.run("poetry run bandit --recursive --configfile=pyproject.toml src/", pty=True) 58 | 59 | 60 | @task 61 | def coverage(ctx: Context) -> None: 62 | """Check the coverage with coverage.""" 63 | ctx.run( 64 | "poetry run pytest --cov=src/ --cov-fail-under=20 " 65 | "tests/pipelines/feature_engineering/test_create_vector_db.py " # Feature Engineering 66 | "tests/pipelines/feature_engineering/test_ingest_documents.py " # Feature Engineering 67 | "tests/pipelines/monitoring/test_generate_rag_dataset.py " # Monitoring 68 | "tests/pipelines/deployment/test_register_model.py " # Deployment 69 | 
"tests/pipelines/monitoring/test_pre_deploy_eval.py " # Monitoring 70 | "tests/pipelines/deployment/test_deploy_model.py " # Deployment 71 | "tests/io/test_services.py " # IO 72 | "tests/io/test_configs.py " # IO 73 | "tests/pipelines/test_base.py ", # Base 74 | # "--numprocesses='auto'" 75 | pty=True, 76 | ) 77 | 78 | 79 | @task(pre=[poetry, format, type, code, security, coverage], default=True) 80 | def all(_: Context) -> None: 81 | """Run all check tasks.""" 82 | -------------------------------------------------------------------------------- /tasks/cleans.py: -------------------------------------------------------------------------------- 1 | """Clean tasks for pyinvoke.""" 2 | 3 | # %% IMPORTS 4 | 5 | from invoke.context import Context 6 | from invoke.tasks import task 7 | 8 | # %% TASKS 9 | 10 | # %% - Tools 11 | 12 | 13 | @task 14 | def mypy(ctx: Context) -> None: 15 | """Clean the mypy tool.""" 16 | ctx.run("rm -rf .mypy_cache/") 17 | 18 | 19 | @task 20 | def ruff(ctx: Context) -> None: 21 | """Clean the ruff tool.""" 22 | ctx.run("rm -rf .ruff_cache/") 23 | 24 | 25 | @task 26 | def pytest(ctx: Context) -> None: 27 | """Clean the pytest tool.""" 28 | ctx.run("rm -rf .pytest_cache/") 29 | 30 | 31 | @task 32 | def coverage(ctx: Context) -> None: 33 | """Clean the coverage tool.""" 34 | ctx.run("rm -f .coverage*") 35 | 36 | 37 | # %% - Folders 38 | 39 | 40 | @task 41 | def dist(ctx: Context) -> None: 42 | """Clean the dist folder.""" 43 | ctx.run("rm -f dist/*") 44 | 45 | 46 | @task 47 | def docs(ctx: Context) -> None: 48 | """Clean the docs folder.""" 49 | ctx.run("rm -rf docs/*") 50 | 51 | 52 | @task 53 | def cache(ctx: Context) -> None: 54 | """Clean the cache folder.""" 55 | ctx.run("rm -rf .cache/") 56 | 57 | 58 | @task 59 | def mlruns(ctx: Context) -> None: 60 | """Clean the mlruns folder.""" 61 | ctx.run("rm -rf mlruns/*") 62 | 63 | 64 | @task 65 | def outputs(ctx: Context) -> None: 66 | """Clean the outputs folder.""" 67 | ctx.run("rm -rf outputs/*") 68 | 69 | 70 | # %% - Sources 71 | 72 | 73 | @task 74 | def venv(ctx: Context) -> None: 75 | """Clean the venv folder.""" 76 | ctx.run("rm -rf .venv/") 77 | 78 | 79 | @task 80 | def poetry(ctx: Context) -> None: 81 | """Clean poetry lock file.""" 82 | ctx.run("rm -f poetry.lock") 83 | 84 | 85 | @task 86 | def python(ctx: Context) -> None: 87 | """Clean python caches and bytecodes.""" 88 | ctx.run("find . -type f -name '*.py[co]' -delete") 89 | ctx.run(r"find . 
-type d -name __pycache__ -exec rm -r {} \+") 90 | 91 | 92 | # %% PROJECTS 93 | 94 | 95 | @task 96 | def requirements(ctx: Context) -> None: 97 | """Clean the project requirements file.""" 98 | ctx.run("rm -f requirements.txt") 99 | 100 | 101 | @task 102 | def environment(ctx: Context) -> None: 103 | """Clean the project environment file.""" 104 | ctx.run("rm -f python_env.yaml") 105 | 106 | 107 | # %% - Combines 108 | 109 | 110 | @task(pre=[mypy, ruff, pytest, coverage]) 111 | def tools(_: Context) -> None: 112 | """Run all tools tasks.""" 113 | 114 | 115 | @task(pre=[dist, docs, cache, mlruns, outputs]) 116 | def folders(_: Context) -> None: 117 | """Run all folders tasks.""" 118 | 119 | 120 | @task(pre=[venv, poetry, python]) 121 | def sources(_: Context) -> None: 122 | """Run all sources tasks.""" 123 | 124 | 125 | @task(pre=[requirements, environment]) 126 | def projects(_: Context) -> None: 127 | """Run all projects tasks.""" 128 | 129 | 130 | @task(pre=[tools, folders], default=True) 131 | def all(_: Context) -> None: 132 | """Run all tools and folders tasks.""" 133 | 134 | 135 | @task(pre=[all, sources, projects]) 136 | def reset(_: Context) -> None: 137 | """Run all tools, folders, sources, and projects tasks.""" 138 | -------------------------------------------------------------------------------- /tasks/commits.py: -------------------------------------------------------------------------------- 1 | """Commits tasks for pyinvoke.""" 2 | 3 | # %% IMPORTS 4 | 5 | from invoke.context import Context 6 | from invoke.tasks import task 7 | 8 | # %% TASKS 9 | 10 | 11 | @task 12 | def info(ctx: Context) -> None: 13 | """Print a guide for messages.""" 14 | ctx.run("poetry run cz info", pty=True) 15 | 16 | 17 | @task 18 | def bump(ctx: Context) -> None: 19 | """Bump the version of the package.""" 20 | ctx.run("poetry run cz bump", pty=True) 21 | 22 | 23 | @task 24 | def commit(ctx: Context) -> None: 25 | """Commit all changes with a message.""" 26 | ctx.run("poetry run cz commit", pty=True) 27 | 28 | 29 | @task(pre=[commit], default=True) 30 | def all(_: Context) -> None: 31 | """Run all commit tasks.""" 32 | -------------------------------------------------------------------------------- /tasks/containers.py: -------------------------------------------------------------------------------- 1 | """Container tasks for pyinvoke.""" 2 | 3 | # %% IMPORTS 4 | 5 | from invoke.context import Context 6 | from invoke.tasks import task 7 | 8 | from . import packages 9 | 10 | # %% CONFIGS 11 | 12 | IMAGE_TAG = "latest" 13 | 14 | # %% TASKS 15 | 16 | 17 | @task 18 | def compose(ctx: Context) -> None: 19 | """Start up docker compose.""" 20 | ctx.run("docker compose up") 21 | 22 | 23 | @task(pre=[packages.build]) 24 | def build(ctx: Context, tag: str = IMAGE_TAG) -> None: 25 | """Build the container image.""" 26 | ctx.run(f"docker build --tag={ctx.project.repository}:{tag} .") 27 | 28 | 29 | @task 30 | def run(ctx: Context, tag: str = IMAGE_TAG) -> None: 31 | """Run the container image.""" 32 | ctx.run(f"docker run --rm {ctx.project.repository}:{tag}") 33 | 34 | 35 | @task(pre=[build, run], default=True) 36 | def all(_: Context) -> None: 37 | """Run all container tasks.""" 38 | -------------------------------------------------------------------------------- /tasks/docs.py: -------------------------------------------------------------------------------- 1 | """Docs tasks for pyinvoke.""" 2 | 3 | # %% IMPORTS 4 | 5 | from invoke.context import Context 6 | from invoke.tasks import task 7 | 8 | from . 
import cleans 9 | 10 | # %% CONFIGS 11 | 12 | DOC_FORMAT = "google" 13 | OUTPUT_DIR = "docs/" 14 | 15 | # %% TASKS 16 | 17 | 18 | @task 19 | def serve(ctx: Context, format: str = DOC_FORMAT, port: int = 8088) -> None: 20 | """Serve the API docs with pdoc.""" 21 | ctx.run(f"poetry run pdoc --docformat={format} --port={port} src/{ctx.project.package}") 22 | 23 | 24 | @task 25 | def api(ctx: Context, format: str = DOC_FORMAT, output_dir: str = OUTPUT_DIR) -> None: 26 | """Generate the API docs with pdoc.""" 27 | ctx.run( 28 | f"poetry run pdoc --docformat={format} --output-directory={output_dir} src/{ctx.project.package}" 29 | ) 30 | 31 | 32 | @task(pre=[cleans.docs, api], default=True) 33 | def all(_: Context) -> None: 34 | """Run all docs tasks.""" 35 | -------------------------------------------------------------------------------- /tasks/formats.py: -------------------------------------------------------------------------------- 1 | """Format tasks for pyinvoke.""" 2 | 3 | # %% IMPORTS 4 | 5 | from invoke.context import Context 6 | from invoke.tasks import task 7 | 8 | # %% TASKS 9 | 10 | 11 | @task 12 | def imports(ctx: Context) -> None: 13 | """Format python imports with ruff.""" 14 | ctx.run("poetry run ruff check --select I --fix") 15 | 16 | 17 | @task 18 | def sources(ctx: Context) -> None: 19 | """Format python sources with ruff.""" 20 | ctx.run("poetry run ruff format src/ tasks/ tests/") 21 | 22 | 23 | @task(pre=[imports, sources], default=True) 24 | def all(_: Context) -> None: 25 | """Run all format tasks.""" 26 | -------------------------------------------------------------------------------- /tasks/installs.py: -------------------------------------------------------------------------------- 1 | """Install tasks for pyinvoke.""" 2 | 3 | # %% IMPORTS 4 | 5 | from invoke.context import Context 6 | from invoke.tasks import task 7 | 8 | # %% TASKS 9 | 10 | 11 | @task 12 | def poetry(ctx: Context) -> None: 13 | """Install poetry packages.""" 14 | ctx.run("poetry install") 15 | 16 | 17 | @task 18 | def pre_commit(ctx: Context) -> None: 19 | """Install pre-commit hooks on git.""" 20 | ctx.run("poetry run pre-commit install --hook-type pre-push") 21 | ctx.run("poetry run pre-commit install --hook-type commit-msg") 22 | 23 | 24 | @task(pre=[poetry, pre_commit], default=True) 25 | def all(_: Context) -> None: 26 | """Run all install tasks.""" 27 | -------------------------------------------------------------------------------- /tasks/mlflow.py: -------------------------------------------------------------------------------- 1 | """Mlflow tasks for pyinvoke.""" 2 | 3 | # %% IMPORTS 4 | 5 | from invoke.context import Context 6 | from invoke.tasks import task 7 | 8 | # %% TASKS 9 | 10 | 11 | @task 12 | def doctor(ctx: Context) -> None: 13 | """Run mlflow doctor.""" 14 | ctx.run("poetry run mlflow doctor") 15 | 16 | 17 | @task 18 | def serve( 19 | ctx: Context, host: str = "127.0.0.1", port: str = "5000", backend_uri: str = "./mlruns" 20 | ) -> None: 21 | """Start the mlflow server.""" 22 | ctx.run( 23 | f"poetry run mlflow server --host={host} --port={port} --backend-store-uri={backend_uri}" 24 | ) 25 | 26 | 27 | @task(pre=[doctor, serve], default=True) 28 | def all(_: Context) -> None: 29 | """Run all mlflow tasks.""" 30 | -------------------------------------------------------------------------------- /tasks/packages.py: -------------------------------------------------------------------------------- 1 | """Package tasks for pyinvoke.""" 2 | 3 | # %% IMPORTS 4 | 5 | from invoke.context 
import Context 6 | from invoke.tasks import task 7 | 8 | from . import cleans 9 | 10 | # %% CONFIGS 11 | 12 | BUILD_FORMAT = "wheel" 13 | 14 | # %% TASKS 15 | 16 | 17 | @task(pre=[cleans.dist]) 18 | def build(ctx: Context, format: str = BUILD_FORMAT) -> None: 19 | """Build the python package.""" 20 | ctx.run(f"poetry build --format={format}") 21 | 22 | 23 | @task(pre=[build], default=True) 24 | def all(_: Context) -> None: 25 | """Run all package tasks.""" 26 | -------------------------------------------------------------------------------- /tasks/projects.py: -------------------------------------------------------------------------------- 1 | """Project tasks for pyinvoke.""" 2 | 3 | # mypy: disable-error-code="arg-type" 4 | 5 | # %% IMPORTS 6 | 7 | import json 8 | 9 | from invoke.context import Context 10 | from invoke.tasks import call, task 11 | 12 | # %% CONFIGS 13 | 14 | PYTHON_VERSION = ".python-version" 15 | REQUIREMENTS = "requirements.txt" 16 | ENVIRONMENT = "python_env.yaml" 17 | 18 | # %% TASKS 19 | 20 | 21 | @task 22 | def requirements(ctx: Context) -> None: 23 | """Export the project requirements file.""" 24 | ctx.run(f"poetry export --without-urls --without-hashes --output={REQUIREMENTS}") 25 | 26 | 27 | @task(pre=[requirements]) 28 | def environment(ctx: Context) -> None: 29 | """Export the project environment file.""" 30 | with open(PYTHON_VERSION, "r") as reader: 31 | python = reader.read().strip() # version 32 | configuration: dict[str, object] = {"python": python} 33 | with open(REQUIREMENTS, "r") as reader: 34 | dependencies: list[str] = [] 35 | for line in reader: 36 | dependency = line.split(" ")[0] 37 | if "pywin32" not in dependency: 38 | dependencies.append(dependency) 39 | configuration["dependencies"] = dependencies 40 | with open(ENVIRONMENT, "w") as writer: 41 | # Safe as YAML is a superset of JSON 42 | json.dump(configuration, writer, indent=4) 43 | writer.write("\n") # add new line at the end 44 | 45 | 46 | @task 47 | def run(ctx: Context, job: str) -> None: 48 | """Run an mlflow project from the MLproject file.""" 49 | ctx.run( 50 | f"poetry run mlflow run --experiment-name={ctx.project.repository}" 51 | f" --run-name={job.capitalize()} -P job={job} ." 52 | ) 53 | 54 | 55 | @task(pre=[environment, call(run, job="main")], default=True) 56 | def all(_: Context) -> None: 57 | """Run all project tasks.""" 58 | -------------------------------------------------------------------------------- /tasks/serve.py: -------------------------------------------------------------------------------- 1 | """Serve tasks for pyinvoke.""" 2 | 3 | # %% IMPORTS 4 | 5 | from invoke.context import Context 6 | from invoke.tasks import task 7 | 8 | # %% TASKS 9 | 10 | 11 | @task 12 | def serve(ctx: Context) -> None: 13 | """Run the serving endpoint.""" 14 | ctx.run("poetry run python serving_endpoint/server.py", pty=True) 15 | 16 | 17 | @task(pre=[serve], default=True) 18 | def all(_: Context) -> None: 19 | """Run all serve tasks.""" 20 | -------------------------------------------------------------------------------- /tests/confs/invalid/0. invalid.yaml: -------------------------------------------------------------------------------- 1 | job: 2 | KIND: UnknownJob -------------------------------------------------------------------------------- /tests/confs/valid/0. 
feature_engineering.yaml: -------------------------------------------------------------------------------- 1 | job: 2 | KIND: FeatureEngineeringJob 3 | embedding_model: "amazon.titan-embed-text-v1" 4 | vector_store_path: "http://localhost:6333" 5 | document_path: "${tests_path:}/data/documents/" 6 | collection_name: "hr-documents" -------------------------------------------------------------------------------- /tests/confs/valid/1. deployment.yaml: -------------------------------------------------------------------------------- 1 | job: 2 | KIND: DeploymentJob 3 | staging_alias: "champion" 4 | production_alias: "production" 5 | registry_model_name: "pytest-rag-chatbot-with-guardrails" 6 | llm_confs: "/confs/rag_chain_config.yaml" 7 | llm_model_code_path: "/src/llmops_project/models/chatbot_with_guardrails.py" 8 | vector_store_path: "http://localhost:6333" -------------------------------------------------------------------------------- /tests/confs/valid/2. monitoring.yaml: -------------------------------------------------------------------------------- 1 | job: 2 | KIND: MonitoringEvalJob 3 | vector_store_path: "http://localhost:6333" 4 | registry_model_name: "rag-chatbot" 5 | qa_dataset_path: "${tests_path:}/data/datasets/rag_dataset.csv" 6 | alias: "champion" 7 | 8 | metric_tresholds: 9 | flesch_kincaid_grade_level_mean: 5.1 # metric mean must be bigger than this 10 | ari_grade_level_mean: 4.1 # metric mean must be bigger than this 11 | 12 | trace_experiment_name: "rag_chatbot_experiment" 13 | monitoring_experiment_name: "monitoring" 14 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | """Configuration for the tests.""" 2 | 3 | # %% IMPORTS 4 | 5 | import os 6 | import typing as T 7 | 8 | import omegaconf 9 | import pytest 10 | from _pytest import logging as pl 11 | from llmops_project.io import services 12 | 13 | # %% CONFIGS 14 | 15 | LIMIT = 1500 16 | N_SPLITS = 3 17 | TEST_SIZE = 24 * 7 # 1 week 18 | 19 | # %% FIXTURES 20 | 21 | # %% - Paths 22 | 23 | 24 | @pytest.fixture(scope="session") 25 | def tests_path() -> str: 26 | """Return the path of the tests folder.""" 27 | file = os.path.abspath(__file__) 28 | parent = os.path.dirname(file) 29 | return parent 30 | 31 | 32 | @pytest.fixture(scope="session") 33 | def data_path(tests_path: str) -> str: 34 | """Return the path of the data folder.""" 35 | return os.path.join(tests_path, "data") 36 | 37 | 38 | @pytest.fixture(scope="session") 39 | def confs_path(tests_path: str) -> str: 40 | """Return the path of the confs folder.""" 41 | return os.path.join(tests_path, "confs") 42 | 43 | 44 | @pytest.fixture(scope="session") 45 | def inputs_path(data_path: str) -> str: 46 | """Return the path of the inputs dataset.""" 47 | return os.path.join(data_path, "inputs_sample.parquet") 48 | 49 | 50 | @pytest.fixture(scope="session") 51 | def targets_path(data_path: str) -> str: 52 | """Return the path of the targets dataset.""" 53 | return os.path.join(data_path, "targets_sample.parquet") 54 | 55 | 56 | @pytest.fixture(scope="session") 57 | def outputs_path(data_path: str) -> str: 58 | """Return the path of the outputs dataset.""" 59 | return os.path.join(data_path, "outputs_sample.parquet") 60 | 61 | 62 | @pytest.fixture(scope="session") 63 | def session_tmp_path(tmp_path_factory) -> str: 64 | """Create a session-scoped temporary directory.""" 65 | return tmp_path_factory.mktemp("session_tmp") 66 | 67 | 68 | @pytest.fixture(scope="session") 69 | def 
tmp_outputs_path(session_tmp_path: str) -> str: 70 | """Return a session-scoped tmp path for the outputs dataset.""" 71 | return os.path.join(session_tmp_path, "outputs.parquet") 72 | 73 | 74 | @pytest.fixture(scope="session") 75 | def tmp_models_explanations_path(session_tmp_path: str) -> str: 76 | """Return a session-scoped tmp path for the model explanations dataset.""" 77 | return os.path.join(session_tmp_path, "models_explanations.parquet") 78 | 79 | 80 | @pytest.fixture(scope="session") 81 | def tmp_samples_explanations_path(session_tmp_path: str) -> str: 82 | """Return a session-scoped tmp path for the samples explanations dataset.""" 83 | return os.path.join(session_tmp_path, "samples_explanations.parquet") 84 | 85 | 86 | # %% - Configs 87 | 88 | 89 | @pytest.fixture(scope="session") 90 | def extra_config() -> str: 91 | """Extra config for scripts.""" 92 | # use OmegaConf resolver: ${tmp_path:} 93 | config = """ 94 | { 95 | "job": { 96 | "alerts_service": { 97 | "enable": false, 98 | }, 99 | "mlflow_service": { 100 | "tracking_uri": "${tmp_path:}/tracking/", 101 | "registry_uri": "${tmp_path:}/registry/", 102 | } 103 | } 104 | } 105 | """ 106 | return config 107 | 108 | 109 | # %% - Resolvers 110 | 111 | 112 | @pytest.fixture(scope="session", autouse=True) 113 | def tests_path_resolver(tests_path: str) -> str: 114 | """Register the tests path resolver with OmegaConf.""" 115 | 116 | def resolver() -> str: 117 | """Get tests path.""" 118 | return tests_path 119 | 120 | omegaconf.OmegaConf.register_new_resolver("tests_path", resolver, use_cache=True, replace=False) 121 | return tests_path 122 | 123 | 124 | @pytest.fixture(scope="session", autouse=True) 125 | def tmp_path_resolver(session_tmp_path: str) -> str: 126 | """Register the session-scoped tmp path resolver with OmegaConf.""" 127 | 128 | def resolver() -> str: 129 | """Get session tmp data path.""" 130 | return session_tmp_path 131 | 132 | omegaconf.OmegaConf.register_new_resolver("tmp_path", resolver, use_cache=False, replace=True) 133 | return session_tmp_path 134 | 135 | 136 | # %% - Services 137 | 138 | 139 | @pytest.fixture(scope="session", autouse=True) 140 | def logger_service() -> T.Generator[services.LoggerService, None, None]: 141 | """Return and start the logger service.""" 142 | service = services.LoggerService(colorize=False, diagnose=True) 143 | service.start() 144 | yield service 145 | service.stop() 146 | 147 | 148 | @pytest.fixture 149 | def logger_caplog( 150 | caplog: pl.LogCaptureFixture, logger_service: services.LoggerService 151 | ) -> T.Generator[pl.LogCaptureFixture, None, None]: 152 | """Extend pytest caplog fixture with the logger service (loguru).""" 153 | # https://loguru.readthedocs.io/en/stable/resources/migration.html#replacing-caplog-fixture-from-pytest-library 154 | logger = logger_service.logger() 155 | handler_id = logger.add( 156 | caplog.handler, 157 | level=0, 158 | format="{message}", 159 | filter=lambda record: record["level"].no >= caplog.handler.level, 160 | enqueue=False, # Set to 'True' if your test is spawning child processes. 
161 | ) 162 | yield caplog 163 | logger.remove(handler_id) 164 | 165 | 166 | # @pytest.fixture(scope="session", autouse=True) 167 | # def alerts_service() -> T.Generator[services.AlertsService, None, None]: 168 | # """Return and start the alerter service.""" 169 | # service = services.AlertsService(enable=False) 170 | # service.start() 171 | # yield service 172 | # service.stop() 173 | 174 | 175 | @pytest.fixture(scope="session", autouse=True) 176 | def mlflow_service(session_tmp_path: str) -> T.Generator[services.MlflowService, None, None]: 177 | """Return and start the mlflow service.""" 178 | service = services.MlflowService( 179 | tracking_uri=f"{session_tmp_path}/tracking/", 180 | registry_uri=f"{session_tmp_path}/registry/", 181 | experiment_name="Experiment-Testing", 182 | registry_name="Registry-Testing", 183 | ) 184 | service.start() 185 | yield service 186 | service.stop() 187 | 188 | 189 | # %% - Signatures 190 | -------------------------------------------------------------------------------- /tests/documents/sample_hr_manual.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/tests/documents/sample_hr_manual.pdf -------------------------------------------------------------------------------- /tests/io/test_configs.py: -------------------------------------------------------------------------------- 1 | # %% IMPORTS 2 | 3 | import os 4 | 5 | import omegaconf as oc 6 | from llmops_project.io import configs 7 | 8 | # %% PARSERS 9 | 10 | 11 | def test_parse_file(tmp_path: str) -> None: 12 | # given 13 | text = """ 14 | a: 1 15 | b: True 16 | c: [3, 4] 17 | """ 18 | path = os.path.join(tmp_path, "config.yml") 19 | with open(path, "w", encoding="utf-8") as writer: 20 | writer.write(text) 21 | # when 22 | config = configs.parse_file(path) 23 | # then 24 | assert config == { 25 | "a": 1, 26 | "b": True, 27 | "c": [3, 4], 28 | }, "File config should be parsed correctly!" 29 | 30 | 31 | def test_parse_string() -> None: 32 | # given 33 | text = """{"a": 1, "b": 2, "data": [3, 4]}""" 34 | # when 35 | config = configs.parse_string(text) 36 | # then 37 | assert config == { 38 | "a": 1, 39 | "b": 2, 40 | "data": [3, 4], 41 | }, "String config should be parsed correctly!" 42 | 43 | 44 | # %% MERGERS 45 | 46 | 47 | def test_merge_configs() -> None: 48 | # given 49 | confs = [oc.OmegaConf.create({"x": i, i: i}) for i in range(3)] 50 | # when 51 | config = configs.merge_configs(confs) 52 | # then 53 | assert config == { 54 | 0: 0, 55 | 1: 1, 56 | 2: 2, 57 | "x": 2, 58 | }, "Configs should be merged correctly!" 59 | 60 | 61 | # %% CONVERTERS 62 | 63 | 64 | def test_to_object() -> None: 65 | # given 66 | values = { 67 | "a": 1, 68 | "b": True, 69 | "c": [3, 4], 70 | } 71 | config = oc.OmegaConf.create(values) 72 | # when 73 | object_ = configs.to_object(config) 74 | # then 75 | assert object_ == values, "Object should be the same!" 76 | assert isinstance(object_, dict), "Object should be a dict!" 
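# NOTE (editor's sketch): the helpers exercised above compose into the usual
# load path for job configs: parse each file, merge them in order, convert to
# a plain object. This assumes only the configs API tested here; the file
# paths are illustrative.
#
#   from llmops_project.io import configs
#
#   files = [configs.parse_file(path) for path in ("confs/base.yaml", "confs/dev.yaml")]
#   merged = configs.merge_configs(files)  # later configs override earlier keys
#   settings = configs.to_object(merged)   # plain Python dict, as asserted above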
77 | -------------------------------------------------------------------------------- /tests/io/test_services.py: -------------------------------------------------------------------------------- 1 | # %% IMPORTS 2 | 3 | import _pytest.logging as pl 4 | import mlflow 5 | from llmops_project.io import services 6 | 7 | # %% SERVICES 8 | 9 | 10 | def test_logger_service( 11 | logger_service: services.LoggerService, logger_caplog: pl.LogCaptureFixture 12 | ) -> None: 13 | # given 14 | service = logger_service 15 | logger = service.logger() 16 | # when 17 | logger.debug("DEBUG") 18 | logger.error("ERROR") 19 | # then 20 | assert "DEBUG" in logger_caplog.messages, "Debug message should be logged!" 21 | assert "ERROR" in logger_caplog.messages, "Error message should be logged!" 22 | 23 | 24 | def test_mlflow_service(mlflow_service: services.MlflowService) -> None: 25 | # given 26 | service = mlflow_service 27 | run_config = mlflow_service.RunConfig( 28 | name="testing", 29 | tags={"service": "mlflow"}, 30 | description="a test run.", 31 | log_system_metrics=True, 32 | ) 33 | # when 34 | client = service.client() 35 | with service.run_context(run_config=run_config) as context: 36 | pass 37 | finished = client.get_run(run_id=context.info.run_id) 38 | # then 39 | # - run 40 | assert run_config.tags is not None, "Run config tags should be set!" 41 | # - mlflow 42 | assert service.tracking_uri == mlflow.get_tracking_uri(), "Tracking URI should be the same!" 43 | assert service.registry_uri == mlflow.get_registry_uri(), "Registry URI should be the same!" 44 | assert mlflow.get_experiment_by_name(service.experiment_name), "Experiment should be set up!" 45 | # - client 46 | assert service.tracking_uri == client.tracking_uri, "Tracking URI should be the same!" 47 | assert service.registry_uri == client._registry_uri, "Registry URI should be the same!" 48 | assert client.get_experiment_by_name(service.experiment_name), "Experiment should be set up!" 49 | # - context 50 | assert context.info.run_name == run_config.name, "Context name should be the same!" 51 | assert ( 52 | run_config.description in context.data.tags.values() 53 | ), "Context desc. should be in tags values!" 54 | assert ( 55 | context.data.tags.items() > run_config.tags.items() 56 | ), "Given tags should be a subset of the context tags!" 57 | assert context.info.status == "RUNNING", "Context should be running!" 58 | # - finished 59 | assert finished.info.status == "FINISHED", "Run should be finished!" 
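# NOTE (editor's sketch): outside the test suite, MlflowService is driven the
# same way as the mlflow_service fixture in conftest.py. The URIs and names
# below are illustrative, not project defaults.
#
#   service = services.MlflowService(
#       tracking_uri="./mlruns/tracking/",
#       registry_uri="./mlruns/registry/",
#       experiment_name="my-experiment",
#       registry_name="my-registry",
#   )
#   service.start()  # points mlflow at the tracking/registry URIs
#   run_config = service.RunConfig(
#       name="my-run",
#       tags={"team": "llmops"},
#       description="an example run.",
#       log_system_metrics=True,
#   )
#   with service.run_context(run_config=run_config) as run:
#       ...  # anything logged here is attached to run.info.run_id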
60 | -------------------------------------------------------------------------------- /tests/pipelines/deployment/test_deploy_model.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from llmops_project.pipelines.deployment.deploy_model import DeployModelJob 3 | 4 | # %% IMPORTS 5 | 6 | 7 | # %% TESTS 8 | 9 | 10 | @pytest.mark.parametrize( 11 | "registry_model_name, staging_alias, production_alias", 12 | [ 13 | ("test_model", "champion", "production"), 14 | pytest.param( 15 | "invalid_model", 16 | "champion", 17 | "production", 18 | marks=pytest.mark.xfail(reason="Invalid model name", raises=Exception), 19 | ), 20 | ], 21 | ) 22 | def test_deploy_model_job( 23 | mlflow_service, 24 | logger_service, 25 | registry_model_name: str, 26 | staging_alias: str, 27 | production_alias: str, 28 | ): 29 | job = DeployModelJob( 30 | registry_model_name=registry_model_name, 31 | staging_alias=staging_alias, 32 | production_alias=production_alias, 33 | mlflow_service=mlflow_service, 34 | logger_service=logger_service, 35 | ) 36 | 37 | with job as runner: 38 | result = runner.run() 39 | 40 | assert set(result.keys()) == { 41 | "self", 42 | "logger", 43 | "client", 44 | } 45 | 46 | model_version = result["client"].get_model_version_by_alias( 47 | name=registry_model_name, alias=production_alias 48 | ) 49 | tags = model_version.tags 50 | assert tags["passed_tests"] == "True" 51 | -------------------------------------------------------------------------------- /tests/pipelines/deployment/test_register_model.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from llmops_project.pipelines.deployment.register_model import LogAndRegisterModelJob 3 | 4 | # %% IMPORTS 5 | 6 | 7 | # %% TESTS 8 | 9 | 10 | @pytest.mark.parametrize( 11 | "registry_model_name, staging_alias, llm_model_code_path, llm_confs, vector_store_path", 12 | [ 13 | ( 14 | "test_model", 15 | "champion", 16 | "/src/llmops_project/models/chatbot_with_guardrails.py", 17 | "/confs/rag_chain_config.yaml", 18 | "http://localhost:6333", 19 | ), 20 | pytest.param( 21 | "invalid_model", 22 | "champion", 23 | "/invalid/path/to/model/code", 24 | "/invalid/path/to/config", 25 | "/invalid/path/to/vector/store", 26 | marks=pytest.mark.xfail(reason="Invalid paths", raises=Exception), 27 | ), 28 | ], 29 | ) 30 | def test_log_and_register_model_job( 31 | mlflow_service, 32 | logger_service, 33 | registry_model_name: str, 34 | staging_alias: str, 35 | llm_model_code_path: str, 36 | llm_confs: str, 37 | vector_store_path: str, 38 | ): 39 | # Given: A LogAndRegisterModelJob instance with the provided parameters 40 | job = LogAndRegisterModelJob( 41 | registry_model_name=registry_model_name, 42 | staging_alias=staging_alias, 43 | llm_model_code_path=llm_model_code_path, 44 | llm_confs=llm_confs, 45 | vector_store_path=vector_store_path, 46 | mlflow_service=mlflow_service, 47 | logger_service=logger_service, 48 | ) 49 | 50 | # When: The job is run 51 | with job as runner: 52 | out = runner.run() 53 | 54 | # Then: Verify the expected results 55 | assert set(out) == { 56 | "model_specs", 57 | "self", 58 | "llm_code_path", 59 | "logger", 60 | "client", 61 | "config_path", 62 | "run_id", 63 | "vector_store_path", 64 | "input_example", 65 | "script_dir", 66 | } 67 | 68 | # Verify if model was registered by checking if the model version exists 69 | 70 | latest_version = out["client"].get_model_version_by_alias(registry_model_name, staging_alias) 71 | 72 | tags = 
out["client"].get_model_version(registry_model_name, latest_version.version).tags 73 | 74 | assert "passed_tests" in tags, "Tag 'passed_tests' does not exist." 75 | -------------------------------------------------------------------------------- /tests/pipelines/feature_engineering/test_create_vector_db.py: -------------------------------------------------------------------------------- 1 | from unittest import mock 2 | 3 | import pytest 4 | from langchain_aws import BedrockEmbeddings 5 | from langchain_qdrant import QdrantVectorStore 6 | from llmops_project.io import services 7 | from llmops_project.io.vector_db import QdrantVectorDB 8 | from llmops_project.pipelines.feature_engineering.create_vector_db import CreateVectorDBJob 9 | 10 | # %% IMPORTS 11 | 12 | 13 | # %% TESTS 14 | 15 | 16 | @pytest.fixture 17 | def mock_bedrock_embeddings(): 18 | return mock.Mock(spec=BedrockEmbeddings) 19 | 20 | 21 | @pytest.fixture 22 | def mock_qdrant_vector_db(): 23 | return mock.Mock(spec=QdrantVectorDB) 24 | 25 | 26 | @pytest.mark.parametrize( 27 | "embedding_model, collection_name, vector_store_path", 28 | [ 29 | ("amazon.titan-embed-text-v1", "test_collection", "http://localhost:6333"), 30 | pytest.param( 31 | "amazon.titan-embed-text-v1", 32 | "test_collection", 33 | "http://localhost:6334", 34 | marks=pytest.mark.xfail(reason="Invalid localhost port", raises=Exception), 35 | ), 36 | ], 37 | ) 38 | def test_create_vector_db_job( 39 | logger_service: services.LoggerService, 40 | mlflow_service: services.MlflowService, 41 | embedding_model: str, 42 | collection_name: str, 43 | vector_store_path: str, 44 | ): 45 | job = CreateVectorDBJob( 46 | embedding_model=embedding_model, 47 | collection_name=collection_name, 48 | vector_store_path=vector_store_path, 49 | logger_service=logger_service, 50 | ) 51 | 52 | with job as runner: 53 | out = runner.run() 54 | 55 | assert set(out) == {"self", "logger", "embeddings", "vector_db", "script_dir"} 56 | 57 | # Vector Db 58 | assert out["vector_db"].embeddings.model_id == embedding_model 59 | assert out["vector_db"].collection_name == collection_name 60 | 61 | assert out["embeddings"].model_id == embedding_model 62 | 63 | try: 64 | QdrantVectorStore.from_existing_collection( 65 | embedding=out["embeddings"], 66 | collection_name=collection_name, 67 | url=vector_store_path, 68 | ) 69 | except Exception as e: 70 | pytest.fail(f"Failed to create QdrantVectorStore: {e}") 71 | -------------------------------------------------------------------------------- /tests/pipelines/feature_engineering/test_ingest_documents.py: -------------------------------------------------------------------------------- 1 | from unittest import mock 2 | 3 | import pytest 4 | from langchain_aws import BedrockEmbeddings 5 | from llmops_project.io import services 6 | from llmops_project.io.vector_db import QdrantVectorDB 7 | from llmops_project.pipelines.feature_engineering.ingest_documents import IngestAndUpdateVectorDBJob 8 | 9 | # %% IMPORTS 10 | 11 | 12 | # %% TESTS 13 | 14 | 15 | @pytest.fixture 16 | def mock_bedrock_embeddings(): 17 | return mock.Mock(spec=BedrockEmbeddings) 18 | 19 | 20 | @pytest.fixture 21 | def mock_qdrant_vector_db(): 22 | return mock.Mock(spec=QdrantVectorDB) 23 | 24 | 25 | @pytest.mark.parametrize( 26 | "embedding_model, collection_name, vector_store_path, document_path", 27 | [ 28 | ( 29 | "amazon.titan-embed-text-v1", 30 | "test_collection", 31 | "http://localhost:6333", 32 | "/tests/documents/", 33 | ), 34 | pytest.param( 35 | "invalid_model", 36 | 
"test_collection", 37 | "http://localhost:6333", 38 | "/tests/documents/", 39 | marks=pytest.mark.xfail(reason="Invalid embedding model", raises=Exception), 40 | ), 41 | pytest.param( 42 | "amazon.titan-embed-text-v1", 43 | "test_collection", 44 | "http://localhost:6333", 45 | "/invalid_path", 46 | marks=pytest.mark.xfail(reason=" Directory not found", raises=FileNotFoundError), 47 | ), 48 | ], 49 | ) 50 | def test_ingest_and_update_vector_db_job( 51 | logger_service: services.LoggerService, 52 | embedding_model: str, 53 | collection_name: str, 54 | vector_store_path: str, 55 | document_path: str, 56 | ): 57 | job = IngestAndUpdateVectorDBJob( 58 | embedding_model=embedding_model, 59 | collection_name=collection_name, 60 | vector_store_path=vector_store_path, 61 | document_path=document_path, 62 | logger_service=logger_service, 63 | ) 64 | 65 | with job as runner: 66 | result = runner.run() 67 | 68 | assert set(result.keys()) == { 69 | "self", 70 | "logger", 71 | "embeddings", 72 | "vector_db", 73 | "script_dir", 74 | "document_path", 75 | } 76 | 77 | # Try Querying the Qdrant Vector Store 78 | assert result["vector_db"].embeddings.model_id == embedding_model 79 | assert result["vector_db"].collection_name == collection_name 80 | 81 | query_results = result["vector_db"].query_database("What is the content of the documents?") 82 | for res in query_results: 83 | assert set(res.keys()) == {"score", "text", "source"} 84 | assert res["score"] is not None 85 | assert isinstance(res["text"], str) 86 | assert isinstance(res["source"], str) 87 | -------------------------------------------------------------------------------- /tests/pipelines/monitoring/test_generate_rag_dataset.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | from llmops_project.pipelines.monitoring.generate_rag_dataset import GenerateRagDatasetJob 5 | 6 | 7 | @pytest.mark.parametrize( 8 | "data_path, qa_dataset_path_csv, qa_dataset_path_json, llm_model", 9 | [ 10 | ( 11 | "/tests/documents/", 12 | "data/qa_dataset.csv", 13 | "data/qa_dataset.json", 14 | "anthropic.claude-3-haiku-20240307-v1:0", 15 | ), 16 | pytest.param( 17 | "/invalid_path", 18 | "data/qa_dataset.csv", 19 | "data/qa_dataset.json", 20 | "anthropic.claude-3-haiku-20240307-v1:0", 21 | marks=pytest.mark.xfail(reason="Invalid data path", raises=Exception), 22 | ), 23 | ], 24 | ) 25 | def test_generate_rag_dataset_job( 26 | logger_service, 27 | data_path: str, 28 | qa_dataset_path_csv: str, 29 | qa_dataset_path_json: str, 30 | llm_model: str, 31 | ): 32 | # Given: A GenerateRagDatasetJob instance with the provided parameters 33 | job = GenerateRagDatasetJob( 34 | data_path=data_path, 35 | qa_dataset_path_csv=qa_dataset_path_csv, 36 | qa_dataset_path_json=qa_dataset_path_json, 37 | llm_model=llm_model, 38 | logger_service=logger_service, 39 | ) 40 | 41 | # When: The job is run 42 | with job as runner: 43 | out = runner.run() 44 | 45 | # Then: Verify the expected results 46 | assert set(out) == { 47 | "self", 48 | "data_path", 49 | "final_dataset_path", 50 | "final_dataset_json_path", 51 | "logger", 52 | "script_dir", 53 | "project_root", 54 | } 55 | 56 | # Verify if the CSV and JSON files are created 57 | assert Path(out["final_dataset_path"]).exists(), "CSV file was not created." 58 | assert Path(out["final_dataset_json_path"]).exists(), "JSON file was not created." 
59 | -------------------------------------------------------------------------------- /tests/pipelines/monitoring/test_pre_deploy_eval.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from llmops_project.pipelines.monitoring.pre_deploy_eval import EvaluateModelJob 3 | 4 | # %% TESTS 5 | 6 | 7 | @pytest.mark.parametrize( 8 | "qa_dataset_path, registry_model_name, alias, vector_store_path, metric_tresholds, expect_failure", 9 | [ 10 | ( 11 | "/data/qa_dataset.csv", 12 | "test_model", 13 | "champion", 14 | "http://localhost:6333", 15 | {"flesch_kincaid_grade_level_mean": 5.0, "ari_grade_level_mean": 5.0}, 16 | False, 17 | ), 18 | pytest.param( 19 | "/invalid/path/to/qa_dataset.csv", 20 | "test_model", 21 | "champion", 22 | "/invalid/path/to/vector_store", 23 | {"flesch_kincaid_grade_level_mean": 5.0, "ari_grade_level_mean": 5.0}, 24 | True, 25 | marks=pytest.mark.xfail(reason="Invalid paths", raises=Exception), 26 | ), 27 | ], 28 | ) 29 | def test_evaluate_model_job( 30 | mlflow_service, 31 | logger_service, 32 | qa_dataset_path: str, 33 | registry_model_name: str, 34 | alias: str, 35 | vector_store_path: str, 36 | metric_tresholds: dict, 37 | expect_failure: bool, 38 | ): 39 | # Given: An EvaluateModelJob instance with the provided parameters 40 | job = EvaluateModelJob( 41 | qa_dataset_path=qa_dataset_path, 42 | registry_model_name=registry_model_name, 43 | alias=alias, 44 | vector_store_path=vector_store_path, 45 | metric_tresholds=metric_tresholds, 46 | mlflow_service=mlflow_service, 47 | logger_service=logger_service, 48 | ) 49 | 50 | # When: The job is run 51 | with job as runner: 52 | out = runner.run() 53 | 54 | # Then: Verify the expected results 55 | if expect_failure: 56 | assert "eval_df" not in out, "Evaluation DataFrame should not be present." 57 | else: 58 | assert "eval_df" in out, "Evaluation DataFrame not found in output." 59 | assert "results" in out, "Results not found in output." 60 | 61 | # Verify the output variables 62 | assert set(out) == { 63 | "logger", 64 | "script_dir", 65 | "data_path", 66 | "eval_df", 67 | "model", 68 | "results", 69 | "result_metrics", 70 | "metrics", 71 | "thresholds", 72 | "beats_baseline", 73 | "self", 74 | "threshold", 75 | "metric", 76 | } 77 | -------------------------------------------------------------------------------- /tests/pipelines/test_base.py: -------------------------------------------------------------------------------- 1 | # %% IMPORTS 2 | from llmops_project.io import services 3 | from llmops_project.pipelines import base 4 | 5 | # %% JOBS 6 | 7 | 8 | def test_job( 9 | logger_service: services.LoggerService, 10 | mlflow_service: services.MlflowService, 11 | ) -> None: 12 | # given 13 | class MyJob(base.Job): 14 | KIND: str = "MyJob" 15 | 16 | def run(self) -> base.Locals: 17 | a, b = 1, "test" 18 | return locals() 19 | 20 | job = MyJob(logger_service=logger_service, mlflow_service=mlflow_service) 21 | # when 22 | with job as runner: 23 | out = runner.run() 24 | # then 25 | # - inputs 26 | assert hasattr(job, "logger_service"), "Job should have an Logger service!" 27 | assert hasattr(job, "mlflow_service"), "Job should have an Mlflow service!" 28 | # - outputs 29 | assert set(out) == {"self", "a", "b"}, "Run should return local variables!" 
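# NOTE (editor's sketch): the KIND field on each Job subclass is the value a
# config file selects on; settings.py declares the job field as a pydantic
# discriminated union with discriminator="KIND". A config targeting the
# hypothetical MyJob class above would therefore read (YAML shown as a
# comment):
#
#   job:
#     KIND: MyJob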
30 | -------------------------------------------------------------------------------- /tests/test_scripts.py: -------------------------------------------------------------------------------- 1 | # %% IMPORTS 2 | 3 | 4 | # %% FUNCTIONS 5 | 6 | 7 | # %% SCRIPTS 8 | 9 | 10 | # def test_schema(capsys: pc.CaptureFixture[str]) -> None: 11 | # # given 12 | # args = ["prog", "--schema"] 13 | # # when 14 | # scripts.main(args) 15 | # captured = capsys.readouterr() 16 | # # then 17 | # assert captured.err == "", "Captured error should be empty!" 18 | # assert json.loads(captured.out), "Captured output should be a JSON!" 19 | 20 | 21 | # @pytest.mark.parametrize( 22 | # "scenario", 23 | # [ 24 | # "valid", 25 | # pytest.param( 26 | # "invalid", 27 | # marks=pytest.mark.xfail( 28 | # reason="Invalid config.", 29 | # raises=pdt.ValidationError, 30 | # ), 31 | # ), 32 | # ], 33 | # ) 34 | # def test_main(scenario: str, confs_path: str, extra_config: str) -> None: 35 | # # given 36 | # folder = os.path.join(confs_path, scenario) 37 | # confs = list(sorted(os.listdir(folder))) 38 | # # when 39 | # for conf in confs: # one job per config 40 | # config = os.path.join(folder, conf) 41 | # argv = [config, "-e", extra_config] 42 | # status = scripts.main(argv=argv) 43 | # # then 44 | # assert status == 0, f"Job should succeed for config: {config}" 45 | 46 | 47 | # def test_main__no_configs() -> None: 48 | # # given 49 | # argv: list[str] = [] 50 | # # when 51 | # with pytest.raises(RuntimeError) as error: 52 | # scripts.main(argv) 53 | # # then 54 | # assert error.match("No configs provided."), "RuntimeError should be raised!" 55 | 56 | 57 | # %% 58 | --------------------------------------------------------------------------------