├── .github ├── ISSUE_TEMPLATE │ ├── feat-request.md │ └── fix-request.md ├── PULL_REQUEST_TEMPLATE.md ├── actions │ └── setup │ │ └── action.yml └── workflows │ ├── check.yml │ └── publish.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .python-version ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── Dockerfile ├── LICENCE.txt ├── MLproject ├── README.md ├── confs ├── .gitkeep ├── deployment.yaml ├── feature_eng.yaml ├── generate_rag_dataset.yaml ├── monitoring.yaml └── rag_chain_config.yaml ├── data ├── .gitkeep ├── datasets │ ├── .gitkeep │ ├── rag_dataset.csv │ └── rag_dataset.json └── documents │ └── sample_hr_manual.pdf ├── docker-compose.yml ├── docs └── .gitkeep ├── invoke.yaml ├── llmops-project.code-workspace ├── mlruns └── .gitkeep ├── notebooks └── .gitkeep ├── outputs └── .gitkeep ├── poetry.lock ├── poetry.toml ├── pyproject.toml ├── serving_endpoint ├── Dockerfile ├── README.md ├── client.py ├── ecs │ ├── Makefile │ ├── README.md │ ├── deploy-image-to-ecs.sh │ └── infra │ │ ├── app │ │ ├── ecs │ │ │ ├── main.tf │ │ │ ├── output.tf │ │ │ └── variable.tf │ │ ├── main.tf │ │ ├── network │ │ │ ├── main.tf │ │ │ ├── outputs.tf │ │ │ └── variable.tf │ │ ├── terraform.tfstate.backup │ │ └── variable.tf │ │ └── setup │ │ ├── main.tf │ │ ├── output.tf │ │ └── variable.tf ├── example.env ├── imgs │ ├── Model-deployment.webp │ ├── litserve-deployment.png │ ├── litserve.png │ └── model-registry.webp ├── requirements.txt └── server.py ├── src └── llmops_project │ ├── __init__.py │ ├── __main__.py │ ├── io │ ├── configs.py │ ├── services.py │ └── vector_db.py │ ├── models │ ├── chatbot_with_guardrails.py │ └── rag_chatbot.py │ ├── pipelines │ ├── __init__.py │ ├── base.py │ ├── deployment │ │ ├── deploy_model.py │ │ └── register_model.py │ ├── feature_engineering │ │ ├── create_vector_db.py │ │ └── ingest_documents.py │ ├── managers │ │ ├── deployment_manager.py │ │ ├── feature_engineering_manager.py │ │ └── monitoring_manager.py │ └── monitoring │ │ ├── generate_rag_dataset.py │ │ ├── post_deploy_eval.py │ │ └── pre_deploy_eval.py │ ├── scripts.py │ └── settings.py ├── static ├── autoscaling.png ├── experiment_tracking.png ├── guage.png ├── llmops-rag.png ├── llmops.png ├── llmopsmindmap.png ├── model_version.png ├── monitoring.png ├── rag_lifecycle.png ├── tracing-top.gif ├── vector_db.png └── with_and_without_guardrails.svg ├── tasks ├── __init__.py ├── checks.py ├── cleans.py ├── commits.py ├── containers.py ├── docs.py ├── formats.py ├── installs.py ├── mlflow.py ├── packages.py ├── projects.py └── serve.py └── tests ├── confs ├── invalid │ └── 0. invalid.yaml └── valid │ ├── 0. feature_engineering.yaml │ ├── 1. deployment.yaml │ └── 2. monitoring.yaml ├── conftest.py ├── documents └── sample_hr_manual.pdf ├── io ├── test_configs.py └── test_services.py ├── pipelines ├── deployment │ ├── test_deploy_model.py │ └── test_register_model.py ├── feature_engineering │ ├── test_create_vector_db.py │ └── test_ingest_documents.py ├── monitoring │ ├── test_generate_rag_dataset.py │ └── test_pre_deploy_eval.py └── test_base.py └── test_scripts.py /.github/ISSUE_TEMPLATE/feat-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature Request 3 | about: A new feature. 
4 | title: "[FEAT] " 5 | labels: feat 6 | assignees: callmesora 7 | --- 8 | 9 | ## Description 10 | 11 | ## Motivation 12 | 13 | ## Solutions -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/fix-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Fix Request 3 | about: A bug fix 4 | title: "[FIX] " 5 | labels: fix 6 | assignees: callmesora 7 | --- 8 | 9 | ## Bug Description 10 | 11 | ## Expected Behavior 12 | 13 | ## Steps to Reproduce 14 | 15 | ## Additional Context -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | # Changes 2 | 3 | # Reasons 4 | 5 | # Testing 6 | 7 | # Impacts 8 | 9 | # Notes -------------------------------------------------------------------------------- /.github/actions/setup/action.yml: -------------------------------------------------------------------------------- 1 | name: Setup 2 | description: Setup for project workflows 3 | runs: 4 | using: composite 5 | steps: 6 | - run: pipx install invoke poetry 7 | shell: bash 8 | - uses: actions/setup-python@v5 9 | with: 10 | python-version: 3.12 11 | cache: poetry 12 | -------------------------------------------------------------------------------- /.github/workflows/check.yml: -------------------------------------------------------------------------------- 1 | name: Check 2 | on: 3 | pull_request: 4 | branches: 5 | - main 6 | concurrency: 7 | cancel-in-progress: true 8 | group: ${{ github.workflow }}-${{ github.ref }} 9 | jobs: 10 | checks: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v4 14 | - uses: ./.github/actions/setup 15 | - run: poetry install --with checks 16 | - run: poetry run invoke checks.format 17 | - run: poetry run invoke checks.code 18 | - run: poetry run invoke checks.type 19 | - run: poetry run invoke checks.security 20 | # TODO: Add tests once figuring out how to mock mlflow and qdrant 21 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish 2 | on: 3 | release: 4 | types: 5 | - edited 6 | - published 7 | env: 8 | DOCKER_IMAGE: ghcr.io/callmesora/llmops-project 9 | concurrency: 10 | cancel-in-progress: true 11 | group: publish-workflow 12 | jobs: 13 | pages: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v4 17 | - uses: ./.github/actions/setup 18 | - run: poetry install --with docs 19 | - run: poetry run invoke docs 20 | - uses: JamesIves/github-pages-deploy-action@v4 21 | with: 22 | folder: docs/ 23 | branch: gh-pages 24 | packages: 25 | permissions: 26 | packages: write 27 | runs-on: ubuntu-latest 28 | steps: 29 | - uses: actions/checkout@v4 30 | - uses: ./.github/actions/setup 31 | - run: poetry install --with dev 32 | - run: poetry run invoke packages 33 | - uses: docker/login-action@v3 34 | with: 35 | registry: ghcr.io 36 | username: ${{ github.actor }} 37 | password: ${{ secrets.GITHUB_TOKEN }} 38 | - uses: docker/setup-buildx-action@v3 39 | - uses: docker/build-push-action@v6 40 | with: 41 | push: true 42 | context: . 
43 | cache-to: type=gha 44 | cache-from: type=gha 45 | tags: | 46 | ${{ env.DOCKER_IMAGE }}:latest 47 | ${{ env.DOCKER_IMAGE }}:${{ github.ref_name }} 48 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # https://git-scm.com/docs/gitignore 2 | 3 | # Build 4 | /dist/ 5 | /build/ 6 | 7 | # Cache 8 | .cache/ 9 | .coverage* 10 | .mypy_cache/ 11 | .ruff_cache/ 12 | .pytest_cache/ 13 | .terraform.* 14 | .terraform 15 | terraform.tfstate 16 | 17 | # Editor 18 | /.idea/ 19 | /.vscode/ 20 | .ipynb_checkpoints/ 21 | 22 | # Environs 23 | .env 24 | /.venv/ 25 | 26 | # Project 27 | /docs/* 28 | /mlruns/* 29 | /outputs/* 30 | !**/.gitkeep 31 | /qdrant_data/* 32 | qa_dataset.csv 33 | qa_dataset.json 34 | 35 | # Python 36 | *.py[cod] 37 | __pycache__/ 38 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # https://pre-commit.com 2 | # https://pre-commit.com/hooks.html 3 | 4 | default_language_version: 5 | python: python3.12 6 | repos: 7 | - repo: https://github.com/pre-commit/pre-commit-hooks 8 | rev: v4.6.0 9 | hooks: 10 | - id: check-added-large-files 11 | - id: check-case-conflict 12 | - id: check-merge-conflict 13 | - id: check-toml 14 | - id: check-yaml 15 | - id: debug-statements 16 | - id: end-of-file-fixer 17 | - id: mixed-line-ending 18 | - id: trailing-whitespace 19 | - repo: https://github.com/python-poetry/poetry 20 | rev: 1.8.3 21 | hooks: 22 | - id: poetry-check 23 | - repo: https://github.com/astral-sh/ruff-pre-commit 24 | rev: v0.5.0 25 | hooks: 26 | - id: ruff 27 | - id: ruff-format 28 | - repo: https://github.com/commitizen-tools/commitizen 29 | rev: v3.27.0 30 | hooks: 31 | - id: commitizen 32 | - id: commitizen-branch 33 | stages: [push] 34 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.12 2 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## v0.2.0 (2024-12-10) 2 | 3 | ### Feat 4 | 5 | - enable Commitizen hooks in pre-commit configuration 6 | - add Contributor Covenant Code of Conduct and remove deprecated client.py file 7 | - add issue and pull request templates for feature and fix requests 8 | - update environment configuration and improve service structure 9 | - add llmops package 10 | 11 | ### Fix 12 | 13 | - **tasks**: add color to terminal when running inv commands 14 | - update .gitignore to exclude qa_dataset files 15 | - adjust mypy settings and update test parameters for document ingestion 16 | - update test order 17 | - update package name 18 | - update dependencies and mlflow track serv 19 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, 
education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | github@fmind.dev. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. 
No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | https://www.contributor-covenant.org/faq. Translations are available at 128 | https://www.contributor-covenant.org/translations. -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # https://docs.docker.com/engine/reference/builder/ 2 | 3 | FROM python:3.12 4 | COPY dist/*.whl . 5 | RUN pip install *.whl 6 | CMD ["llmops-project", "--help"] 7 | -------------------------------------------------------------------------------- /LICENCE.txt: -------------------------------------------------------------------------------- 1 | Copyright 2024 Pedro Azevedo (callmesora) 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
--------------------------------------------------------------------------------
/MLproject:
--------------------------------------------------------------------------------
1 | # https://mlflow.org/docs/latest/projects.html
2 | 
3 | name: LLMOps Project
4 | python_env: python_env.yaml
5 | entry_points:
6 |   main:
7 |     parameters:
8 |       job: string
9 |     command: "PYTHONPATH=src python -m llmops_project {job}"
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # LLMOps Python Package
2 | 
3 | [![check.yml](https://github.com/callmesora/llmops-python-package/actions/workflows/check.yml/badge.svg)](https://github.com/callmesora/llmops-python-package/actions/workflows/check.yml)
4 | [![publish.yml](https://github.com/callmesora/llmops-python-package/actions/workflows/publish.yml/badge.svg)](https://github.com/callmesora/llmops-python-package/actions/workflows/publish.yml)
5 | 
6 | [![License](https://img.shields.io/github/license/callmesora/llmops-python-package)](https://github.com/callmesora/llmops-python-package/blob/main/LICENCE.txt)
7 | [![Release](https://img.shields.io/github/v/release/callmesora/llmops-python-package)](https://github.com/callmesora/llmops-python-package/releases)
8 | 
9 | **This repository contains a Python code base with best practices designed to support your LLMOps initiatives.**
10 | 
11 | ![LLMOps Python Package](static/llmops.png)
12 | 
13 | The package leverages several [tools](#tools) and [tips](#tips) to make your LLMOps experience as flexible, robust, and productive as possible.
14 | 
15 | 
16 | You can use this package as part of your LLMOps toolkit or platform (e.g., Model Registry, Experiment Tracking, Realtime Inference, ...).
17 | 
18 | This package is a variation/fork of these resources, specifically tailored for LLM use cases:
19 | 
20 | **Related Resources**:
21 | - **[MLOps Coding Course (Learning)](https://github.com/MLOps-Courses/mlops-coding-course)**: Learn how to create, develop, and maintain a state-of-the-art MLOps code base.
22 | - **[Cookiecutter MLOps Package (Template)](https://github.com/fmind/cookiecutter-mlops-package)**: Start building and deploying Python packages and Docker images for MLOps tasks.
23 | 
24 | ![](static/llmopsmindmap.png)
25 | 
26 | # Table of Contents
27 | 
28 | - [LLMOps Python Package](#llmops-python-package)
29 | - [Table of Contents](#table-of-contents)
30 | - [Install](#install)
31 |   - [Prerequisites](#prerequisites)
32 |   - [Installation](#installation)
33 |   - [Next Steps](#next-steps)
34 | - [Usage](#usage)
35 |   - [Configuration](#configuration)
36 |   - [Execution](#execution)
37 |   - [Automation](#automation)
38 |   - [Workflows](#workflows)
39 | - [Tools](#tools)
40 |   - [Automation](#automation-1)
41 |     - [Commits: Commitizen](#commits-commitizen)
42 |     - [Git Hooks: Pre-Commit](#git-hooks-pre-commit)
43 |     - [Tasks: PyInvoke](#tasks-pyinvoke)
44 |   - [CI/CD](#cicd)
45 |     - [Runner: GitHub Actions](#runner-github-actions)
46 |   - [CLI](#cli)
47 |     - [Parser: Argparse](#parser-argparse)
48 |     - [Logging: Loguru](#logging-loguru)
49 |   - [Code](#code)
50 |     - [Coverage: Coverage](#coverage-coverage)
51 |     - [Editor: VS Code](#editor-vs-code)
52 |     - [Formatting: Ruff](#formatting-ruff)
53 |     - [Quality: Ruff](#quality-ruff)
54 |     - [Security: Bandit](#security-bandit)
55 |     - [Testing: Pytest](#testing-pytest)
56 |     - [Typing: Mypy](#typing-mypy)
57 |     - [Versioning: Git](#versioning-git)
58 |   - [Configs](#configs)
59 |     - [Format: YAML](#format-yaml)
60 |     - [Parser: OmegaConf](#parser-omegaconf)
61 |     - [Reader: Cloudpathlib](#reader-cloudpathlib)
62 |     - [Validator: Pydantic](#validator-pydantic)
63 |   - [Model](#model)
64 |     - [Format: Mlflow Model](#format-mlflow-model)
65 |     - [Registry: Mlflow Registry](#registry-mlflow-registry)
66 |     - [Tracking: Mlflow Tracking](#tracking-mlflow-tracking)
67 |   - [Package](#package)
68 |     - [Evolution: Changelog](#evolution-changelog)
69 |     - [Format: Wheel](#format-wheel)
70 |     - [Manager: Poetry](#manager-poetry)
71 |     - [Runtime: Docker](#runtime-docker)
72 |   - [Programming](#programming)
73 |     - [Language: Python](#language-python)
74 |     - [Version: Pyenv](#version-pyenv)
75 |   - [Observability](#observability)
76 |     - [Monitoring: Mlflow Evaluate](#monitoring--mlflow-evaluate)
77 |     - [Infrastructure: Mlflow System Metrics](#infrastructure-mlflow-system-metrics)
78 |   - [Model Serving](#endpoint)
79 |     - [Serving Endpoint: Litserve](#serving-endpoint)
80 | - [Tips](#tips)
81 |   - [Design Patterns](#design-patterns)
82 |     - [Directed-Acyclic Graph](#directed-acyclic-graph)
83 |     - [Program Service](#program-service)
84 |     - [Soft Coding](#soft-coding)
85 |     - [SOLID Principles](#solid-principles)
86 |     - [IO Separation](#io-separation)
87 |   - [Python Powers](#python-powers)
88 |     - [Context Manager](#context-manager)
89 |     - [Python Package](#python-package)
90 |   - [Software Engineering](#software-engineering)
91 |     - [Code Typing](#code-typing)
92 |     - [Config Typing](#config-typing)
93 |     - [Object Oriented](#object-oriented)
94 |     - [Semantic Versioning](#semantic-versioning)
95 |   - [Testing Tricks](#testing-tricks)
96 |     - [Parallel Testing](#parallel-testing)
97 |     - [Test Fixtures](#test-fixtures)
98 |   - [VS Code](#vs-code)
99 |     - [Code Workspace](#code-workspace)
100 |     - [GitHub Copilot](#github-copilot)
101 | - [Resources](#resources)
102 |   - [Python](#python)
103 |   - [AI/ML/MLOps](#aimlmlops)
104 | 
105 | # Architecture
106 | 
107 | ## RAG Evaluation
108 | 
109 | RAG Evaluation is performed by generating a synthetic dataset of question-answer (QA) pairs. This dataset serves as a baseline to evaluate the performance of different RAG systems before deploying them.
By using a consistent and controlled dataset, we can objectively compare the effectiveness of various RAG implementations.
110 | 
111 | ![RAG Lifecycle](static/rag_lifecycle.png)
112 | 
113 | ## Model Registry
114 | 
115 | We use a pattern where all LLM chains are stored and logged in Mlflow. Each chain is evaluated against the RAG evaluation baseline. If a chain demonstrates better performance than the previous ones, it is registered and promoted to production. This ensures that only the best-performing models are deployed.
116 | 
117 | ![Experiment Tracking](static/experiment_tracking.png)
118 | 
119 | ## Guardrails
120 | Guardrails are important in production because they prevent the model from exhibiting unexpected or undesired behaviors.
121 | 
122 | This LLMOps template comes with setup config files for guardrails that censor PII and restrict topics, built on top of [Guardrails AI](https://github.com/guardrails-ai/guardrails?tab=readme-ov-file).
123 | 
124 | ![Guardrails](static/with_and_without_guardrails.svg)
125 | 
126 | 
127 | ## Endpoint Deployment
128 | 
129 | Having a model registry is crucial for managing and running deployments. In this architecture, we use [Litserve](https://lightning.ai/docs/litserve/home), which builds on top of FastAPI, to deploy our LLMs. This setup allows for flexible deployment options, including Kubernetes and AWS Lambda, ensuring that our models can be scaled and managed efficiently.
130 | 
131 | ![Litserve](static/autoscaling.png)
132 | 
133 | You can check how to serve the model, as well as a code template to deploy on AWS Fargate, under the `/serving_endpoint` folder.
134 | 
135 | 
136 | ## Model Monitoring
137 | 
138 | Model monitoring is crucial for ensuring the performance and reliability of your LLMs in production. Continuous monitoring helps in detecting issues such as performance degradation and unexpected behaviors, which can significantly impact the user experience and business outcomes.
139 | 
140 | ![Mlflow Traces](static/tracing-top.gif)
141 | 
142 | 
143 | We use Mlflow Traces for monitoring our LLMs. This allows us to track various metrics and logs associated with the models over time. Additionally, we run evaluations on these traces using Mlflow Evaluate, with the LLM itself acting as a judge. This setup ensures that we maintain high standards for model performance and can quickly identify and address any issues that arise.
144 | 
145 | 
146 | ## LLMOps Design Pattern
147 | In this project we use a design pattern very similar to the one recommended by Databricks, where each model is logged in Mlflow before it is deployed.
148 | ![LLMOps Databricks](static/llmops-rag.png)
149 | 
150 | The main variations here are that the deployment pipeline is orchestrated as two steps (registration plus validation, then final deployment on the registry), that we measure differences in LLM metrics instead of data drift, and that we are not using the Mlflow AI Gateway (although this, or LiteLLM, could be an addition in the future).
151 | 
152 | 
153 | # Install
154 | 
155 | This section details the requirements, actions, and next steps to kickstart your LLMOps project.
156 | 
157 | ## Prerequisites
158 | 
159 | - [Python>=3.10](https://www.python.org/downloads/): to benefit from [the latest features and performance improvements](https://docs.python.org/3/whatsnew/3.12.html)
160 | - [Poetry>=1.8.2](https://python-poetry.org/): to initialize the project [virtual environment](https://docs.python.org/3/library/venv.html) and its dependencies
161 | 
162 | The project is managed with the package manager [Poetry](https://python-poetry.org/).
163 | 
164 | ## Credentials for LLM
165 | 
166 | To access Bedrock, OpenAI, or any other LLM provider, you need to set up your credentials. These credentials will allow the package to authenticate and interact with the respective services.
167 | In this code template we use Bedrock, but feel free to adapt it to your needs.
168 | 
169 | Example for AWS:
170 | 
171 | **Environment Variables**:
172 | ```bash
173 | export AWS_ACCESS_KEY_ID=your_access_key_id
174 | export AWS_SECRET_ACCESS_KEY=your_secret_access_key
175 | export AWS_REGION=your_default_region
176 | ```
177 | 
178 | 
179 | - You can easily replace `ChatBedrock` with `ChatOllama` or any other provider.
180 | 
181 | 
182 | ## Installation
183 | 
184 | 1. [Clone this GitHub repository](https://docs.github.com/en/repositories/creating-and-managing-repositories/cloning-a-repository) on your computer
185 | ```bash
186 | # with ssh (recommended)
187 | $ git clone -
188 | # with https
189 | $ git clone -
190 | ```
191 | 2. [Run the project installation with poetry](https://python-poetry.org/docs/)
192 | ```bash
193 | $ cd llmops-python-package/
194 | $ poetry install
195 | ```
196 | 3. Adapt the code base to your needs
197 | 
198 | ## Next Steps
199 | 
200 | There are numerous ways to incorporate this package into your MLOps platform.
201 | 
202 | For example, you might choose Databricks or AWS for your compute platform and model registry.
203 | 
204 | Feel free to modify the package code to suit your specific needs. Best of luck!
205 | 
206 | 
207 | # Usage
208 | 
209 | This section explains how to configure the project code and execute it on your system.
210 | 
211 | ## Configuration
212 | 
213 | You can add or edit config files in the `confs/` folder to change the program behavior.
214 | 
215 | ```yaml
216 | # confs/deployment.yaml
217 | job:
218 |   KIND: DeploymentJob
219 |   staging_alias: "champion"
220 |   production_alias: "production"
221 |   registry_model_name: "rag-chatbot-with-guardrails"
222 |   llm_confs: "/confs/rag_chain_config.yaml"
223 |   llm_model_code_path: "/src/llmops_project/models/chatbot_with_guardrails.py"
224 |   vector_store_path: "http://localhost:6333"
225 | ```
226 | 
227 | This config file instructs the program to start a `DeploymentJob` with the given parameters.
228 | You can find all the parameters of your program in the `src/[package]/pipelines/*.py` files.
229 | 
230 | You can also print the full schema supported by this package using `poetry run llmops-project --schema`.
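Under the hood, the YAML file is parsed with OmegaConf and validated with Pydantic, and the `KIND` field selects the job class through a discriminated union. Here is a minimal sketch of that mechanism (the class and field names are illustrative assumptions; the real job classes live under `src/llmops_project/pipelines/`):

```python
import typing as T

import pydantic as pdt
from omegaconf import OmegaConf


class DeploymentJob(pdt.BaseModel):
    KIND: T.Literal["DeploymentJob"]
    registry_model_name: str
    staging_alias: str = "champion"


class FeatureEngineeringJob(pdt.BaseModel):
    KIND: T.Literal["FeatureEngineeringJob"]
    document_path: str
    collection_name: str


class Settings(pdt.BaseModel):
    # the KIND field decides which job class Pydantic instantiates
    job: DeploymentJob | FeatureEngineeringJob = pdt.Field(discriminator="KIND")


config = OmegaConf.load("confs/deployment.yaml")  # parse the YAML file
settings = Settings.model_validate(OmegaConf.to_object(config))  # validate early
print(type(settings.job).__name__)  # -> DeploymentJob
```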
231 | 
232 | ## Execution
233 | 
234 | The project code can be executed with poetry during development; this is the recommended order:
235 | 
236 | ```bash
237 | $ poetry run llmops-project confs/generate_rag_dataset.yaml # Run once to generate the RAG dataset
238 | $ poetry run llmops-project confs/feature_eng.yaml # Creates the Vector DB and ingests documents
239 | $ poetry run llmops-project confs/deployment.yaml # Deploys the model on the model registry
240 | $ poetry run llmops-project confs/monitoring.yaml # Monitors model inferences "every week"
241 | ```
242 | 
243 | To deploy the serving endpoint, you can use the following automation:
244 | 
245 | ```bash
246 | $ inv serve # Launches the Litserve server on port 8000
247 | ```
248 | 
249 | Note: you can also deploy this as a container or to the cloud with the instructions under `/serving_endpoint`
250 | 
251 | ## Pipelines
252 | This project is organized under a manager pattern: each manager is responsible for the workflow orchestration between tasks/jobs. (In production you could use an orchestrator such as Airflow for this.)
253 | 
254 | ### Generate Rag Dataset
255 | This pipeline generates a RAG QA dataset under `/data/datasets/`
256 | 
257 | ### Feature Engineering
258 | This pipeline creates a Vector Database collection and ingests documents into it in the form of vectors.
259 | ![Vector Database](static/vector_db.png)
260 | 
261 | ### Deployment
262 | This pipeline:
263 | - registers a model using Mlflow
264 | - promotes the model to the `champion` alias
265 | - validates the model input/output and signatures
266 | - sets the tag `passed_tests` on the Mlflow registry to True/False depending on whether the model passed the tests
267 | - runs an evaluation of QA factuality on the QA dataset we created previously
268 | - depending on the result of this evaluation, assigns the model a `beats_threshold` tag set to True or False
269 | - if the model `beats_threshold` and `passed_tests`, we can promote it to `production`
270 | 
271 | At the end of this pipeline, we should have a model version in production on the model registry.
272 | ![Model Version](static/model_version.png)
273 | 
274 | 
275 | ### Monitoring
276 | This pipeline is meant to be run as a weekly job to monitor the performance of the model against given metrics, such as default metrics or even LLM-as-a-judge.
277 | ![Monitoring](static/monitoring.png)
278 | 
279 | These metrics are also saved with a gauge display in case you want to load them into a dashboard elsewhere.
280 | ![Gauge](static/guage.png)
281 | 
282 | 
283 | 
284 | 
285 | In production, you can build, ship, and run the project as a Python package:
286 | 
287 | ```bash
288 | poetry build
289 | poetry publish # optional
290 | python -m pip install [package]
291 | [package] confs/deployment.yaml
292 | ```
293 | 
294 | You can also install and use this package as a library for another AI/ML project:
295 | 
296 | ```python
297 | from [package] import pipelines
298 | 
299 | job = pipelines.DeploymentJob(...)
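# entering the job as a context manager starts its services (e.g., logging,
# mlflow tracking) and guarantees a clean shutdown, even when the run fails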
300 | with job as runner:
301 |     runner.run()
302 | ```
303 | 
304 | **Additional tips**:
305 | - You can pass extra configs from the command line using the `--extras` flag
306 |   - Use it to pass runtime values (e.g., a result from previous job executions)
307 | - You can pass several config files on the command line to merge them from left to right
308 |   - You can define common configurations shared between jobs (e.g., model params)
309 | - The right job task will be selected automatically thanks to [Pydantic Discriminated Unions](https://docs.pydantic.dev/latest/concepts/unions/#discriminated-unions)
310 |   - This is a great way to run any job supported by the application (training, tuning, ...)
311 | 
312 | 
313 | ## Automation
314 | 
315 | This project includes several automation tasks to easily repeat common actions.
316 | 
317 | You can invoke the actions from the [command-line](https://www.pyinvoke.org/) or [VS Code extension](https://marketplace.visualstudio.com/items?itemName=dchanco.vsc-invoke).
318 | 
319 | ```bash
320 | # create a code archive
321 | $ inv packages
322 | # list other actions
323 | $ inv --list
324 | ```
325 | 
326 | **Available tasks**:
327 | - **checks.all (checks)** - Run all check tasks.
328 | - **checks.code** - Check the code with ruff.
329 | - **checks.coverage** - Check the coverage with coverage.
330 | - **checks.format** - Check the formats with ruff.
331 | - **checks.poetry** - Check poetry config files.
332 | - **checks.security** - Check the security with bandit.
333 | - **checks.test** - Check the tests with pytest.
334 | - **checks.type** - Check the types with mypy.
335 | - **cleans.all (cleans)** - Run all tools and folders tasks.
336 | - **cleans.cache** - Clean the cache folder.
337 | - **cleans.coverage** - Clean the coverage tool.
338 | - **cleans.dist** - Clean the dist folder.
339 | - **cleans.docs** - Clean the docs folder.
340 | - **cleans.environment** - Clean the project environment file.
341 | - **cleans.folders** - Run all folders tasks.
342 | - **cleans.mlruns** - Clean the mlruns folder.
343 | - **cleans.mypy** - Clean the mypy tool.
344 | - **cleans.outputs** - Clean the outputs folder.
345 | - **cleans.poetry** - Clean the poetry lock file.
346 | - **cleans.pytest** - Clean the pytest tool.
347 | - **cleans.projects** - Run all projects tasks.
348 | - **cleans.python** - Clean python caches and bytecodes.
349 | - **cleans.requirements** - Clean the project requirements file.
350 | - **cleans.reset** - Run all tools, folders, and sources tasks.
351 | - **cleans.ruff** - Clean the ruff tool.
352 | - **cleans.sources** - Run all sources tasks.
353 | - **cleans.tools** - Run all tools tasks.
354 | - **cleans.venv** - Clean the venv folder.
355 | - **commits.all (commits)** - Run all commit tasks.
356 | - **commits.bump** - Bump the version of the package.
357 | - **commits.commit** - Commit all changes with a message.
358 | - **commits.info** - Print a guide for messages.
359 | - **containers.all (containers)** - Run all container tasks.
360 | - **containers.build** - Build the container image with the given tag.
361 | - **containers.compose** - Start up docker compose.
362 | - **containers.run** - Run the container image with the given tag.
363 | - **docs.all (docs)** - Run all docs tasks.
364 | - **docs.api** - Document the API with pdoc using the given format and output directory.
365 | - **docs.serve** - Serve the API docs with pdoc using the given format and computer port.
366 | - **formats.all (formats)** - Run all format tasks.
367 | - **formats.imports** - Format python imports with ruff.
368 | - **formats.sources** - Format python sources with ruff.
369 | - **installs.all (installs)** - Run all install tasks.
370 | - **installs.poetry** - Install poetry packages.
371 | - **installs.pre-commit** - Install pre-commit hooks on git.
372 | - **mlflow.all (mlflow)** - Run all mlflow tasks.
373 | - **mlflow.doctor** - Run mlflow doctor to diagnose issues.
374 | - **mlflow.serve** - Start mlflow server with the given host, port, and backend URI.
375 | - **packages.all (packages)** - Run all package tasks.
376 | - **packages.build** - Build a python package with the given format.
377 | - **projects.all (projects)** - Run all project tasks.
378 | - **projects.environment** - Export the project environment file.
379 | - **projects.requirements** - Export the project requirements file.
380 | - **projects.run** - Run an mlflow project from the MLproject file.
381 | 
382 | # Tools
383 | 
384 | This section motivates the use of developer tools to improve your coding experience.
385 | - Most developer tools in this project are the same ones used in the mlops-python package. Check that resource for a detailed explanation of the motivation behind these tools.
386 | 
387 | 
388 | 
389 | # Tips
390 | 
391 | This section gives some tips and tricks to enrich the development experience.
392 | 
393 | ## [Design Patterns](https://en.wikipedia.org/wiki/Software_design_pattern)
394 | 
395 | ### [Directed-Acyclic Graph](https://en.wikipedia.org/wiki/Directed_acyclic_graph)
396 | 
397 | **You should use a Directed-Acyclic Graph (DAG) to connect the steps of your ML pipeline.**
398 | 
399 | A DAG can express the dependencies between steps while keeping the individual steps independent.
400 | 
401 | In this project, the manager classes in `src/llmops_project/pipelines/managers/` play this role by orchestrating the pipeline steps.
402 | 
403 | In production, we recommend using a scalable system such as [Airflow](https://airflow.apache.org/), [Dagster](https://dagster.io/), [Prefect](https://www.prefect.io/), [Metaflow](https://metaflow.org/), or [ZenML](https://zenml.io/).
404 | 
405 | ### [Program Service](https://en.wikipedia.org/wiki/Systemd)
406 | 
407 | **You should provide a global context for the execution of your program.**
408 | 
409 | There are several approaches such as [Singleton](https://en.wikipedia.org/wiki/Singleton_pattern), [Global Variable](https://en.wikipedia.org/wiki/Global_variable), or [Component](https://github.com/stuartsierra/component).
410 | 
411 | This package takes inspiration from [Clojure mount](https://github.com/tolitius/mount). It provides an implementation in `src/[package]/io/services.py`.
412 | 
413 | ### [Soft Coding](https://en.wikipedia.org/wiki/Softcoding)
414 | 
415 | **You should separate the program implementation from the program configuration.**
416 | 
417 | Exposing configurations to users allows them to influence the execution behavior without code changes.
418 | 
419 | This package seeks to expose as many parameters as possible to the users in configurations stored in the `confs/` folder.
420 | 
421 | ### [SOLID Principles](https://en.wikipedia.org/wiki/SOLID)
422 | 
423 | **You should implement the SOLID principles to make your code as flexible as possible.**
424 | 
425 | - *Single responsibility principle*: Class has one job to do. Each change in requirements can be done by changing just one class.
426 | - *Open/closed principle*: Class is happy (open) to be used by others.
Class is not happy (closed) to be changed by others.
427 | - *Liskov substitution principle*: Class can be replaced by any of its children. Child classes inherit the parent's behaviors.
428 | - *Interface segregation principle*: When classes promise each other something, they should separate these promises (interfaces) into many small promises, so it's easier to understand.
429 | - *Dependency inversion principle*: When classes talk to each other in a very specific way, they both depend on each other to never change. Instead, classes should use promises (interfaces, parents), so classes can change as long as they keep the promise.
430 | 
431 | In practice, this means you can implement software contracts with interfaces and swap the implementation.
432 | 
433 | For instance, you can implement several jobs in `src/[package]/pipelines/*.py` and swap them in your configuration.
434 | 
435 | To learn more about the mechanism selected for this package, you can check the documentation for [Pydantic Tagged Unions](https://docs.pydantic.dev/dev-v2/usage/types/unions/#discriminated-unions-aka-tagged-unions).
436 | 
437 | ### [IO Separation](https://en.wikibooks.org/wiki/Haskell/Understanding_monads/IO)
438 | 
439 | **You should separate the code interacting with the external world from the rest.**
440 | 
441 | The external world is messy and full of risks: missing files, permission issues, running out of disk ...
442 | 
443 | To isolate these risks, you can put all the related code in an `io` package and use interfaces.
444 | 
445 | ## [Python Powers](https://realpython.com/)
446 | 
447 | ### [Context Manager](https://docs.python.org/3/library/contextlib.html)
448 | 
449 | **You should use Python context managers to control and enhance an execution.**
450 | 
451 | Python provides contexts that can be used to extend a code block. For instance:
452 | 
453 | ```python
454 | # in src/[package]/scripts.py
455 | with job as runner:  # context
456 |     runner.run()  # run in context
457 | ```
458 | 
459 | This pattern has the same benefit as [Monad](https://en.wikipedia.org/wiki/Monad_(functional_programming)), a powerful programming pattern.
460 | 
461 | The package uses `src/[package]/pipelines/*.py` to handle exceptions and services.
462 | 
463 | ### [Python Package](https://packaging.python.org/en/latest/tutorials/packaging-projects/)
464 | 
465 | **You should create a Python package to provide both a library and an application for others.**
466 | 
467 | Using a Python package for your AI/ML project has the following benefits:
468 | - Build a code archive (i.e., a wheel) that can be uploaded to PyPI.org
469 | - Install the Python package as a library (e.g., like pandas)
470 | - Expose script entry points to run a CLI or a GUI
471 | 
472 | To build a Python package with Poetry, you simply have to type in a terminal:
473 | ```bash
474 | # for all poetry projects
475 | poetry build
476 | # for this project only
477 | inv packages
478 | ```
479 | 
480 | ## [Software Engineering](https://en.wikipedia.org/wiki/Software_engineering)
481 | 
482 | ### [Code Typing](https://docs.python.org/3/library/typing.html)
483 | 
484 | **You should type your Python code to make it more robust and explicit for your user.**
485 | 
486 | Python provides the [typing module](https://docs.python.org/3/library/typing.html) for adding type hints and [mypy](https://mypy-lang.org/) for checking them.
487 | 
488 | 
489 | The snippet below clearly states the inputs and outputs of the function, both for the developer and the type checker.
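For example (a hypothetical function, not taken from this code base):

```python
def rewrite_query(question: str, chat_history: list[dict[str, str]]) -> str:
    """Rewrite a user question into a retrieval query, given the chat history."""
    ...
```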
490 | 
491 | The package aims to type every function and class to facilitate the developer experience and catch mistakes before execution.
492 | 
493 | ### [Config Typing](https://docs.pydantic.dev/latest/)
494 | 
495 | **You should type your configuration to avoid exceptions during the program execution.**
496 | 
497 | Pydantic allows you to define classes that can validate your configs during the program startup.
498 | 
499 | ```python
500 | # in src/[package]/utils/splitters.py
501 | class TrainTestSplitter(Splitter):
502 |     shuffle: bool = False  # required (time sensitive)
503 |     test_size: int | float = 24 * 30 * 2  # 2 months
504 |     random_state: int = 42
505 | ```
506 | 
507 | This code snippet communicates the expected values and prevents errors that could otherwise be avoided.
508 | 
509 | The package combines both OmegaConf and Pydantic to parse YAML files and validate them as soon as possible.
510 | 
511 | 
512 | ### [Object Oriented](https://en.wikipedia.org/wiki/Object-oriented_programming)
513 | 
514 | **You should use Object-Oriented programming to benefit from [polymorphism](https://en.wikipedia.org/wiki/Polymorphism_(computer_science)).**
515 | 
516 | Polymorphism combined with the SOLID Principles allows you to easily swap your code components.
517 | 
518 | The package defines class interfaces whenever possible to provide intuitive and replaceable parts for your AI/ML project.
519 | 
520 | ### [Semantic Versioning](https://semver.org/)
521 | 
522 | **You should use semantic versioning to communicate the level of compatibility of your releases.**
523 | 
524 | Semantic Versioning (SemVer) provides a simple schema to communicate code changes. For package X.Y.Z:
525 | - *Major* (X): major release with breaking changes (i.e., may require action from users)
526 | - *Minor* (Y): minor release with new features (i.e., provides new capabilities)
527 | - *Patch* (Z): patch release to fix bugs (i.e., corrects wrong behavior)
528 | 
529 | Poetry and this package leverage Semantic Versioning to let developers control the speed of adoption for new releases.
530 | 
531 | ## [Testing Tricks](https://en.wikipedia.org/wiki/Software_testing)
532 | 
533 | ### [Parallel Testing](https://pytest-xdist.readthedocs.io/en/stable/)
534 | 
535 | **You can run your tests in parallel to speed up the validation of your code base.**
536 | 
537 | Pytest can be extended with the [pytest-xdist plugin](https://pytest-xdist.readthedocs.io/en/stable/) for this purpose.
538 | 
539 | This package enables Pytest in its automation tasks by default.
540 | 
541 | ### [Test Fixtures](https://docs.pytest.org/en/latest/explanation/fixtures.html)
542 | 
543 | **You should define reusable objects and actions for your tests with [fixtures](https://docs.pytest.org/en/latest/explanation/fixtures.html).**
544 | 
545 | Fixtures can prepare objects for your test cases, such as dataframes, models, or files.
546 | 
547 | This package defines fixtures in `tests/conftest.py` to improve your testing experience.
548 | 
549 | ## [VS Code](https://code.visualstudio.com/)
550 | 
551 | ### [Code Workspace](https://code.visualstudio.com/docs/editor/workspaces)
552 | 
553 | **You can use a VS Code workspace to define configurations for your project.**
554 | 
555 | [Code Workspace](https://code.visualstudio.com/docs/editor/workspaces) can enable features (e.g., formatting) and set the default interpreter.
556 | 
557 | ```json
558 | {
559 |   "settings": {
560 |     "editor.formatOnSave": true,
561 |     "python.defaultInterpreterPath": ".venv/bin/python",
562 |     ...
563 |   },
564 | }
565 | ```
566 | 
567 | This package defines a workspace file that you can load from `[package].code-workspace`.
568 | 
569 | ### [GitHub Copilot](https://github.com/features/copilot)
570 | 
571 | **You can use GitHub Copilot to increase your coding productivity by 30%.**
572 | 
573 | [GitHub Copilot](https://github.com/features/copilot) has been a huge productivity boost thanks to its smart completion.
574 | 
575 | You should become familiar with the solution in less than a single coding session.
576 | 
577 | ### [VSCode VIM](https://marketplace.visualstudio.com/items?itemName=vscodevim.vim)
578 | 
579 | **You can use VIM keybindings to more efficiently navigate and modify your code.**
580 | 
581 | Learning VIM is one of the best investments for a career in IT. It can make you 30% more productive.
582 | 
583 | Compared to GitHub Copilot, VIM can take much more time to master, but you can expect a ROI in less than a month.
584 | 
585 | # Resources
586 | 
587 | This section provides resources for building packages for Python and AI/ML/MLOps.
588 | 
589 | ## Python
590 | 
591 | - https://github.com/krzjoa/awesome-python-data-science#readme
592 | - https://github.com/ml-tooling/best-of-ml-python
593 | - https://github.com/ml-tooling/best-of-python
594 | - https://github.com/ml-tooling/best-of-python-dev
595 | - https://github.com/vinta/awesome-python
596 | 
597 | ## AI/ML/MLOps
598 | 
599 | - https://github.com/josephmisiti/awesome-machine-learning
600 | - https://github.com/visenger/awesome-mlops
601 | 
602 | 
603 | 
604 | 
605 | 
606 | 
--------------------------------------------------------------------------------
/confs/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/confs/.gitkeep
--------------------------------------------------------------------------------
/confs/deployment.yaml:
--------------------------------------------------------------------------------
1 | job:
2 |   KIND: DeploymentJob
3 |   staging_alias: "champion"
4 |   production_alias: "production"
5 |   registry_model_name: "rag-chatbot-with-guardrails"
6 |   llm_confs: "/confs/rag_chain_config.yaml"
7 |   llm_model_code_path: "/src/llmops_project/models/chatbot_with_guardrails.py"
8 |   vector_store_path: "https://34beb054-2278-47fe-9731-cd7ed574320f.eu-west-2-0.aws.cloud.qdrant.io:6333" #"http://localhost:6333"
9 |   qa_dataset_path: "/data/datasets/rag_dataset.csv"
10 |   alias: "champion"
11 | 
12 |   metric_tresholds:
13 |     flesch_kincaid_grade_level_mean: 5.1 # bigger than
14 |     ari_grade_level_mean: 4.1 # bigger than
--------------------------------------------------------------------------------
/confs/feature_eng.yaml:
--------------------------------------------------------------------------------
1 | job:
2 |   KIND: FeatureEngineeringJob
3 |   embedding_model: "amazon.titan-embed-text-v1"
4 |   vector_store_path: "http://localhost:6333"
5 |   document_path: "/data/documents/"
6 |   collection_name: "hr-documents"
--------------------------------------------------------------------------------
/confs/generate_rag_dataset.yaml:
--------------------------------------------------------------------------------
1 | job:
2 |   KIND: GenerateRagDatasetJob
3 |   data_path: "/data/documents/"
4 |   qa_dataset_path_csv: "data/datasets/rag_dataset.csv" # will save a QA dataset in csv and json
5 |   qa_dataset_path_json: "data/datasets/rag_dataset.json" # will save a QA dataset in csv and json
6 |   llm_model:
"anthropic.claude-3-haiku-20240307-v1:0" # model_id to generate the QA dataset 7 | -------------------------------------------------------------------------------- /confs/monitoring.yaml: -------------------------------------------------------------------------------- 1 | job: 2 | KIND: MonitoringEvalJob 3 | vector_store_path: "/faiss_db/" 4 | registry_model_name: "rag-chatbot" 5 | 6 | trace_experiment_name: "rag_chatbot_experiment" 7 | monitoring_experiment_name: "monitoring" 8 | 9 | -------------------------------------------------------------------------------- /confs/rag_chain_config.yaml: -------------------------------------------------------------------------------- 1 | guardrail_config: 2 | model: anthropic.claude-3-haiku-20240307-v1:0 3 | topics: 4 | valid: 5 | - HR Policies 6 | - company culture 7 | - team building 8 | - leadership 9 | - management 10 | - productivity 11 | 12 | invalid: 13 | - software programming 14 | - religion 15 | - politics 16 | - sports 17 | input_example: 18 | messages: 19 | - content: What is the company's sick leave policy? 20 | role: user 21 | - content: The company's sick leave policy allows employees to take a certain number 22 | of sick days per year. Please refer to the employee handbook for specific details 23 | and eligibility criteria. 24 | role: assistant 25 | - content: How many sick days can I take per year? 26 | role: user 27 | output_example: 28 | result: "example text" 29 | sources: 30 | - "example_source_1.pdf" 31 | - "example_source_2.pdf" 32 | 33 | llm_config: 34 | llm_model: anthropic.claude-3-haiku-20240307-v1:0 35 | llm_parameters: 36 | max_tokens: 4000 37 | temperature: 0.01 38 | llm_prompt_template: "\n You are a trustful assistant for HR Policies.\ 39 | \ You are answering employee benefits, leave policies, performance management,\ 40 | \ recruitment, onboarding, and other HR-related topics. If you do not know the\ 41 | \ answer to a question, you truthfully say you do not know. Read the discussion\ 42 | \ to get the context of the previous conversation. In the chat discussion, you\ 43 | \ are referred to as \"system\". The user is referred to as \"user\".\n\n \ 44 | \ Discussion: {chat_history}\n\n Here's some context\ 45 | \ which might or might not help you answer: {context}\n\n Answer\ 46 | \ straight, do not repeat the question, do not start with something like: the\ 47 | \ answer to the question, do not add \"AI\" in front of your answer, do not say:\ 48 | \ here is the answer, do not mention the context or the question.\n\n \ 49 | \ Based on this history and context, answer this question: {question}\n\ 50 | \ " 51 | llm_refusal_fallback_answer: I cannot answer this question. 52 | query_rewriter_prompt_template: "\n Based on the chat history below,\ 53 | \ we want you to generate a query for an external data source to retrieve relevant\ 54 | \ documents so that we can better answer the question. The query should be in\ 55 | \ natual language. The external data source uses similarity search to search for\ 56 | \ relevant documents in a vector space. So the query should be similar to the\ 57 | \ relevant documents semantically. Answer with only the query. 
Do not add explanation.\n\ 58 | \n Chat history: {chat_history}\n\n Question: {question}\n\ 59 | \ " 60 | retriever_config: 61 | embedding_model: "amazon.titan-embed-text-v1" 62 | parameters: 63 | k: 10 64 | score_threshold: 0.5 65 | schema: 66 | document_uri: source 67 | vector_store_path: "http://localhost:6333" 68 | collection_name: "hr-documents" 69 | -------------------------------------------------------------------------------- /data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/data/.gitkeep -------------------------------------------------------------------------------- /data/datasets/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/data/datasets/.gitkeep -------------------------------------------------------------------------------- /data/documents/sample_hr_manual.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/data/documents/sample_hr_manual.pdf -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | # https://docs.docker.com/compose/compose-file/ 2 | 3 | services: 4 | mlflow: 5 | image: ghcr.io/mlflow/mlflow:v2.17.1 6 | ports: 7 | - 5001:5000 8 | environment: 9 | - MLFLOW_HOST=0.0.0.0 10 | command: mlflow server 11 | 12 | qdrant: 13 | image: qdrant/qdrant:latest 14 | restart: always 15 | container_name: qdrant 16 | ports: 17 | - 6333:6333 18 | - 6334:6334 19 | expose: 20 | - 6333 21 | - 6334 22 | - 6335 23 | configs: 24 | - source: qdrant_config 25 | target: /qdrant/config/production.yaml 26 | volumes: 27 | - ./qdrant_data:/qdrant/storage 28 | 29 | configs: 30 | qdrant_config: 31 | content: | 32 | log_level: INFO -------------------------------------------------------------------------------- /docs/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/docs/.gitkeep -------------------------------------------------------------------------------- /invoke.yaml: -------------------------------------------------------------------------------- 1 | # https://docs.pyinvoke.org/en/latest/index.html 2 | 3 | run: 4 | echo: true 5 | project: 6 | name: LLMOps Project 7 | package: llmops_project 8 | repository: llmops-project 9 | -------------------------------------------------------------------------------- /llmops-project.code-workspace: -------------------------------------------------------------------------------- 1 | { 2 | "folders": [ 3 | { 4 | "path": "." 
5 | } 6 | ], 7 | "settings": { 8 | "editor.formatOnSave": true, 9 | "python.defaultInterpreterPath": ".venv/bin/python", 10 | "python.testing.pytestEnabled": true, 11 | "python.testing.pytestArgs": [ 12 | "tests" 13 | ], 14 | "[python]": { 15 | "editor.codeActionsOnSave": { 16 | "source.organizeImports": "explicit" 17 | }, 18 | "editor.defaultFormatter": "charliermarsh.ruff", 19 | }, 20 | }, 21 | "extensions": { 22 | "recommendations": [ 23 | "charliermarsh.ruff", 24 | "dchanco.vsc-invoke", 25 | "ms-python.mypy-type-checker", 26 | "ms-python.python", 27 | "ms-python.vscode-pylance", 28 | "redhat.vscode-yaml", 29 | ] 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /mlruns/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/mlruns/.gitkeep -------------------------------------------------------------------------------- /notebooks/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/notebooks/.gitkeep -------------------------------------------------------------------------------- /outputs/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/outputs/.gitkeep -------------------------------------------------------------------------------- /poetry.toml: -------------------------------------------------------------------------------- 1 | # https://python-poetry.org/docs/configuration/ 2 | 3 | [virtualenvs] 4 | in-project = true 5 | prefer-active-python = true 6 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | # https://python-poetry.org/docs/pyproject/ 2 | 3 | # PROJECT 4 | 5 | [tool.poetry] 6 | name = "llmops-project" 7 | version = "0.2.0" 8 | description = "LLMOps Package Production Ready template using open source technologies." 
9 | repository = "https://github.com/callmesora/llmops-project" 10 | documentation = "https://callmesora.github.io/llmops-project/" 11 | authors = ["callmesora"] 12 | readme = "README.md" 13 | license = "MIT" 14 | packages = [{ include = "llmops_project", from = "src" }] 15 | 16 | # SCRIPTS 17 | 18 | [tool.poetry.scripts] 19 | llmops-project = "llmops_project.scripts:main" 20 | 21 | # DEPENDENCIES 22 | 23 | [tool.poetry.dependencies] 24 | python = ">=3.10,<3.13" 25 | mlflow = {extras = ["genai"], version = "^2.17.2"} 26 | setuptools = "^71.1.0" 27 | langchain = "^0.3.5" 28 | # langchain-ollama = "^0.2.0" 29 | llama-index = "^0.12.0" 30 | # langgraph = "0.2.27" 31 | #llama-index-utils-workflow = "^0.2.2" 32 | #llama-index-llms-ollama = "^0.3.4" 33 | # gensim = "^4.0.0" 34 | nltk = "^3.9.1" 35 | langchain-community = "^0.3.4" 36 | transformers = "^4.46.1" 37 | #streamlit = "^1.39.0" 38 | pypdf2 = "^3.0.1" 39 | defusedxml = "^0.7.1" 40 | #faiss-cpu = "^1.9.0" 41 | spacy = "3.7.0" 42 | textstat = "^0.7.4" 43 | torch = "^2.5.1" 44 | evaluate = "^0.4.3" 45 | loguru = "^0.7.2" 46 | omegaconf = "^2.3.0" 47 | pandera = "^0.21.0" 48 | #sagemaker-mlflow = "0.1.0" 49 | langchain-aws = "^0.2.7" 50 | langchain-openai = "^0.2.9" 51 | plotly = "5.3.1" 52 | kaleido = "0.2.1" 53 | seaborn = "^0.13.2" 54 | sagemaker = "^2.235.1" 55 | litserve = "^0.2.4" 56 | #nemoguardrails = "^0.11.0" 57 | guardrails-ai = "^0.6.0" 58 | tenacity = ">=8.2.0,<8.4.0" 59 | presidio-analyzer = "^2.2.355" 60 | presidio-anonymizer = "^2.2.355" 61 | qdrant-client = "^1.12.1" 62 | unstructured = {extras = ["pdf"], version = "^0.16.8"} 63 | libmagic = "^1.0" 64 | python-magic = "^0.4.27" 65 | langchain-qdrant = "^0.2.0" 66 | pytest = "^8.3.4" 67 | pytest-ordering = "^0.6" 68 | llama-index-llms-bedrock = "^0.3.1" 69 | 70 | 71 | 72 | [tool.poetry.group.checks.dependencies] 73 | bandit = "^1.7.9" 74 | coverage = "^7.5.4" 75 | mypy = "^1.10.1" 76 | pytest = "^8.2.2" 77 | pytest-cov = "^5.0.0" 78 | pytest-xdist = "^3.6.1" 79 | ruff = "^0.5.0" 80 | 81 | [tool.poetry.group.commits.dependencies] 82 | commitizen = "^3.27.0" 83 | pre-commit = "^3.7.1" 84 | 85 | [tool.poetry.group.dev.dependencies] 86 | invoke = "^2.2.0" 87 | 88 | [tool.poetry.group.docs.dependencies] 89 | pdoc = "^14.5.1" 90 | 91 | [tool.poetry.group.notebooks.dependencies] 92 | ipykernel = "^6.29.4" 93 | nbformat = "^5.10.4" 94 | 95 | # CONFIGURATIONS 96 | 97 | [tool.bandit] 98 | targets = ["src"] 99 | 100 | [tool.commitizen] 101 | name = "cz_conventional_commits" 102 | tag_format = "v$version" 103 | version_scheme = "pep440" 104 | version_provider = "poetry" 105 | update_changelog_on_bump = true 106 | 107 | [tool.coverage.run] 108 | branch = true 109 | source = ["src"] 110 | omit = ["__main__.py"] 111 | 112 | [tool.mypy] 113 | pretty = true 114 | strict = false 115 | python_version = "3.12" 116 | check_untyped_defs = true 117 | ignore_missing_imports = true 118 | plugins = ["pandera.mypy", "pydantic.mypy"] 119 | no_implicit_optional= false 120 | 121 | # Ignore specific error codes 122 | disable_error_code = "no-untyped-call" 123 | 124 | [tool.pytest.ini_options] 125 | addopts = "--verbosity=2" 126 | pythonpath = ["src"] 127 | filterwarnings = [ 128 | "ignore:Pydantic V1 style `@validator` validators are deprecated:DeprecationWarning", 129 | "ignore:Pydantic V1 style `@root_validator` validators are deprecated:DeprecationWarning" 130 | ] 131 | 132 | 133 | [tool.ruff] 134 | fix = true 135 | indent-width = 4 136 | line-length = 100 137 | target-version = "py312" 138 | 139 | 
[tool.ruff.format]
140 | docstring-code-format = true
141 | 
142 | [tool.ruff.lint.pydocstyle]
143 | convention = "google"
144 | 
145 | [tool.ruff.lint.per-file-ignores]
146 | "tests/*.py" = ["D100", "D103"]
147 | 
148 | # SYSTEMS
149 | 
150 | [build-system]
151 | requires = ["poetry-core"]
152 | build-backend = "poetry.core.masonry.api"
153 | 
-------------------------------------------------------------------------------- /serving_endpoint/Dockerfile: --------------------------------------------------------------------------------
1 | # Use a multi-stage build to first get uv
2 | FROM ghcr.io/astral-sh/uv:0.5.4 AS uv
3 | FROM python:3.12-slim
4 | 
5 | # Define build arguments
6 | ARG GUARDRAILS_TOKEN
7 | 
8 | # Create a virtual environment with uv inside the container
9 | RUN --mount=from=uv,source=/uv,target=./uv \
10 |     ./uv venv /opt/venv
11 | 
12 | # We need to set this environment variable so that uv knows where
13 | # the virtual environment is to install packages
14 | ENV VIRTUAL_ENV=/opt/venv
15 | 
16 | # Make sure that the virtual environment is in the PATH so
17 | # we can use the binaries of packages that we install such as pip
18 | # without needing to activate the virtual environment explicitly
19 | ENV PATH="/opt/venv/bin:$PATH"
20 | 
21 | # Copy the requirements file into the container
22 | COPY requirements.txt .
23 | 
24 | # Install the packages with uv using --mount=type=cache to cache the downloaded packages
25 | RUN --mount=type=cache,target=/root/.cache/uv \
26 |     --mount=from=uv,source=/uv,target=./uv \
27 |     ./uv pip install -r requirements.txt litserve==0.2.4
28 | 
29 | # Set the GUARDRAILS_TOKEN as an environment variable
30 | ENV GUARDRAILS_TOKEN=${GUARDRAILS_TOKEN}
31 | 
32 | RUN guardrails configure --enable-metrics --enable-remote-inferencing --token ${GUARDRAILS_TOKEN}
33 | 
34 | RUN guardrails hub install hub://tryolabs/restricttotopic
35 | 
36 | 
37 | WORKDIR /app
38 | COPY . /app
39 | 
40 | EXPOSE 8000
41 | CMD ["python", "/app/server.py"]
-------------------------------------------------------------------------------- /serving_endpoint/README.md: --------------------------------------------------------------------------------
1 | # ML Serving Endpoint with LitServe
2 | 
3 | This project sets up a FastAPI server using `litserve` that loads the latest MLflow model from the model registry and runs inference on it. The server includes input and output validation using Pydantic and scales to CPU/GPU workloads with batching support.
4 | 
5 | ![ML Serving Endpoint](./imgs/litserve.png)
6 | 
7 | ## ML Design Pattern
8 | In this example, we walk through a realistic LLMOps setup: a LangChain model is loaded from the model registry and served in a LitServe container.
9 | 
10 | ![ML Serving Endpoint](./imgs/litserve-deployment.png)
11 | 
12 | 
13 | ## MLflow Model Registry
14 | 
15 | MLflow Model Registry is a centralized repository for managing and deploying machine learning models. It provides model lineage, versioning, and lifecycle management. Models can be registered, and different versions of a model can be tracked and deployed.
16 | 
17 | ![ML Serving Endpoint](./imgs/model-registry.webp)
18 | 
19 | 
20 | ## Loading Model from Registry
21 | 
22 | The server loads the latest model from the MLflow model registry. This pattern ensures that the most recent validated model is always used for inference. The model is loaded at server startup and is ready to serve predictions.
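To make the pattern concrete, here is a minimal sketch of what that load looks like with the MLflow API. The tracking URI and model name below mirror the defaults used in `server.py`; adjust them for your environment.

```python
import mlflow
import mlflow.langchain

# Assumed defaults (same as server.py); point these at your own MLflow server.
mlflow.set_tracking_uri("http://127.0.0.1:5001")
mlflow.set_registry_uri("http://127.0.0.1:5001")

# "latest" resolves to the newest registered version, so restarting the
# server is enough to pick up a newly registered model.
chain = mlflow.langchain.load_model("models:/rag-chatbot-with-guardrails/latest")
```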
23 | 
24 | ![ML Serving Endpoint](./imgs/model-registry.webp)
25 | 
26 | 
27 | ## Server Details
28 | 
29 | The FastAPI server (`litserve`) is designed to handle various workloads, including CPU and GPU, and supports batch processing for efficient inference. Input and output data are validated with Pydantic to ensure data integrity and correctness.
30 | 
31 | ## Deployment
32 | 
33 | You can deploy this on Kubernetes, on your cloud provider, or at the edge.
34 | 
35 | ## Instructions
36 | 
37 | ### Setup ENV File
38 | Set up a `.env` file with the following variables:
39 | 
40 | ```.env
41 | # For running the model
42 | OPENAI_API_KEY= ... # or any other provider API key if needed
43 | 
44 | # If your MLflow tracking server is on AWS
45 | AWS_ACCESS_KEY_ID= ...
46 | AWS_SECRET_ACCESS_KEY= ...
47 | AWS_REGION= ...
48 | 
49 | ```
50 | ### Build the Docker Image
51 | 
52 | To build the Docker image, run the following command:
53 | 
54 | ```sh
55 | docker build --build-arg GUARDRAILS_TOKEN=$(grep GUARDRAILS_TOKEN .env | cut -d '=' -f2) -t litserve-model:latest .
56 | ```
57 | ### Run the Docker Image
58 | 
59 | To run the Docker image, use the following command:
60 | 
61 | ```sh
62 | docker run --env-file .env -p 8000:8000 litserve-model:latest
63 | ```
64 | 
65 | 
66 | 
67 | 
68 | 
-------------------------------------------------------------------------------- /serving_endpoint/client.py: --------------------------------------------------------------------------------
1 | import requests
2 | 
3 | vector_store_path = "http://localhost:6333"
4 | 
5 | 
6 | non_relevant_dialog = {  # This will test the guardrail
7 |     "messages": [
8 |         {"role": "user", "content": "What is the company's sick leave policy?"},
9 |         {
10 |             "role": "assistant",
11 |             "content": "The company's sick leave policy allows employees to take a certain number of sick days per year. Please refer to the employee handbook for specific details and eligibility criteria.",
12 |         },
13 |         {"role": "user", "content": "What is the meaning of life?"},
14 |     ],
15 |     "vector_store_path": vector_store_path,
16 | }
17 | 
18 | relevant_dialog = {  # This will test the schema
19 |     "messages": [
20 |         {"role": "user", "content": "What is discussed in the HR manual?"},
21 |     ],
22 |     "vector_store_path": vector_store_path,
23 | }
24 | 
25 | 
26 | response = requests.post("http://localhost:8000/predict", json=non_relevant_dialog)
27 | print(response.json())
28 | # print(response.headers["X-Request-Id"])  # This will print "00000"
29 | 
30 | 
31 | print("-------------------")
32 | print("Relevant Dialog")
33 | 
34 | response = requests.post("http://localhost:8000/predict", json=relevant_dialog)
35 | print(response.json())
-------------------------------------------------------------------------------- /serving_endpoint/ecs/Makefile: --------------------------------------------------------------------------------
1 | include .env
2 | 
3 | .EXPORT_ALL_VARIABLES:
4 | APP_NAME=litserve-endpoint
5 | 
6 | TAG=latest
7 | TF_VAR_app_name=${APP_NAME}
8 | REGISTRY_NAME=${APP_NAME}
9 | TF_VAR_image=${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/${REGISTRY_NAME}:${TAG}
10 | TF_VAR_region=${AWS_REGION}
11 | 
12 | 
13 | setup-ecr:
14 | 	cd infra/setup && terraform init && terraform apply -auto-approve
15 | 
16 | deploy-container:
17 | 	sh deploy-image-to-ecs.sh
18 | 
19 | deploy-service:
20 | 	cd infra/app && terraform init && terraform apply -auto-approve
21 | 
22 | destroy-service:
23 | 	cd infra/app && terraform init && terraform destroy -auto-approve
-------------------------------------------------------------------------------- /serving_endpoint/ecs/README.md: --------------------------------------------------------------------------------
1 | # How to use this Repo
2 | ## Warning
3 | - Always make sure to destroy your API Service. Forgetting to do so could incur a large AWS fee.
4 | - Never commit your AWS account ID to git. Save it in an `.env` file and ensure `.env` is added to your `.gitignore`.
5 | 
6 | ## Setup, Deploy, and Destroy
7 | 
8 | ### Setup Env Variables
9 | Add an `.env` file containing your AWS account ID and region. Example file:
10 | ```
11 | AWS_ACCOUNT_ID=1234567890
12 | AWS_REGION=ap-southeast-1
13 | ```
14 | 
15 | Create a `backend.tf` file and add it to both `/infra/setup/backend.tf` and `/infra/app/backend.tf`. Example files:
16 | ```
17 | terraform {
18 |     backend "s3" {
19 |         region = ""
20 |         bucket = ""
21 |         key    = "/terraform.tfstate"
22 |     }
23 | }
24 | ```
25 | ```
26 | terraform {
27 |     backend "s3" {
28 |         region = ""
29 |         bucket = ""
30 |         key    = "/terraform.tfstate"
31 |     }
32 | }
33 | ```
34 | Alternatively, you can skip this step and store your Terraform state locally.
35 | 
36 | 
37 | 
38 | ### Setup, Deploy, and Destroy Infrastructure/App
39 | All of the following commands are run via the Makefile.
40 | 
41 | 1. Set up your ECR repository (one time)
42 | ```
43 | make setup-ecr
44 | ```
45 | 
46 | 
47 | 48 | 2. Build and deploy your container 49 | ``` 50 | make deploy-container 51 | ``` 52 | 53 |
54 | 55 | 3. Deploy your API Service on ECS Fargate 56 | ``` 57 | make deploy-service 58 | ``` 59 | Note: The URL for your endpoint will be printed by Terraform once the above command is done executing. Example: `alb_dns_name = "-alb-123456789..elb.amazonaws.com"`. Navigate to that URL in your browser to ensure the API is working. You can also check out the API docs at the `/docs` endpoint. 60 | 61 |
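Before moving on, you can smoke-test the deployed endpoint from Python as well. A minimal sketch using `requests`, assuming the `alb_dns_name` printed by Terraform and the payload shape from `serving_endpoint/client.py` (note that `vector_store_path` must be a Qdrant URL reachable from the ECS tasks, not `localhost`):

```python
import requests

# Hypothetical ALB DNS name: substitute the alb_dns_name output from Terraform.
BASE_URL = "http://your-alb-dns-name.elb.amazonaws.com:8000"

payload = {
    "messages": [{"role": "user", "content": "What is discussed in the HR manual?"}],
    # Must be reachable from inside the ECS tasks, not localhost.
    "vector_store_path": "http://your-qdrant-host:6333",
}

response = requests.post(f"{BASE_URL}/predict", json=payload, timeout=60)
print(response.json())
```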
62 | 63 | 4. Destroy your API Service on ECS Fargate 64 | ``` 65 | make destroy-service 66 | ``` 67 | 68 | Based on: 69 | https://medium.com/aspiring-data-scientist/deploy-a-fastapi-app-on-aws-ecs-034b8b7b5ac2 70 | https://github.com/tomsharp/fastapi-on-ecs/tree/main -------------------------------------------------------------------------------- /serving_endpoint/ecs/deploy-image-to-ecs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "Logging in to ECR" 3 | aws ecr get-login-password --region $AWS_REGION | docker login --username AWS --password-stdin $AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com 4 | 5 | IMAGE_PATH="../" 6 | 7 | echo "Building image" 8 | docker build --no-cache --platform=linux/amd64 -t $REGISTRY_NAME $IMAGE_PATH 9 | 10 | echo "Tagging image" 11 | docker tag $REGISTRY_NAME:$TAG $AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com/$REGISTRY_NAME:$TAG 12 | 13 | echo "Pushing image to ECR" 14 | docker push $AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com/$REGISTRY_NAME:$TAG -------------------------------------------------------------------------------- /serving_endpoint/ecs/infra/app/ecs/main.tf: -------------------------------------------------------------------------------- 1 | # ALB 2 | resource "aws_security_group" "alb" { 3 | name = "${var.app_name}-alb-sg" 4 | vpc_id = var.vpc_id 5 | egress { 6 | from_port = 0 7 | to_port = 0 8 | protocol = "-1" 9 | cidr_blocks = ["0.0.0.0/0"] 10 | } 11 | ingress { 12 | from_port = 8000 13 | to_port = 8000 14 | protocol = "tcp" 15 | cidr_blocks = ["0.0.0.0/0"] 16 | } 17 | ingress { 18 | from_port = 443 19 | to_port = 443 20 | protocol = "tcp" 21 | cidr_blocks = ["0.0.0.0/0"] 22 | } 23 | } 24 | resource "aws_lb" "this" { 25 | name = "${var.app_name}-alb" 26 | load_balancer_type = "application" 27 | security_groups = [aws_security_group.alb.id] 28 | subnets = var.public_subnet_ids 29 | } 30 | resource "aws_lb_target_group" "this" { 31 | name = "${var.app_name}-lb-tg" 32 | vpc_id = var.vpc_id 33 | port = 8000 34 | protocol = "HTTP" 35 | target_type = "ip" 36 | health_check { 37 | port = 8000 38 | path = "/docs" 39 | interval = 30 40 | protocol = "HTTP" 41 | timeout = 5 42 | unhealthy_threshold = 2 43 | matcher = 200 44 | } 45 | } 46 | resource "aws_lb_listener" "http" { 47 | port = "8000" 48 | protocol = "HTTP" 49 | load_balancer_arn = aws_lb.this.arn 50 | default_action { 51 | target_group_arn = aws_lb_target_group.this.arn 52 | type = "forward" 53 | } 54 | depends_on = [aws_lb_target_group.this] 55 | } 56 | resource "aws_lb_listener_rule" "this" { 57 | listener_arn = aws_lb_listener.http.arn 58 | action { 59 | type = "forward" 60 | target_group_arn = aws_lb_target_group.this.arn 61 | } 62 | condition { 63 | path_pattern { 64 | values = ["*"] 65 | } 66 | } 67 | } 68 | 69 | # IAM 70 | data "aws_iam_policy_document" "ecs_assume_policy" { 71 | statement { 72 | actions = ["sts:AssumeRole"] 73 | principals { 74 | type = "Service" 75 | identifiers = ["ecs-tasks.amazonaws.com"] 76 | } 77 | } 78 | } 79 | resource "aws_iam_role" "ecs_execution_role" { 80 | name = "${var.app_name}-execution-role" 81 | assume_role_policy = data.aws_iam_policy_document.ecs_assume_policy.json 82 | } 83 | resource "aws_iam_policy" "ecs_execution_policy" { 84 | name = "${var.app_name}-ecs-execution-role-policy" 85 | policy = jsonencode({ 86 | Version = "2012-10-17" 87 | Statement = [ 88 | { 89 | Effect : "Allow", 90 | Action : [ 91 | "ecr:*", 92 | "ecs:*", 93 | "elasticloadbalancing:*", 94 | 
"cloudwatch:*", 95 | "logs:*" 96 | ], 97 | Resource : "*" 98 | } 99 | ] 100 | }) 101 | } 102 | resource "aws_iam_role_policy_attachment" "ecs_execution_role_policy_attach" { 103 | role = aws_iam_role.ecs_execution_role.name 104 | policy_arn = aws_iam_policy.ecs_execution_policy.arn 105 | } 106 | 107 | # ECS 108 | resource "aws_cloudwatch_log_group" "ecs" { 109 | name = "/aws/ecs/${var.app_name}/cluster" 110 | } 111 | resource "aws_ecs_task_definition" "api" { 112 | family = "${var.app_name}-api-task" 113 | requires_compatibilities = ["FARGATE"] 114 | network_mode = "awsvpc" 115 | execution_role_arn = aws_iam_role.ecs_execution_role.arn 116 | task_role_arn = aws_iam_role.ecs_execution_role.arn 117 | cpu = 256 118 | memory = 512 119 | container_definitions = jsonencode([ 120 | { 121 | name = "${var.app_name}-api-container" 122 | image = "${var.image}" 123 | #command = ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"] 124 | portMappings = [ 125 | { 126 | hostPort = 8000 127 | containerPort = 8000 128 | protocol = "tcp" 129 | } 130 | ], 131 | logConfiguration = { 132 | logDriver = "awslogs" 133 | options = { 134 | awslogs-group = aws_cloudwatch_log_group.ecs.name 135 | awslogs-stream-prefix = "ecs" 136 | awslogs-region = var.region 137 | } 138 | } 139 | } 140 | ]) 141 | } 142 | 143 | # Cluster 144 | resource "aws_ecs_cluster" "this" { 145 | name = "${var.app_name}-cluster" 146 | setting { 147 | name = "containerInsights" 148 | value = "enabled" 149 | } 150 | } 151 | 152 | # Security Group and Service 153 | resource "aws_security_group" "ecs" { 154 | name = "${var.app_name}-ecs-sg" 155 | vpc_id = var.vpc_id 156 | egress { 157 | from_port = 0 158 | to_port = 0 159 | protocol = "-1" 160 | cidr_blocks = ["0.0.0.0/0"] 161 | } 162 | ingress { 163 | from_port = 8000 164 | to_port = 8000 165 | protocol = "tcp" 166 | security_groups = [aws_security_group.alb.id] 167 | } 168 | } 169 | resource "aws_ecs_service" "api" { 170 | name = "${var.app_name}-ecs-service" 171 | cluster = aws_ecs_cluster.this.name 172 | launch_type = "FARGATE" 173 | desired_count = length(var.private_subnet_ids) 174 | task_definition = aws_ecs_task_definition.api.arn 175 | network_configuration { 176 | subnets = var.private_subnet_ids 177 | security_groups = [aws_security_group.ecs.id] 178 | } 179 | load_balancer { 180 | target_group_arn = aws_lb_target_group.this.arn 181 | container_name = "${var.app_name}-api-container" 182 | container_port = "8000" 183 | } 184 | lifecycle { 185 | ignore_changes = [ 186 | desired_count, 187 | ] 188 | } 189 | depends_on = [aws_lb_listener_rule.this] 190 | } -------------------------------------------------------------------------------- /serving_endpoint/ecs/infra/app/ecs/output.tf: -------------------------------------------------------------------------------- 1 | output "alb_dns_name" { 2 | value = aws_lb.this.dns_name 3 | } -------------------------------------------------------------------------------- /serving_endpoint/ecs/infra/app/ecs/variable.tf: -------------------------------------------------------------------------------- 1 | variable "app_name" { 2 | description = "Name of the app." 3 | type = string 4 | } 5 | variable "region" { 6 | description = "AWS region to deploy the network to." 7 | type = string 8 | } 9 | variable "image" { 10 | description = "Image used to start the container. Should be in repository-url/image:tag format." 11 | type = string 12 | } 13 | variable "vpc_id" { 14 | description = "ID of the VPC where the ECS will be hosted." 
15 | type = string 16 | } 17 | variable "public_subnet_ids" { 18 | description = "IDs of public subnets where the ALB will be attached to." 19 | type = list(string) 20 | } 21 | variable "private_subnet_ids" { 22 | description = "IDs of private subnets where the ECS service will be deployed to." 23 | type = list(string) 24 | } -------------------------------------------------------------------------------- /serving_endpoint/ecs/infra/app/main.tf: -------------------------------------------------------------------------------- 1 | provider "aws" { 2 | region = var.region 3 | default_tags { 4 | tags = { 5 | app = var.app_name 6 | } 7 | } 8 | } 9 | 10 | module "network" { 11 | source = "./network" 12 | app_name = var.app_name 13 | region = var.region 14 | } 15 | 16 | module "ecs" { 17 | source = "./ecs" 18 | app_name = var.app_name 19 | region = var.region 20 | image = var.image 21 | vpc_id = module.network.vpc.id 22 | public_subnet_ids = [for s in module.network.public_subnets : s.id] 23 | private_subnet_ids = [for s in module.network.private_subnets : s.id] 24 | depends_on = [module.network] 25 | } 26 | 27 | 28 | # Outputs 29 | output "alb_dns_name" { 30 | value = module.ecs.alb_dns_name 31 | } -------------------------------------------------------------------------------- /serving_endpoint/ecs/infra/app/network/main.tf: -------------------------------------------------------------------------------- 1 | # Define provider 2 | provider "aws" { 3 | region = var.region 4 | default_tags { 5 | tags = { 6 | app = var.app_name 7 | } 8 | } 9 | } 10 | 11 | # Create VPC and IGW 12 | resource "aws_vpc" "this" { 13 | cidr_block = var.vpc_cidr_block 14 | } 15 | resource "aws_internet_gateway" "this" { 16 | vpc_id = aws_vpc.this.id 17 | } 18 | 19 | # Create public subnets 20 | resource "aws_subnet" "public_subnets" { 21 | count = length(var.availability_zones) 22 | vpc_id = aws_vpc.this.id 23 | cidr_block = var.public_cidr_blocks[count.index] 24 | availability_zone = var.availability_zones[count.index] 25 | } 26 | 27 | # Create routing tables for public subnets 28 | resource "aws_route_table" "public" { 29 | vpc_id = aws_vpc.this.id 30 | route { 31 | cidr_block = "0.0.0.0/0" 32 | gateway_id = aws_internet_gateway.this.id 33 | } 34 | } 35 | resource "aws_route_table_association" "publics" { 36 | count = length(var.availability_zones) 37 | subnet_id = element(aws_subnet.public_subnets.*.id, count.index) 38 | route_table_id = aws_route_table.public.id 39 | } 40 | 41 | 42 | # Create Elastic IPs and NAT Gateways 43 | resource "aws_eip" "eips" { 44 | count = length(var.availability_zones) 45 | domain = "vpc" 46 | } 47 | resource "aws_nat_gateway" "this" { 48 | count = length(var.availability_zones) 49 | subnet_id = element(aws_subnet.public_subnets.*.id, count.index) 50 | allocation_id = element(aws_eip.eips.*.id, count.index) 51 | } 52 | 53 | # Create private subnets 54 | resource "aws_subnet" "private_subnets" { 55 | count = length(var.availability_zones) 56 | vpc_id = aws_vpc.this.id 57 | cidr_block = var.private_cidr_blocks[count.index] 58 | availability_zone = var.availability_zones[count.index] 59 | } 60 | 61 | # Create routing tables for private subnets 62 | resource "aws_route_table" "private" { 63 | count = length(var.availability_zones) 64 | vpc_id = aws_vpc.this.id 65 | route { 66 | cidr_block = "0.0.0.0/0" 67 | nat_gateway_id = element(aws_nat_gateway.this.*.id, count.index) 68 | } 69 | } 70 | resource "aws_route_table_association" "privates" { 71 | count = length(var.availability_zones) 72 | 
subnet_id = element(aws_subnet.private_subnets.*.id, count.index) 73 | route_table_id = element(aws_route_table.private.*.id, count.index) 74 | } -------------------------------------------------------------------------------- /serving_endpoint/ecs/infra/app/network/outputs.tf: -------------------------------------------------------------------------------- 1 | output "vpc" { 2 | value = aws_vpc.this 3 | } 4 | output "public_subnets" { 5 | value = aws_subnet.public_subnets 6 | } 7 | output "private_subnets" { 8 | value = aws_subnet.private_subnets 9 | } -------------------------------------------------------------------------------- /serving_endpoint/ecs/infra/app/network/variable.tf: -------------------------------------------------------------------------------- 1 | variable "app_name" { 2 | type = string 3 | } 4 | variable "region" { 5 | type = string 6 | } 7 | variable "vpc_cidr_block" { 8 | type = string 9 | default = "10.0.0.0/16" 10 | } 11 | variable "availability_zones" { 12 | type = list(string) 13 | default = ["us-east-1a", "us-east-1f"] 14 | } 15 | variable "public_cidr_blocks" { 16 | type = list(string) 17 | default = ["10.0.1.0/24", "10.0.2.0/24"] 18 | } 19 | variable "private_cidr_blocks" { 20 | type = list(string) 21 | default = ["10.0.11.0/24", "10.0.12.0/24"] 22 | } -------------------------------------------------------------------------------- /serving_endpoint/ecs/infra/app/variable.tf: -------------------------------------------------------------------------------- 1 | variable "app_name" { 2 | description = "Name of the app." 3 | type = string 4 | } 5 | variable "region" { 6 | description = "AWS region to deploy the network to." 7 | type = string 8 | } 9 | variable "image" { 10 | description = "Image used to start the container. Should be in repository-url/image:tag format." 11 | type = string 12 | } -------------------------------------------------------------------------------- /serving_endpoint/ecs/infra/setup/main.tf: -------------------------------------------------------------------------------- 1 | resource "aws_ecr_repository" "this" { 2 | name = "${var.app_name}" 3 | } -------------------------------------------------------------------------------- /serving_endpoint/ecs/infra/setup/output.tf: -------------------------------------------------------------------------------- 1 | output "ecr_repo_url" { 2 | value = aws_ecr_repository.this.repository_url 3 | } -------------------------------------------------------------------------------- /serving_endpoint/ecs/infra/setup/variable.tf: -------------------------------------------------------------------------------- 1 | variable "app_name" { 2 | description = "Name of the app." 
3 | type = string 4 | } -------------------------------------------------------------------------------- /serving_endpoint/example.env: -------------------------------------------------------------------------------- 1 | AWS_ACCESS_KEY_ID= 2 | AWS_SECRET_ACCESS_KEY= 3 | AWS_DEFAULT_REGION= 4 | GUARDRAILS_TOKEN= 5 | -------------------------------------------------------------------------------- /serving_endpoint/imgs/Model-deployment.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/serving_endpoint/imgs/Model-deployment.webp -------------------------------------------------------------------------------- /serving_endpoint/imgs/litserve-deployment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/serving_endpoint/imgs/litserve-deployment.png -------------------------------------------------------------------------------- /serving_endpoint/imgs/litserve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/serving_endpoint/imgs/litserve.png -------------------------------------------------------------------------------- /serving_endpoint/imgs/model-registry.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/serving_endpoint/imgs/model-registry.webp -------------------------------------------------------------------------------- /serving_endpoint/requirements.txt: -------------------------------------------------------------------------------- 1 | mlflow-skinny==2.17.2 2 | sagemaker-mlflow==0.1.0 3 | cloudpickle==3.1.0 4 | colorama==0.4.6 5 | langchain-aws==0.2.7 6 | langchain-community==0.3.4 7 | langchain-ollama==0.2.0 8 | langchain-openai==0.2.9 9 | langchain==0.3.7 10 | langgraph==0.2.27 11 | numpy==1.26.4 12 | psutil==6.1.0 13 | pydantic==2.9.2 14 | rich==13.9.4 15 | tornado==6.4.1 16 | litserve==0.2.4 17 | langchain-qdrant==0.2.0 18 | 19 | guardrails-ai==0.6.0 ; python_version >= "3.10" and python_version < "3.13" \ 20 | --hash=sha256:6bd634b56ef34c6027ea066ea411f895261f14204e0592bdefb446875ce68eea \ 21 | --hash=sha256:a11a0aad96ecbb412bce58533fcaaa03ca6d21872f5bad02babffe4959a13e17 22 | guardrails-api-client==0.4.0a1 ; python_version >= "3.10" and python_version < "3.13" \ 23 | --hash=sha256:102e70cd53704298cd3d71c58bdac71bc6bfa2c341bc3b336a4ec434c540e9b4 \ 24 | --hash=sha256:163352bc09b295966d206bc5e912edb29fc3cae8f7749a6ceea1a80aae816029 25 | guardrails-hub-types==0.0.4 ; python_version >= "3.10" and python_version < "3.13" \ 26 | --hash=sha256:a5ad863fd6b354e41c3d83d8c1e835f7243a0ae0ec944b6601d1e3ee5d806a51 \ 27 | --hash=sha256:ad86faecc142e853d6c6fa24ecb50723c86f834fbe1f2e6b932f2921a08b60b3 28 | pandas -------------------------------------------------------------------------------- /serving_endpoint/server.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from pathlib import Path 4 | from typing import Any, Dict, List 5 | 6 | import litserve as ls 7 | import mlflow 8 | import mlflow.langchain 9 | from pydantic import BaseModel, constr 10 | 11 | # 
==== DEFINING INPUT / OUTPUT SCHEMAS WITH EXAMPLES ==== 12 | 13 | 14 | class MessageModel(BaseModel): 15 | role: str = "user" 16 | content: str = "What is the company's sick leave policy?" 17 | 18 | 19 | # Input Format 20 | class TextRequestModel(BaseModel): 21 | messages: List[MessageModel] = [ 22 | MessageModel(role="user", content="What is the company's sick leave policy?"), 23 | MessageModel( 24 | role="assistant", 25 | content="The company's sick leave policy allows employees to take a certain number of sick days per year. Please refer to the employee handbook for specific details and eligibility criteria.", 26 | ), 27 | MessageModel(role="user", content="What is the meaning of life?"), 28 | ] 29 | vector_store_path: str = "http://host.docker.internal:6333" 30 | 31 | 32 | # Output Format 33 | class TextResponseModel(BaseModel): 34 | response: str = "The company's sick leave policy allows employees to take a certain number of sick days per year." 35 | source_documents: Any = None 36 | model_uri: str = "models:/rag-chatbot/latest" 37 | 38 | 39 | class LangchainRAGAPI(ls.LitAPI): 40 | def setup(self, device): 41 | """Initialize the model and any required resources""" 42 | # Configuration - these could be moved to environment variables 43 | self.model_uri = "models:/rag-chatbot-with-guardrails/latest" 44 | self.cache_dir = "/tmp/mlflow_cache" 45 | 46 | # server uri 47 | tracking_uri: str = "http://127.0.0.1:5001" 48 | registry_uri: str = "http://127.0.0.1:5001" 49 | 50 | mlflow.set_tracking_uri(tracking_uri) 51 | mlflow.set_registry_uri(registry_uri) 52 | 53 | # Initialize cache directory 54 | os.makedirs(self.cache_dir, exist_ok=True) 55 | 56 | # Load the chain 57 | self._initialize_chain() 58 | 59 | def _initialize_chain(self): 60 | """Load the chain with caching support""" 61 | try: 62 | cache_key = self.model_uri.replace("/", "_").replace(":", "_") 63 | cache_path = Path(self.cache_dir) / f"{cache_key}.json" 64 | 65 | if cache_path.exists(): 66 | logging.info(f"Loading chain from cache: {cache_path}") 67 | self.chain = mlflow.langchain.load_model(self.model_uri) 68 | else: 69 | logging.info(f"Loading chain from MLflow: {self.model_uri}") 70 | self.chain = mlflow.langchain.load_model(self.model_uri) 71 | 72 | except Exception as e: 73 | logging.error(f"Error loading chain: {str(e)}") 74 | raise 75 | 76 | def decode_request(self, request: TextRequestModel) -> Dict[str, Any]: 77 | """Decode and validate the incoming request""" 78 | return { 79 | "messages": [message.dict() for message in request.messages], 80 | "vector_store_path": request.vector_store_path, 81 | } 82 | 83 | def predict(self, request_data: Dict[str, Any]) -> Dict[str, Any]: 84 | """Make prediction using the loaded chain""" 85 | try: 86 | # Invoke the chain 87 | result = self.chain.invoke(request_data) 88 | 89 | # Handle different response formats 90 | if isinstance(result, dict): 91 | response = { 92 | "response": result.get("result", result.get("response", str(result))), 93 | "source_documents": result.get("sources"), 94 | } 95 | else: 96 | logging.warning(f"Mismatched response format: {type(result)} - {result}") 97 | response = {"response": str(result)} 98 | 99 | return response 100 | 101 | except Exception as e: 102 | logging.error(f"Prediction error: {str(e)}") 103 | raise 104 | 105 | def encode_response(self, prediction: Dict[str, Any]) -> TextResponseModel: 106 | """Encode the prediction result into the final response format""" 107 | return TextResponseModel( 108 | response=prediction.get("response"), 109 | 
source_documents=prediction.get("source_documents"), 110 | model_uri=self.model_uri, 111 | ) 112 | 113 | 114 | if __name__ == "__main__": 115 | # Configure logging 116 | logging.basicConfig(level=logging.INFO) 117 | 118 | # Initialize and start the server 119 | api = LangchainRAGAPI() 120 | server = ls.LitServer(api, accelerator="auto") 121 | server.run(port=8000) 122 | 123 | ############################################# Run in terminal ####################################### 124 | # litserve dockerize server.py --port 8000 --gpu 125 | -------------------------------------------------------------------------------- /src/llmops_project/__init__.py: -------------------------------------------------------------------------------- 1 | """LLMOps Package Production Ready template using open source technologies..""" 2 | -------------------------------------------------------------------------------- /src/llmops_project/__main__.py: -------------------------------------------------------------------------------- 1 | """Entry point of the package.""" 2 | 3 | # %% IMPORTS 4 | 5 | from llmops_project import scripts 6 | 7 | # %% MAIN 8 | 9 | if __name__ == "__main__": 10 | scripts.main() 11 | -------------------------------------------------------------------------------- /src/llmops_project/io/configs.py: -------------------------------------------------------------------------------- 1 | """Parse, merge, and convert config objects.""" 2 | 3 | # %% IMPORTS 4 | 5 | import typing as T 6 | 7 | import omegaconf as oc 8 | 9 | # %% TYPES 10 | 11 | Config = oc.ListConfig | oc.DictConfig 12 | 13 | # %% PARSERS 14 | 15 | 16 | def parse_file(path: str) -> Config: 17 | """Parse a config file from a path. 18 | 19 | Args: 20 | path (str): path to local config. 21 | 22 | Returns: 23 | Config: representation of the config file. 24 | """ 25 | return oc.OmegaConf.load(path) 26 | 27 | 28 | def parse_string(string: str) -> Config: 29 | """Parse the given config string. 30 | 31 | Args: 32 | string (str): content of config string. 33 | 34 | Returns: 35 | Config: representation of the config string. 36 | """ 37 | return oc.OmegaConf.create(string) 38 | 39 | 40 | # %% MERGERS 41 | 42 | 43 | def merge_configs(configs: T.Sequence[Config]) -> Config: 44 | """Merge a list of config into a single config. 45 | 46 | Args: 47 | configs (T.Sequence[Config]): list of configs. 48 | 49 | Returns: 50 | Config: representation of the merged config objects. 51 | """ 52 | return oc.OmegaConf.merge(*configs) 53 | 54 | 55 | # %% CONVERTERS 56 | 57 | 58 | def to_object(config: Config, resolve: bool = True) -> object: 59 | """Convert a config object to a python object. 60 | 61 | Args: 62 | config (Config): representation of the config. 63 | resolve (bool): resolve variables. Defaults to True. 64 | 65 | Returns: 66 | object: conversion of the config to a python object. 
67 | """ 68 | return oc.OmegaConf.to_container(config, resolve=resolve) 69 | -------------------------------------------------------------------------------- /src/llmops_project/io/services.py: -------------------------------------------------------------------------------- 1 | """Manage global context during execution.""" 2 | 3 | # %% IMPORTS 4 | 5 | from __future__ import annotations 6 | 7 | import abc 8 | import contextlib as ctx 9 | import os 10 | import sys 11 | import typing as T 12 | 13 | import dotenv 14 | import loguru 15 | import mlflow 16 | import mlflow.tracking as mt 17 | import pydantic as pdt 18 | 19 | # %% SERVICES 20 | 21 | 22 | class Service(abc.ABC, pdt.BaseModel, strict=True, frozen=True, extra="forbid"): 23 | """Base class for a global service. 24 | 25 | Use services to manage global contexts. 26 | e.g., logger object, mlflow client, spark context, ... 27 | """ 28 | 29 | @abc.abstractmethod 30 | def start(self) -> None: 31 | """Start the service.""" 32 | 33 | def stop(self) -> None: 34 | """Stop the service.""" 35 | # does nothing by default 36 | 37 | 38 | class LoggerService(Service, frozen=True): 39 | """Service for logging messages. 40 | 41 | https://loguru.readthedocs.io/en/stable/api/logger.html 42 | 43 | Parameters: 44 | sink (str): logging output. 45 | level (str): logging level. 46 | format (str): logging format. 47 | colorize (bool): colorize output. 48 | serialize (bool): convert to JSON. 49 | backtrace (bool): enable exception trace. 50 | diagnose (bool): enable variable display. 51 | catch (bool): catch errors during log handling. 52 | """ 53 | 54 | sink: str = "stderr" 55 | level: str = "DEBUG" 56 | format: str = ( 57 | "[{time:YYYY-MM-DD HH:mm:ss.SSS}]" 58 | "[{level}]" 59 | "[{name}:{function}:{line}]" 60 | " {message}" 61 | ) 62 | colorize: bool = True 63 | serialize: bool = False 64 | backtrace: bool = True 65 | diagnose: bool = False 66 | catch: bool = True 67 | 68 | @T.override 69 | def start(self) -> None: 70 | loguru.logger.remove() 71 | config = self.model_dump() 72 | # use standard sinks or keep the original 73 | sinks = {"stderr": sys.stderr, "stdout": sys.stdout} 74 | config["sink"] = sinks.get(config["sink"], config["sink"]) 75 | loguru.logger.add(**config) 76 | 77 | def logger(self) -> loguru.Logger: 78 | """Return the main logger. 79 | 80 | Returns: 81 | loguru.Logger: the main logger. 82 | """ 83 | return loguru.logger 84 | 85 | 86 | class MlflowService(Service): # type: ignore[misc] 87 | """Service for Mlflow tracking and registry. 88 | 89 | Parameters: 90 | tracking_uri (str): the URI for the Mlflow tracking server. 91 | registry_uri (str): the URI for the Mlflow model registry. 92 | experiment_name (str): the name of tracking experiment. 93 | registry_name (str): the name of model registry. 94 | autolog_disable (bool): disable autologging. 95 | autolog_disable_for_unsupported_versions (bool): disable autologging for unsupported versions. 96 | autolog_exclusive (bool): If True, enables exclusive autologging. 97 | autolog_log_input_examples (bool): If True, logs input examples during autologging. 98 | autolog_log_model_signatures (bool): If True, logs model signatures during autologging. 99 | autolog_log_models (bool): If True, enables logging of models during autologging. 100 | autolog_log_datasets (bool): If True, logs datasets used during autologging. 101 | autolog_silent (bool): If True, suppresses all Mlflow warnings during autologging. 
102 |     """
103 | 
104 |     class RunConfig(pdt.BaseModel, strict=True, frozen=True, extra="forbid"):
105 |         """Run configuration for Mlflow tracking.
106 | 
107 |         Parameters:
108 |             name (str): name of the run.
109 |             description (str | None): description of the run.
110 |             tags (dict[str, T.Any] | None): tags for the run.
111 |             log_system_metrics (bool | None): enable system metrics logging.
112 |         """
113 | 
114 |         name: str
115 |         description: str | None = None
116 |         tags: dict[str, T.Any] | None = None
117 |         log_system_metrics: bool | None = True
118 | 
119 |     dotenv.load_dotenv()
120 | 
121 |     # Fail fast with an explicit error if the AWS credentials are not set
122 |     if "AWS_ACCESS_KEY_ID" not in os.environ:
123 |         raise EnvironmentError("AWS_ACCESS_KEY_ID environment variable is not set")
124 |     if "AWS_REGION" not in os.environ:
125 |         raise EnvironmentError("AWS_REGION environment variable is not set")
126 |     if "AWS_SECRET_ACCESS_KEY" not in os.environ:
127 |         raise EnvironmentError("AWS_SECRET_ACCESS_KEY environment variable is not set")
128 |     # server uri
129 |     tracking_uri: str = "http://127.0.0.1:5001"
130 |     registry_uri: str = "http://127.0.0.1:5001"
131 |     # experiment
132 |     experiment_name: str = "rag_chatbot_experiment"
133 |     # registry
134 |     registry_name: str = "my_model_registry"
135 |     # autolog
136 |     autolog_disable: bool = False
137 |     autolog_disable_for_unsupported_versions: bool = False
138 |     autolog_exclusive: bool = False
139 |     autolog_log_input_examples: bool = True
140 |     autolog_log_model_signatures: bool = True
141 |     autolog_log_models: bool = False
142 |     autolog_log_datasets: bool = False
143 |     autolog_silent: bool = False
144 | 
145 |     @T.override
146 |     def start(self) -> None:
147 |         # server uri
148 |         mlflow.set_tracking_uri(uri=self.tracking_uri)
149 |         mlflow.set_registry_uri(uri=self.registry_uri)
150 |         # experiment
151 |         mlflow.set_experiment(experiment_name=self.experiment_name)
152 |         # autolog
153 |         mlflow.autolog(
154 |             disable=self.autolog_disable,
155 |             disable_for_unsupported_versions=self.autolog_disable_for_unsupported_versions,
156 |             exclusive=self.autolog_exclusive,
157 |             log_input_examples=self.autolog_log_input_examples,
158 |             log_model_signatures=self.autolog_log_model_signatures,
159 |             log_models=self.autolog_log_models,
160 |             log_datasets=self.autolog_log_datasets,
161 |             silent=self.autolog_silent,
162 |         )
163 | 
164 |     @ctx.contextmanager
165 |     def run_context(self, run_config: RunConfig) -> T.Generator[mlflow.ActiveRun, None, None]:
166 |         """Yield an active Mlflow run and exit it afterwards.
167 | 
168 |         Args:
169 |             run_config (RunConfig): run parameters.
170 | 
171 |         Yields:
172 |             T.Generator[mlflow.ActiveRun, None, None]: active run context. Will be closed at the end of the context.
173 |         """
174 |         with mlflow.start_run(
175 |             run_name=run_config.name,
176 |             tags=run_config.tags,
177 |             description=run_config.description,
178 |             log_system_metrics=run_config.log_system_metrics,
179 |         ) as run:
180 |             yield run
181 | 
182 |     def client(self) -> mt.MlflowClient:
183 |         """Return a new Mlflow client.
184 | 
185 |         Returns:
186 |             MlflowClient: the mlflow client.
187 | """ 188 | return mt.MlflowClient(tracking_uri=self.tracking_uri, registry_uri=self.registry_uri) 189 | -------------------------------------------------------------------------------- /src/llmops_project/io/vector_db.py: -------------------------------------------------------------------------------- 1 | # Create Abstract class to Create Delete and Ingest documents to Vector DB 2 | import os 3 | from abc import ABC, abstractmethod 4 | from typing import List, Optional 5 | 6 | # Import utility functions 7 | from langchain_aws import BedrockEmbeddings 8 | from langchain_community.docstore.in_memory import InMemoryDocstore 9 | from langchain_community.document_loaders import DirectoryLoader 10 | from langchain_community.vectorstores import FAISS 11 | from langchain_core.documents import Document 12 | from langchain_qdrant import QdrantVectorStore 13 | from langchain_text_splitters import RecursiveCharacterTextSplitter 14 | from PyPDF2 import PdfReader 15 | from qdrant_client import QdrantClient 16 | from qdrant_client.models import Distance, VectorParams 17 | 18 | 19 | class VectorDB(ABC): 20 | """Vector Database Abstract Class""" 21 | 22 | def __init__(self, embedding_model: str, embedding_model_size: int, vector_store_path: str): 23 | self.embedding_model = embedding_model 24 | self.vector_store_path = vector_store_path 25 | self.embedding_model_size = embedding_model_size 26 | 27 | @abstractmethod 28 | def create_vector_db(self): 29 | pass 30 | 31 | @abstractmethod 32 | def delete_vector_db(self): 33 | pass 34 | 35 | @abstractmethod 36 | def ingest_documents(self, document_path: str): 37 | pass 38 | 39 | 40 | # %% FAISS Vector Database Class 41 | 42 | 43 | # Implement Faiss Vector Database Class 44 | class FAISSVectorDB(VectorDB): 45 | """FAISS Vector Database Class""" 46 | 47 | def __init__( 48 | self, 49 | embedding_model: str = "amazon.titan-embed-text-v1", 50 | embedding_model_size: int = 1536, 51 | vector_store_path: str = "faiss_db/", 52 | ): 53 | super().__init__(embedding_model, embedding_model_size, vector_store_path) 54 | 55 | def create_vector_db(self): 56 | """Create an empty FAISS vector store. 57 | 58 | Args: 59 | config_path (str): Path to the chain's configuration file. 60 | vector_store_path (str): Path to save the empty vector store. 
61 |         """
62 | 
63 |         from faiss import IndexFlatL2
64 | 
65 |         # Load Bedrock embeddings with the specified model
66 |         embeddings = BedrockEmbeddings(model_id=self.embedding_model)
67 |         embedding_dimension = self.embedding_model_size
68 | 
69 |         # Create an empty FAISS vector store initialized with the embeddings dimension
70 |         index = IndexFlatL2(embedding_dimension)  # Using L2 distance for the index
71 | 
72 |         # Create a local file store for persistent document storage
73 |         docstore = InMemoryDocstore()
74 | 
75 |         # Create the FAISS vector store with the empty index and document store
76 |         vector_store = FAISS(
77 |             embedding_function=embeddings, index=index, docstore=docstore, index_to_docstore_id={}
78 |         )
79 | 
80 |         # Save the empty vector store locally
81 |         vector_store.save_local(folder_path=self.vector_store_path)
82 |         return vector_store
83 | 
84 |     def delete_vector_db(self):
85 |         # Delete the FAISS vector store folder
86 |         # (FAISS.save_local writes a directory, so remove the whole tree)
87 |         import shutil
88 | 
89 |         shutil.rmtree(self.vector_store_path, ignore_errors=True)
90 | 
91 |     def _load_pdfs_from_directory(self, directory_path: str):
92 |         documents = []
93 |         for filename in os.listdir(directory_path):
94 |             if filename.endswith(".pdf"):
95 |                 file_path = os.path.join(directory_path, filename)
96 |                 with open(file_path, "rb") as file:
97 |                     pdf_reader = PdfReader(file)
98 |                     for page_num in range(len(pdf_reader.pages)):
99 |                         page = pdf_reader.pages[page_num]
100 |                         text = page.extract_text()
101 |                         if text:
102 |                             documents.append(
103 |                                 Document(
104 |                                     page_content=text,
105 |                                     metadata={"source": filename, "page": page_num + 1},
106 |                                 )
107 |                             )
108 |         return documents
109 | 
110 |     # %% Main pipeline function for ingesting and updating the vector database
111 |     def ingest_documents(self, document_path: str) -> None:
112 |         # Load documents from the specified PDF directory
113 |         documents = self._load_pdfs_from_directory(document_path)
114 | 
115 |         # Load Bedrock embeddings
116 |         embeddings = BedrockEmbeddings(model_id=self.embedding_model)
117 | 
118 |         if documents:
119 |             # Load the existing FAISS vector store
120 |             vector_store = FAISS.load_local(
121 |                 folder_path=self.vector_store_path,
122 |                 embeddings=embeddings,
123 |                 allow_dangerous_deserialization=True,
124 |             )
125 | 
126 |             # Add documents to the vector store
127 |             vector_store.add_documents(documents)
128 | 
129 |             # Save the updated vector store locally
130 |             vector_store.save_local(folder_path=self.vector_store_path)
131 | 
132 | 
133 | class QdrantVectorDB:
134 |     def __init__(
135 |         self,
136 |         collection_name: str,
137 |         embeddings_model: BedrockEmbeddings,
138 |         url: str = "http://localhost:6333",
139 |         api_key: Optional[str] = None,
140 |         vector_size: int = 1536,
141 |         distance: Distance = Distance.COSINE,
142 |     ):
143 |         """Initialize the Qdrant client and embeddings.
144 | 
145 |         :param collection_name: Name of the Qdrant collection
146 |         :param embeddings_model: LangChain embeddings model
147 |         :param url: URL of the Qdrant server
148 |         """
149 |         # Initialize Qdrant client connected to the given URL
150 |         self.client = QdrantClient(url=url, api_key=api_key)
151 |         self.collection_name = collection_name
152 |         self.embeddings = embeddings_model
153 |         self.vector_size = vector_size
154 |         self.distance = distance
155 | 
156 |     def create_vector_db(self):
157 |         """
158 |         Create a new collection in the Qdrant database.
159 | 
160 |         Uses the collection name, vector size, and distance metric
161 |         configured on this instance.
162 |         """
163 |         self.client.create_collection(
164 |             collection_name=self.collection_name,
165 |             vectors_config=VectorParams(size=self.vector_size, distance=self.distance),
166 |         )
167 | 
168 |     def ingest_documents(self, folder_path: str, chunk_size=500, chunk_overlap=50) -> List[dict]:
169 |         """
170 |         Load documents from a folder, split them into chunks, embed them, and upsert them into Qdrant
171 | 
172 |         :param folder_path: Path to the folder containing documents
173 |         :return: List of points upserted into the collection
174 |         """
175 |         # Load documents from directory
176 |         loader = DirectoryLoader(folder_path)
177 |         documents = loader.load()
178 | 
179 |         # Split documents into chunks
180 |         text_splitter = RecursiveCharacterTextSplitter(
181 |             chunk_size=chunk_size,
182 |             chunk_overlap=chunk_overlap,
183 |         )
184 |         split_docs = text_splitter.split_documents(documents)
185 | 
186 |         # Generate embeddings and prepare for Qdrant
187 |         points = []
188 |         for idx, doc in enumerate(split_docs):
189 |             # Generate embedding
190 |             embedding = self.embeddings.embed_query(doc.page_content)
191 | 
192 |             points.append(
193 |                 {
194 |                     "id": idx,
195 |                     "vector": embedding,
196 |                     "payload": {"page_content": doc.page_content, "metadata": doc.metadata},
197 |                 }
198 |             )
199 | 
200 |         # Upsert points into Qdrant
201 |         self.client.upsert(collection_name=self.collection_name, points=points)
202 | 
203 |         return points
204 | 
205 |     def load_documents_via_langchain(self, folder_path) -> QdrantVectorStore:
206 |         """
207 |         Load documents from a folder into Qdrant via LangChain's QdrantVectorStore
208 | 
209 |         :param folder_path: Path to the folder containing documents
210 |         :return: The populated QdrantVectorStore
211 |         """
212 | 
213 |         # Load documents from directory
214 |         loader = DirectoryLoader(folder_path)
215 |         documents = loader.load()
216 | 
217 |         doc_store = QdrantVectorStore.from_documents(
218 |             documents,
219 |             self.embeddings,
220 |             url="http://localhost:6333",  # NOTE: hardcoded; should match the URL passed to __init__
221 |             collection_name=self.collection_name,
222 |         )
223 |         return doc_store
224 | 
225 |     def query_database(self, query: str, top_k: int = 5) -> List[dict]:
226 |         """
227 |         Query the vector database
228 | 
229 |         :param query: Search query string
230 |         :param top_k: Number of top results to return
231 |         :return: List of top matching documents
232 |         """
233 |         # Generate embedding for the query
234 |         query_embedding = self.embeddings.embed_query(query)
235 | 
236 |         # Perform search
237 |         search_result = self.client.search(
238 |             collection_name=self.collection_name, query_vector=query_embedding, limit=top_k
239 |         )
240 | 
241 |         if not search_result:
242 |             return [{"score": 0.0, "text": "", "source": ""}]
243 | 
244 |         return [
245 |             {
246 |                 "score": result.score,
247 |                 # Payloads are stored with "page_content" and "metadata" keys (see ingest_documents)
248 |                 "text": result.payload.get("page_content", "") if result.payload else "",
249 |                 "source": result.payload.get("metadata", {}).get("source", "") if result.payload else "",
250 |             }
251 |             for result in search_result
252 |         ]
-------------------------------------------------------------------------------- /src/llmops_project/models/rag_chatbot.py: --------------------------------------------------------------------------------
1 | # mypy: ignore-errors
2 | import os
3 | from operator import itemgetter
4 | from pathlib import Path
5 | from typing import Any, Dict, List
6 | 
7 | import mlflow
8 | from langchain.prompts import PromptTemplate
9 | from langchain.schema.output_parser import StrOutputParser
10 | from langchain.schema.runnable import RunnableBranch, RunnableLambda, RunnablePassthrough
11 | from langchain_aws import BedrockEmbeddings, ChatBedrock
12 | from langchain_community.vectorstores import FAISS
13 | from langchain_core.documents import Document
14 | 
15 | # Get the current working directory
16 | script_dir = 
Path(os.getcwd()) 17 | 18 | # Navigate up to the parent folder (you can use .parent to go up one level) 19 | parent_dir = script_dir.parent 20 | grandparent_dir = parent_dir.parent # Go up one more level 21 | 22 | # Combine the path to reach the config directory 23 | config_path = "rag_chain_config.yaml" 24 | 25 | ## Enable MLflow Tracing 26 | mlflow.langchain.autolog() 27 | 28 | print("CONFIG PATH", config_path) 29 | model_config = mlflow.models.ModelConfig(development_config=config_path) 30 | 31 | guardrail_config = model_config.get("guardrail_config") 32 | llm_config = model_config.get("llm_config") 33 | retriever_config = model_config.get("retriever_config") 34 | 35 | 36 | # The question is the last entry of the history 37 | def extract_question(input: List[Dict[str, Any]]) -> str: 38 | """ 39 | Extract the question from the input. 40 | 41 | Args: 42 | input (list[dict]): The input containing chat messages. 43 | 44 | Returns: 45 | str: The extracted question. 46 | """ 47 | return input[-1]["content"] 48 | 49 | 50 | # The history is everything before the last question 51 | def extract_history(input: List[Dict[str, str]]) -> List[Dict[str, str]]: 52 | """ 53 | Extract the chat history from the input. 54 | 55 | Args: 56 | input (list[dict]): The input containing chat messages. 57 | 58 | Returns: 59 | list[dict]: The extracted chat history. 60 | """ 61 | return input[:-1] 62 | 63 | 64 | # TODO: Convert to Few Shot Prompt 65 | guardrail_prompt = PromptTemplate( 66 | input_variables=["chat_history", "question"], 67 | template=guardrail_config["prompt"], 68 | ) 69 | 70 | guardrail_model = ChatBedrock( 71 | model_id=guardrail_config["model"], 72 | model_kwargs=dict(temperature=0.01), 73 | ) 74 | 75 | chat_model = ChatBedrock( 76 | model_id=llm_config["llm_model"], 77 | model_kwargs=dict(temperature=0.01), 78 | ) 79 | 80 | 81 | guardrail_chain = ( 82 | { 83 | "question": itemgetter("messages") | RunnableLambda(extract_question), 84 | "chat_history": itemgetter("messages") | RunnableLambda(extract_history), 85 | } 86 | | guardrail_prompt 87 | | guardrail_model 88 | | StrOutputParser() 89 | ) 90 | 91 | 92 | def get_retriever(path: str): 93 | """ 94 | Get the FAISS retriever. 95 | 96 | Args: 97 | path (str, optional): The path to the vector store. Defaults to None. 98 | 99 | Returns: 100 | FAISS: The FAISS retriever. 
101 | """ 102 | # Ensure the config path is relative to this script's location 103 | # Load Vector Store 104 | # Get the FAISS retriever 105 | embeddings = BedrockEmbeddings() 106 | vector_store = FAISS.load_local( 107 | embeddings=embeddings, 108 | folder_path=path, 109 | allow_dangerous_deserialization=True, 110 | ) 111 | 112 | # configure document retrieval 113 | retriever = vector_store.as_retriever( 114 | search_kwargs={"k": retriever_config.get("parameters")["k"]} 115 | ) 116 | return retriever 117 | 118 | 119 | # Setup Prompt to re-write query from chat history context 120 | generate_query_to_retrieve_context_prompt = PromptTemplate( 121 | input_variables=["chat_history", "question"], 122 | template=llm_config["query_rewriter_prompt_template"], 123 | ) 124 | 125 | 126 | # Setup query rewriter chain 127 | generate_query_to_retrieve_context_chain = { 128 | "question": itemgetter("messages") | RunnableLambda(extract_question), 129 | "chat_history": itemgetter("messages") | RunnableLambda(extract_history), 130 | } | RunnableBranch( # Augment query only when there is a chat history 131 | ( 132 | lambda x: x["chat_history"], 133 | generate_query_to_retrieve_context_prompt | chat_model | StrOutputParser(), 134 | ), 135 | (lambda x: not x["chat_history"], RunnableLambda(lambda x: x["question"])), 136 | RunnableLambda(lambda x: x["question"]), 137 | ) # type: ignore 138 | 139 | 140 | question_with_history_and_context_prompt = PromptTemplate( 141 | input_variables=["chat_history", "context", "question"], 142 | template=llm_config.get("llm_prompt_template"), # Add Question with History and Context Prompt 143 | ) 144 | 145 | 146 | def format_context(docs: List[Document]) -> str: 147 | """ 148 | Format the context from a list of documents. 149 | 150 | Args: 151 | docs (list[Document]): A list of documents. 152 | 153 | Returns: 154 | str: A formatted string containing the content of the documents. 155 | """ 156 | return "\n\n".join([d.page_content for d in docs]) 157 | 158 | 159 | def extract_source_urls(docs: List[Document]) -> List[str]: 160 | """ 161 | Extract source URLs from a list of documents. 162 | 163 | Args: 164 | docs (list[Document]): A list of documents. 165 | 166 | Returns: 167 | list[str]: A list of source URLs extracted from the documents' metadata. 
168 | """ 169 | return [d.metadata[retriever_config.get("schema")["document_uri"]] for d in docs] 170 | 171 | 172 | relevant_question_chain = ( 173 | RunnablePassthrough() # type: ignore 174 | | { 175 | "relevant_docs": generate_query_to_retrieve_context_prompt | chat_model | StrOutputParser(), 176 | "chat_history": itemgetter("chat_history"), 177 | "question": itemgetter("question"), 178 | "vector_store_path": itemgetter("vector_store_path"), 179 | } 180 | | { 181 | "relevant_docs": itemgetter("relevant_docs"), 182 | "chat_history": itemgetter("chat_history"), 183 | "question": itemgetter("question"), 184 | "vector_store_path": itemgetter("vector_store_path"), 185 | } 186 | | RunnableLambda( 187 | lambda x: { 188 | "relevant_docs": get_retriever(x["vector_store_path"]).get_relevant_documents( 189 | x["relevant_docs"] 190 | ), 191 | "chat_history": x["chat_history"], 192 | "question": x["question"], 193 | "vector_store_path": x["vector_store_path"], 194 | } 195 | ) 196 | | { 197 | "context": itemgetter("relevant_docs") | RunnableLambda(format_context), 198 | "sources": itemgetter("relevant_docs") | RunnableLambda(extract_source_urls), 199 | "chat_history": itemgetter("chat_history"), 200 | "question": itemgetter("question"), 201 | } 202 | | {"prompt": question_with_history_and_context_prompt, "sources": itemgetter("sources")} 203 | | { 204 | "result": itemgetter("prompt") | chat_model | StrOutputParser(), 205 | "sources": itemgetter("sources"), 206 | } 207 | ) 208 | 209 | 210 | irrelevant_question_chain = RunnableLambda( 211 | lambda x: {"result": llm_config.get("llm_refusal_fallback_answer"), "sources": []} 212 | ) 213 | 214 | branch_node = RunnableBranch( 215 | (lambda x: "yes" in x["question_is_relevant"].lower(), relevant_question_chain), 216 | (lambda x: "no" in x["question_is_relevant"].lower(), irrelevant_question_chain), 217 | irrelevant_question_chain, 218 | ) # type: ignore 219 | 220 | full_chain = { 221 | "question_is_relevant": guardrail_chain, 222 | "question": itemgetter("messages") | RunnableLambda(extract_question), 223 | "chat_history": itemgetter("messages") | RunnableLambda(extract_history), 224 | "vector_store_path": itemgetter("vector_store_path"), 225 | } | branch_node # type: ignore 226 | 227 | 228 | ## Tell MLflow logging where to find your chain. 
229 | mlflow.models.set_model(model=full_chain) # type: ignore 230 | -------------------------------------------------------------------------------- /src/llmops_project/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | """High-level pipelines of the project.""" 2 | 3 | # %% IMPORTS 4 | 5 | from llmops_project.pipelines.deployment.deploy_model import DeployModelJob 6 | from llmops_project.pipelines.deployment.register_model import LogAndRegisterModelJob 7 | from llmops_project.pipelines.feature_engineering.create_vector_db import CreateVectorDBJob 8 | from llmops_project.pipelines.feature_engineering.ingest_documents import IngestAndUpdateVectorDBJob 9 | from llmops_project.pipelines.managers.deployment_manager import DeploymentJob 10 | from llmops_project.pipelines.managers.feature_engineering_manager import FeatureEngineeringJob 11 | from llmops_project.pipelines.managers.monitoring_manager import MonitoringJob 12 | from llmops_project.pipelines.monitoring.generate_rag_dataset import GenerateRagDatasetJob 13 | from llmops_project.pipelines.monitoring.post_deploy_eval import MonitoringEvalJob 14 | from llmops_project.pipelines.monitoring.pre_deploy_eval import EvaluateModelJob 15 | 16 | # %% TYPES 17 | 18 | JobKind = ( 19 | DeploymentJob 20 | | FeatureEngineeringJob 21 | | GenerateRagDatasetJob 22 | | EvaluateModelJob 23 | | CreateVectorDBJob 24 | | IngestAndUpdateVectorDBJob 25 | | DeployModelJob 26 | | LogAndRegisterModelJob 27 | | MonitoringEvalJob 28 | | MonitoringJob 29 | ) 30 | 31 | # %% EXPORTS 32 | 33 | __all__ = [ 34 | "DeploymentJob", 35 | "FeatureEngineeringJob", 36 | "GenerateRagDatasetJob", 37 | "EvaluateModelJob", 38 | "CreateVectorDBJob", 39 | "IngestAndUpdateVectorDBJob", 40 | "DeployModelJob", 41 | "LogAndRegisterModelJob", 42 | "MonitoringEvalJob", 43 | "MonitoringJob", 44 | "JobKind", 45 | ] 46 | -------------------------------------------------------------------------------- /src/llmops_project/pipelines/base.py: -------------------------------------------------------------------------------- 1 | """Base for high-level project jobs.""" 2 | 3 | # %% IMPORTS 4 | 5 | import abc 6 | import types as TS 7 | import typing as T 8 | 9 | import pydantic as pdt 10 | 11 | from llmops_project.io import services 12 | 13 | # %% TYPES 14 | 15 | # Local job variables 16 | Locals = T.Dict[str, T.Any] 17 | 18 | # %% JOBS 19 | 20 | 21 | class Job(abc.ABC, pdt.BaseModel, strict=True, frozen=True): 22 | """Base class for a job. 23 | 24 | Use a job to execute runs in context, 25 | e.g., to define common services like the logger. 26 | 27 | Parameters: 28 | logger_service (services.LoggerService): manage the logger system. 30 | mlflow_service (services.MlflowService): manage the mlflow system. 31 | """ 32 | 33 | KIND: str 34 | 35 | logger_service: services.LoggerService = services.LoggerService() 36 | mlflow_service: services.MlflowService = services.MlflowService() 37 | 38 | def __enter__(self) -> T.Self: 39 | """Enter the job context. 40 | 41 | Returns: 42 | T.Self: return the current object.
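        Example (a minimal sketch, mirroring how the pipeline modules'
        __main__ blocks drive jobs as context managers):

            with setting.job as runner:
                runner.run()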
43 | """ 44 | self.logger_service.start() 45 | logger = self.logger_service.logger() 46 | logger.debug("[START] Logger service: {}", self.logger_service) 47 | logger.debug("[START] Mlflow service: {}", self.mlflow_service) 48 | self.mlflow_service.start() 49 | return self 50 | 51 | def __exit__( 52 | self, 53 | exc_type: T.Type[BaseException] | None, 54 | exc_value: BaseException | None, 55 | exc_traceback: TS.TracebackType | None, 56 | ) -> T.Literal[False]: 57 | """Exit the job context. 58 | 59 | Args: 60 | exc_type (T.Type[BaseException] | None): ignored. 61 | exc_value (BaseException | None): ignored. 62 | exc_traceback (TS.TracebackType | None): ignored. 63 | 64 | Returns: 65 | T.Literal[False]: always propagate exceptions. 66 | """ 67 | logger = self.logger_service.logger() 68 | logger.debug("[STOP] Mlflow service: {}", self.mlflow_service) 69 | self.mlflow_service.stop() 70 | logger.debug("[STOP] Logger service: {}", self.logger_service) 71 | self.logger_service.stop() 72 | return False # re-raise 73 | 74 | @abc.abstractmethod 75 | def run(self) -> Locals: 76 | """Run the job in context. 77 | 78 | Returns: 79 | Locals: local job variables. 80 | """ 81 | -------------------------------------------------------------------------------- /src/llmops_project/pipelines/deployment/deploy_model.py: -------------------------------------------------------------------------------- 1 | import typing as T 2 | 3 | from llmops_project.pipelines import base 4 | 5 | 6 | # %% Job class for logging and registering the RAG model 7 | class DeployModelJob(base.Job): # type: ignore[misc] 8 | """Job to log and register the RAG model in MLflow. 9 | 10 | Parameters: 11 | run_config (services.MlflowService.RunConfig): mlflow run config. 12 | """ 13 | 14 | KIND: T.Literal["DeployModelJob"] = "DeployModelJob" 15 | 16 | staging_alias: str = "champion" 17 | production_alias: str = "production" 18 | registry_model_name: str 19 | 20 | def promote_model_to_alias( 21 | self, client, model_name, current_alias: str = "champion", new_alias: str = "production" 22 | ) -> None: 23 | logger = self.logger_service.logger() 24 | 25 | # Retrieve the model version using the current alias 26 | model_version = client.get_model_version_by_alias(name=model_name, alias=current_alias) 27 | 28 | # Access and print the tags of the model version 29 | if model_version.tags["passed_tests"] == "True": 30 | logger.success("Model version passed tests, promoting to production") 31 | # Set the new alias to the retrieved model version 32 | client.set_registered_model_alias( 33 | name=model_name, alias=new_alias, version=model_version.version 34 | ) 35 | 36 | else: 37 | logger.warning("Model version did not pass tests, archiving model") 38 | client.delete_registered_model_alias(name=model_name, alias=current_alias) 39 | 40 | @T.override 41 | def run(self) -> base.Locals: 42 | # services 43 | # - logger 44 | logger = self.logger_service.logger() 45 | 46 | # - mlflow 47 | client = self.mlflow_service.client() 48 | logger.info("With client: {}", client.tracking_uri) 49 | 50 | logger.info( 51 | f"Deploying Model Named {self.registry_model_name} from {self.staging_alias} to {self.production_alias}" 52 | ) 53 | self.promote_model_to_alias( 54 | client=client, 55 | model_name=self.registry_model_name, 56 | current_alias=self.staging_alias, 57 | new_alias=self.production_alias, 58 | ) 59 | 60 | logger.success("Model deployment complete") 61 | 62 | return locals() 63 | 64 | 65 | if __name__ == "__main__": 66 | from pathlib import Path 67 | 68 | from 
llmops_project import settings 69 | from llmops_project.io import configs 70 | 71 | script_dir = str(Path(__file__).parent.parent.parent.parent.parent) 72 | config_files = ["/deployment.yaml"] 73 | 74 | file_paths = [script_dir + "/confs/" + file for file in config_files] 75 | 76 | files = [configs.parse_file(file) for file in file_paths] 77 | 78 | config = configs.merge_configs([*files]) # type: ignore 79 | config["job"]["KIND"] = "DeployModelJob" # type: ignore 80 | 81 | object_ = configs.to_object(config) # python object 82 | 83 | setting = settings.MainSettings.model_validate(object_) 84 | 85 | with setting.job as runner: 86 | runner.run() 87 | -------------------------------------------------------------------------------- /src/llmops_project/pipelines/deployment/register_model.py: -------------------------------------------------------------------------------- 1 | # %% IMPORTS 2 | import typing as T 3 | from pathlib import Path 4 | from typing import Any, Dict, List 5 | 6 | import llmops_project.io.services as services 7 | import mlflow 8 | import mlflow.pyfunc 9 | from llmops_project.pipelines import base 10 | from mlflow import MlflowClient 11 | from pydantic import BaseModel, ValidationError 12 | 13 | logger = services.LoggerService().logger() 14 | 15 | 16 | # %% Function to log the model to MLflow 17 | def log_rag_model( 18 | model_path: str, config_path: str, input_example: T.Optional[Dict[str, Any]] = None 19 | ) -> str: 20 | # Load model configuration from the config file 21 | # Start an MLflow run and log the model 22 | logger.debug("Using model config at {}", config_path) 23 | with mlflow.start_run(run_name="rag_with_guardrails") as run: 24 | mlflow.langchain.log_model( 25 | lc_model=model_path, # Path to the chain code file 26 | model_config=config_path, # Path to the chain configuration file 27 | artifact_path="chain", # Required by MLflow 28 | code_paths=[ 29 | config_path 30 | ], # dependency definition included for the model to successfully import the implementation 31 | input_example=input_example, # Input example for schema logging 32 | example_no_conversion=True, # Use input_example directly as the chain's schema 33 | ) 34 | return run.info.run_id # Return the run ID for model registration 35 | 36 | 37 | # %% Function to register the model in the MLflow model registry 38 | def register_model(client: MlflowClient, run_id: str, model_name: str): 39 | model_uri = f"runs:/{run_id}/chain" 40 | result = mlflow.register_model(model_uri=model_uri, name=model_name) 41 | logger.success( 42 | f"Model registered successfully with name: {model_name}, version {result.version}" 43 | ) 44 | 45 | client.set_registered_model_tag(name=model_name, key="model", value="claude3-haiku") 46 | 47 | 48 | def load_model_by_alias(model_name, alias=None): 49 | # Construct the model URI using the alias if provided 50 | if alias: 51 | model_uri = f"models:/{model_name}@{alias}" 52 | else: 53 | model_uri = f"models:/{model_name}/latest" 54 | 55 | # Load the model 56 | model = mlflow.langchain.load_model(model_uri) 57 | 58 | return model 59 | 60 | 61 | # Step 1: Define the expected output schema using Pydantic 62 | class OutputSchema(BaseModel): 63 | result: str 64 | sources: List[Any] 65 | 66 | 67 | # Step 2: Create a function to validate the output against the schema 68 | def validate_output_schema(output: Dict[str, Any], schema: OutputSchema) -> bool: 69 | try: 70 | # Step 3: Validate the output against the schema model 71 | schema.model_validate(output) 72 | return True 73 | except ValidationError as e:
print(f"Validation error: {e}") 75 | return False 76 | 77 | 78 | def validate_model_signature( 79 | client: MlflowClient, 80 | model_name: str, 81 | vector_store_path: str, 82 | alias=None, 83 | ): 84 | """ 85 | Validates the model signature by testing it against relevant and non-relevant dialogs. 86 | 87 | Args: 88 | model_name (str): The name of the model to validate. 89 | alias (str, optional): An alias for the model. Defaults to None. 90 | 91 | Returns: 92 | None 93 | 94 | Raises: 95 | ValueError: If the model fails the schema or guardrail tests. 96 | 97 | This function performs the following steps: 98 | 1. Loads the model using the provided name and alias. 99 | 2. Invokes the model with relevant and non-relevant dialogs. 100 | 3. Validates the model's output against a predefined schema. 101 | 4. Checks if the model's output passes guardrail tests. 102 | 5. Updates the model's tags in MLflow with the test results. 103 | """ 104 | # script_dir = Path(__file__).resolve().parent.parent 105 | # project_root = script_dir.parent.parent.parent # Adjusted to get to the project root as needed 106 | 107 | # vector_store_path = project_root / "faiss_db/" 108 | 109 | # Load Relevant Dialog and Non Relevant Dialog 110 | non_relevant_dialog = { # This will test Guardrail 111 | "messages": [ 112 | {"role": "user", "content": "What is the company's sick leave policy?"}, 113 | { 114 | "role": "assistant", 115 | "content": "The company's sick leave policy allows employees to take a certain number of sick days per year. Please refer to the employee handbook for specific details and eligibility criteria.", 116 | }, 117 | {"role": "user", "content": "What is the meaning of life?"}, 118 | ], 119 | "vector_store_path": vector_store_path, 120 | } 121 | 122 | relevant_dialog = { # This will test schema 123 | "messages": [ 124 | {"role": "user", "content": "What is the company sick leave policy?"}, 125 | ], 126 | "vector_store_path": vector_store_path, 127 | } 128 | 129 | model = load_model_by_alias(model_name, alias) 130 | 131 | non_relevant_result = model.invoke(non_relevant_dialog) 132 | relevant_result = model.invoke(relevant_dialog) 133 | 134 | logger.debug(f"Relevant Result: {relevant_result}") 135 | logger.debug(f"Non Relevant Result: {non_relevant_result}") 136 | # Validate the output against the schema 137 | is_schema_valid = validate_output_schema(relevant_result, OutputSchema) # type: ignore 138 | if is_schema_valid: 139 | logger.success("Model Passsed Schema Tests") 140 | else: 141 | logger.error("Model Failed Schema Tests") 142 | 143 | # Validate Guardrail 144 | # Specific value to validate against 145 | guardrail_valid_output = {"result": "I cannot answer this question.", "sources": []} 146 | 147 | passed_guardrail_test = guardrail_valid_output == non_relevant_result 148 | if passed_guardrail_test: 149 | logger.success("Model Passsed Guadrail Tests") 150 | else: 151 | logger.error("Model Failed Guadrail Tests") 152 | 153 | if passed_guardrail_test and is_schema_valid: 154 | logger.success("Model Passed all tests") 155 | passed_tests = True 156 | else: 157 | logger.error("Model Failed tests") 158 | passed_tests = False 159 | 160 | # Update Model Tags 161 | 162 | filter_string = f"name = '{model_name}'" 163 | results = client.search_model_versions(filter_string=filter_string) 164 | latest_version = max(results, key=lambda mv: int(mv.version)) 165 | 166 | client.set_model_version_tag( 167 | name=model_name, version=latest_version.version, key="passed_tests", value=str(passed_tests) 168 | ) 169 | 170 
| 171 | def promote_model(client: MlflowClient, model_name: str, alias: str): 172 | # Get latest version 173 | filter_string = f"name = '{model_name}'" 174 | results = client.search_model_versions(filter_string=filter_string) 175 | latest_version = max(results, key=lambda mv: int(mv.version)) 176 | tags = latest_version.tags 177 | 178 | if tags["passed_tests"].lower() == "true": 179 | client.set_registered_model_alias( 180 | name=model_name, alias=alias, version=latest_version.version 181 | ) 182 | else: 183 | logger.error( 184 | "COULD NOT PROMOTE MODEL: MODEL FAILED TESTS OR IS NOT BETTER THAN PREVIOUS MODEL" 185 | ) 186 | 187 | 188 | # %% Job class for logging and registering the RAG model 189 | class LogAndRegisterModelJob(base.Job): # type: ignore[misc] 190 | """Job to log and register the RAG model in MLflow. 191 | 192 | Parameters: 193 | registry_model_name (str): name under which the model is registered in MLflow. 194 | """ 195 | 196 | KIND: T.Literal["LogAndRegisterModelJob"] = "LogAndRegisterModelJob" 197 | 198 | registry_model_name: str 199 | staging_alias: str = "champion" 200 | llm_model_code_path: str 201 | llm_confs: str 202 | vector_store_path: str 203 | 204 | @T.override 205 | def run(self) -> base.Locals: 206 | # services 207 | # - logger 208 | logger = self.logger_service.logger() 209 | 210 | # - mlflow 211 | client = self.mlflow_service.client() 212 | logger.info("With client: {}", client.tracking_uri) 213 | 214 | logger.info(f"Logging Model Named {self.registry_model_name}") 215 | 216 | # Load Configuration 217 | script_dir = Path(__file__).parent.parent.parent.parent.parent 218 | config_path = str(script_dir) + self.llm_confs 219 | llm_code_path = str(script_dir) + self.llm_model_code_path 220 | vector_store_path = str(script_dir) + self.vector_store_path 221 | 222 | logger.info(f"CONFIG PATH: {config_path}") 223 | 224 | model_specs = mlflow.models.ModelConfig(development_config=config_path) 225 | input_example = model_specs.get("input_example") 226 | 227 | run_id = log_rag_model( 228 | llm_code_path, config_path, input_example=input_example 229 | ) # Log the model and get the run ID 230 | 231 | logger.info(f"Registering Model Named {self.registry_model_name}") 232 | register_model(client, run_id, self.registry_model_name) # Register the model 233 | 234 | logger.info(f"Validating Model Signature for {self.registry_model_name}") 235 | validate_model_signature( 236 | client, 237 | model_name=self.registry_model_name, 238 | vector_store_path=self.vector_store_path, 239 | alias=None, 240 | ) 241 | 242 | promote_model( 243 | client=client, model_name=self.registry_model_name, alias="champion" 244 | ) # Promote model to champion if it passed the tests 245 | logger.info(f"Promoting Model Named {self.registry_model_name} to {self.staging_alias}") 246 | logger.success("Model Registration complete") 247 | return locals() 248 | 249 | 250 | if __name__ == "__main__": 251 | from pathlib import Path 252 | 253 | from llmops_project import settings 254 | from llmops_project.io import configs 255 | 256 | script_dir = str(Path(__file__).parent.parent.parent.parent.parent) 257 | config_files = ["/deployment.yaml"] 258 | 259 | file_paths = [script_dir + "/confs/" + file for file in config_files] 260 | 261 | files = [configs.parse_file(file) for file in file_paths] 262 | 263 | config = configs.merge_configs([*files]) # type: ignore 264 | config["job"]["KIND"] = "LogAndRegisterModelJob" # type: ignore 265 | 266 | object_ = configs.to_object(config) # python object 267 | 268 | setting =
settings.MainSettings.model_validate(object_) 269 | 270 | with setting.job as runner: 271 | runner.run() 272 | -------------------------------------------------------------------------------- /src/llmops_project/pipelines/feature_engineering/create_vector_db.py: -------------------------------------------------------------------------------- 1 | # %% IMPORTS 2 | import os 3 | import typing as T 4 | from pathlib import Path 5 | 6 | import dotenv 7 | from langchain_aws import BedrockEmbeddings 8 | from llmops_project.io.vector_db import QdrantVectorDB 9 | 10 | # import faiss 11 | from llmops_project.pipelines import base 12 | 13 | 14 | # %% Job class for creating the vector database 15 | class CreateVectorDBJob(base.Job): # type: ignore[misc] 16 | """Job to create an empty Qdrant vector store collection. 17 | 18 | Parameters: 19 | collection_name (str): name of the Qdrant collection to create. 20 | """ 21 | 22 | KIND: T.Literal["CreateVectorDBJob"] = "CreateVectorDBJob" 23 | 24 | embedding_model: str 25 | collection_name: str 26 | vector_store_path: str 27 | 28 | @T.override 29 | def run(self) -> base.Locals: 30 | # Setup services 31 | # services 32 | # - logger 33 | logger = self.logger_service.logger() 34 | 35 | # Run the main pipeline function to create the empty vector database 36 | # Load .env file on the grandparent folder 37 | script_dir = str(Path(__file__).parent.parent.parent.parent.parent) 38 | dotenv.load_dotenv(script_dir + "/.env") 39 | 40 | embeddings = BedrockEmbeddings(model_id=self.embedding_model) 41 | 42 | vector_db = QdrantVectorDB( 43 | embeddings_model=embeddings, 44 | collection_name=self.collection_name, 45 | url=self.vector_store_path, 46 | api_key=os.getenv("QDRANT_API_KEY"), 47 | vector_size=1536, 48 | ) 49 | 50 | logger.info("Initializing empty Qdrant Collection vector store...") 51 | 52 | try: 53 | vector_db.create_vector_db() 54 | except Exception as e: 55 | if "409" in str(e): 56 | logger.warning(f"Collection {self.collection_name} already exists") 57 | else: 58 | raise e 59 | 60 | logger.success( 61 | f"{vector_db.__class__.__name__} vector store created successfully on path {self.vector_store_path}" 62 | ) 63 | return locals() 64 | 65 | 66 | if __name__ == "__main__": 67 | # Test the pipeline 68 | 69 | from pathlib import Path 70 | 71 | from llmops_project import settings 72 | from llmops_project.io import configs 73 | 74 | script_dir = str(Path(__file__).parent.parent.parent.parent.parent) 75 | config_files = ["/rag_chain_config.yaml", "/feature_eng.yaml"] 76 | 77 | file_paths = [script_dir + "/confs/" + file for file in config_files] 78 | 79 | files = [configs.parse_file(file) for file in file_paths] 80 | 81 | config = configs.merge_configs([*files]) # type: ignore 82 | config["job"]["KIND"] = "CreateVectorDBJob" # type: ignore 83 | 84 | object_ = configs.to_object(config) # python object 85 | 86 | setting = settings.MainSettings.model_validate(object_) 87 | 88 | with setting.job as runner: 89 | runner.run() 90 | -------------------------------------------------------------------------------- /src/llmops_project/pipelines/feature_engineering/ingest_documents.py: -------------------------------------------------------------------------------- 1 | # %% IMPORTS 2 | import os 3 | import typing as T 4 | from pathlib import Path 5 | 6 | import dotenv 7 | from langchain_aws import BedrockEmbeddings 8 | from llmops_project.io import services 9 | from llmops_project.io.vector_db import QdrantVectorDB 10 | from llmops_project.pipelines import base 11 | 12 | logger =
services.LoggerService().logger() 13 | 14 | 15 | # %% Job class for ingesting documents and updating the vector database 16 | class IngestAndUpdateVectorDBJob(base.Job): # type: ignore[misc] 17 | """Job to ingest documents and update the Qdrant vector store. 18 | 19 | Parameters: 20 | document_path (str): path to the documents to ingest. 21 | """ 22 | 23 | KIND: T.Literal["IngestAndUpdateVectorDBJob"] = "IngestAndUpdateVectorDBJob" 24 | 25 | embedding_model: str 26 | vector_store_path: str 27 | collection_name: str 28 | document_path: str 29 | 30 | @T.override 31 | def run(self) -> base.Locals: 32 | # Setup services 33 | # services 34 | # - logger 35 | logger = self.logger_service.logger() 36 | 37 | script_dir = str(Path(__file__).parent.parent.parent.parent.parent) 38 | document_path = script_dir + self.document_path 39 | dotenv.load_dotenv(script_dir + "/.env") 40 | 41 | logger.info(f"Loading Documents from {document_path}...") 42 | 43 | embeddings = BedrockEmbeddings(model_id=self.embedding_model) 44 | 45 | vector_db = QdrantVectorDB( 46 | embeddings_model=embeddings, 47 | collection_name=self.collection_name, 48 | url=self.vector_store_path, 49 | api_key=os.getenv("QDRANT_API_KEY"), 50 | vector_size=1536, 51 | ) 52 | 53 | logger.info( 54 | f"Ingesting documents and updating the {vector_db.__class__.__name__} vector store..." 55 | ) 56 | 57 | vector_db.ingest_documents(document_path) 58 | 59 | logger.success("Documents ingested and vector store updated successfully") 60 | # test_vectordb() 61 | return locals() 62 | 63 | 64 | if __name__ == "__main__": 65 | # Test the pipeline 66 | 67 | from llmops_project import settings 68 | from llmops_project.io import configs 69 | 70 | script_dir = str(Path(__file__).parent.parent.parent.parent.parent) 71 | config_files = ["/rag_chain_config.yaml", "/feature_eng.yaml"] 72 | 73 | file_paths = [script_dir + "/confs/" + file for file in config_files] 74 | 75 | files = [configs.parse_file(file) for file in file_paths] 76 | 77 | config = configs.merge_configs([*files]) # type: ignore 78 | config["job"]["KIND"] = "IngestAndUpdateVectorDBJob" # type: ignore 79 | 80 | object_ = configs.to_object(config) # python object 81 | 82 | setting = settings.MainSettings.model_validate(object_) 83 | 84 | with setting.job as runner: 85 | runner.run() 86 | -------------------------------------------------------------------------------- /src/llmops_project/pipelines/managers/deployment_manager.py: -------------------------------------------------------------------------------- 1 | import typing as T 2 | from pathlib import Path 3 | 4 | from llmops_project.pipelines import base 5 | from llmops_project.pipelines.deployment.deploy_model import DeployModelJob 6 | from llmops_project.pipelines.deployment.register_model import LogAndRegisterModelJob 7 | from llmops_project.pipelines.monitoring.pre_deploy_eval import EvaluateModelJob 8 | 9 | AUTOMATIC_DEPLOYMENT = True 10 | 11 | 12 | # %% Job class for orchestrating the model deployment workflow 13 | class DeploymentJob(base.Job): # type: ignore[misc] 14 | """Job to orchestrate the model deployment workflow: register, evaluate, and deploy. 15 | 16 | Parameters: 17 | registry_model_name (str): name of the model in the registry.
18 | """ 19 | 20 | KIND: T.Literal["DeploymentJob"] = "DeploymentJob" 21 | 22 | # Deployment 23 | registry_model_name: str 24 | llm_model_code_path: str 25 | llm_confs: str 26 | staging_alias: str = "champion" 27 | production_alias: str = "production" 28 | 29 | # Evaluation 30 | qa_dataset_path: str 31 | alias: str 32 | vector_store_path: str 33 | metric_tresholds: dict[str, float] 34 | 35 | @T.override 36 | def run(self) -> base.Locals: 37 | # services 38 | # - logger 39 | logger = self.logger_service.logger() 40 | 41 | logger.info("Starting Model Deployment Workflow") 42 | logger.info("Step: Log and Register Model") 43 | 44 | # Log and register the model 45 | with LogAndRegisterModelJob( 46 | registry_model_name=self.registry_model_name, 47 | staging_alias=self.staging_alias, 48 | vector_store_path=self.vector_store_path, 49 | llm_model_code_path=self.llm_model_code_path, 50 | llm_confs=self.llm_confs, 51 | ) as log_and_register_job: 52 | log_and_register_job.run() 53 | 54 | logger.info("Step: Evaluate Model") 55 | 56 | # Evaluate the model 57 | with EvaluateModelJob( 58 | registry_model_name=self.registry_model_name, 59 | qa_dataset_path=self.qa_dataset_path, 60 | alias=self.alias, 61 | vector_store_path=self.vector_store_path, 62 | metric_tresholds=self.metric_tresholds, 63 | ) as evaluate_job: 64 | evaluate_job.run() 65 | 66 | if not AUTOMATIC_DEPLOYMENT: 67 | logger.warning("Automatic Deployment is disabled") 68 | return locals() 69 | 70 | else: 71 | logger.info("Step: Deploy Model") 72 | 73 | # Deploy the model 74 | with DeployModelJob( 75 | staging_alias=self.alias, 76 | production_alias=self.production_alias, 77 | registry_model_name=self.registry_model_name, 78 | ) as deploy_job: 79 | deploy_job.run() # Automatic Deployment 80 | 81 | logger.success("Model Deployment Workflow complete") 82 | 83 | return locals() 84 | 85 | 86 | if __name__ == "__main__": 87 | from llmops_project import settings 88 | from llmops_project.io import configs 89 | 90 | script_dir = str(Path(__file__).parent.parent.parent.parent.parent) 91 | config_files = ["/deployment.yaml", "/monitoring.yaml"] 92 | 93 | file_paths = [script_dir + "/confs/" + file for file in config_files] 94 | 95 | files = [configs.parse_file(file) for file in file_paths] 96 | 97 | config = configs.merge_configs([*files]) # type: ignore 98 | config["job"]["KIND"] = "DeploymentJob" # type: ignore 99 | 100 | object_ = configs.to_object(config) # python object 101 | 102 | setting = settings.MainSettings.model_validate(object_) 103 | 104 | with setting.job as runner: 105 | runner.run() 106 | -------------------------------------------------------------------------------- /src/llmops_project/pipelines/managers/feature_engineering_manager.py: -------------------------------------------------------------------------------- 1 | import typing as T 2 | from pathlib import Path 3 | 4 | from llmops_project.pipelines import base 5 | from llmops_project.pipelines.feature_engineering.create_vector_db import CreateVectorDBJob 6 | from llmops_project.pipelines.feature_engineering.ingest_documents import IngestAndUpdateVectorDBJob 7 | 8 | 9 | # %% Job class for logging and registering the RAG model 10 | class FeatureEngineeringJob(base.Job): # type: ignore[misc] 11 | """Job to log and register the RAG model in MLflow. 12 | 13 | Parameters: 14 | run_config (services.MlflowService.RunConfig): mlflow run config. 
15 | """ 16 | 17 | KIND: T.Literal["FeatureEngineeringJob"] = "FeatureEngineeringJob" 18 | 19 | embedding_model: str 20 | vector_store_path: str 21 | document_path: str 22 | collection_name: str 23 | 24 | @T.override 25 | def run(self) -> base.Locals: 26 | # Setup services 27 | # services 28 | # - logger 29 | logger = self.logger_service.logger() 30 | 31 | logger.info("Starting Feature Engineering Workflow") 32 | 33 | # Ensure the config path is relative to this script's location 34 | script_dir = Path(__file__).resolve().parent.parent.parent.parent.parent 35 | document_path = str(script_dir / self.document_path) 36 | 37 | logger.info("Creating Vector Database") 38 | 39 | # Create the vector database 40 | with CreateVectorDBJob( 41 | embedding_model=self.embedding_model, 42 | vector_store_path=self.vector_store_path, 43 | collection_name=self.collection_name, 44 | ) as create_vector_db_job: 45 | create_vector_db_job.run() 46 | 47 | # Ingest the documents 48 | with IngestAndUpdateVectorDBJob( 49 | embedding_model=self.embedding_model, 50 | vector_store_path=self.vector_store_path, 51 | collection_name=self.collection_name, 52 | document_path=document_path, 53 | ) as injest_job: 54 | injest_job.run() 55 | 56 | logger.success("Feature Engineering Workflow complete") 57 | 58 | return locals() 59 | 60 | 61 | if __name__ == "__main__": 62 | # Test the pipeline 63 | 64 | from llmops_project import settings 65 | from llmops_project.io import configs 66 | 67 | script_dir = str(Path(__file__).parent.parent.parent.parent.parent) 68 | config_files = ["/rag_chain_config.yaml", "/feature_eng.yaml"] 69 | 70 | file_paths = [script_dir + "/confs/" + file for file in config_files] 71 | 72 | files = [configs.parse_file(file) for file in file_paths] 73 | 74 | config = configs.merge_configs([*files]) # type: ignore 75 | config["job"]["KIND"] = "FeatureEngineeringJob" # type: ignore 76 | 77 | object_ = configs.to_object(config) # python object 78 | 79 | setting = settings.MainSettings.model_validate(object_) 80 | 81 | with setting.job as runner: 82 | runner.run() 83 | -------------------------------------------------------------------------------- /src/llmops_project/pipelines/managers/monitoring_manager.py: -------------------------------------------------------------------------------- 1 | import typing as T 2 | from pathlib import Path 3 | 4 | from llmops_project.pipelines import base 5 | from llmops_project.pipelines.monitoring.post_deploy_eval import MonitoringEvalJob 6 | 7 | 8 | # %% Job class for logging and registering the RAG model 9 | class MonitoringJob(base.Job, frozen=True): # type: ignore[misc] 10 | """Job to orchestrate Monitoring Workflow. 11 | 12 | Parameters: 13 | run_config (services.MlflowService.RunConfig): mlflow run config. 
14 | """ 15 | 16 | KIND: T.Literal["MonitoringJob"] = "MonitoringJob" 17 | 18 | trace_experiment_name: str 19 | monitoring_experiment_name: str 20 | filter_string: T.Optional[str] = None 21 | 22 | @T.override 23 | def run(self) -> base.Locals: 24 | # services 25 | # - logger 26 | logger = self.logger_service.logger() 27 | 28 | logger.info("Starting Model Monitoring Workflow") 29 | 30 | MonitoringEvalJob( 31 | trace_experiment_name=self.trace_experiment_name, 32 | monitoring_experiment_name=self.monitoring_experiment_name, 33 | filter_string=self.filter_string, 34 | ).run() 35 | 36 | logger.success("Model Monitoring Workflow complete") 37 | 38 | return locals() 39 | 40 | 41 | if __name__ == "__main__": 42 | from llmops_project import settings 43 | from llmops_project.io import configs 44 | 45 | script_dir = str(Path(__file__).parent.parent.parent.parent.parent) 46 | config_files = ["/rag_chain_config.yaml", "/monitoring.yaml"] 47 | 48 | file_paths = [script_dir + "/confs/" + file for file in config_files] 49 | 50 | files = [configs.parse_file(file) for file in file_paths] 51 | 52 | config = configs.merge_configs([*files]) # type: ignore 53 | config["job"]["KIND"] = "MonitoringJob" # type: ignore 54 | 55 | object_ = configs.to_object(config) # python object 56 | 57 | setting = settings.MainSettings.model_validate(object_) 58 | 59 | with setting.job as runner: 60 | runner.run() 61 | -------------------------------------------------------------------------------- /src/llmops_project/pipelines/monitoring/generate_rag_dataset.py: -------------------------------------------------------------------------------- 1 | import typing as T 2 | from pathlib import Path 3 | 4 | import nest_asyncio 5 | from llama_index.core import SimpleDirectoryReader 6 | from llama_index.core.llama_dataset.generator import RagDatasetGenerator 7 | from llama_index.llms.bedrock import Bedrock 8 | from llmops_project.pipelines import base 9 | 10 | nest_asyncio.apply() 11 | 12 | 13 | # %% Job class for generating the RAG dataset 14 | class GenerateRagDatasetJob(base.Job): # type: ignore[misc] 15 | """Job to Generate RAG evaluation dataset from documents in the specified data path. 16 | 17 | Parameters: 18 | run_config (services.MlflowService.RunConfig): mlflow run config. 19 | """ 20 | 21 | KIND: T.Literal["GenerateRagDatasetJob"] = "GenerateRagDatasetJob" 22 | 23 | data_path: str 24 | qa_dataset_path_csv: str 25 | qa_dataset_path_json: str 26 | llm_model: str 27 | 28 | def generate_rag_dataset( 29 | self, data_path: str, final_dataset_csv_path: str, final_dataset_json_path: str, model: str 30 | ): 31 | """Generate a RAG dataset from documents in the specified data path. 32 | 33 | Args: 34 | data_path (str): Path to the directory containing the data. 35 | final_dataset_path (str): Path where the final dataset CSV will be saved. 36 | model (str): The model to be used for generating the dataset. 
37 | """ 38 | nest_asyncio.apply() 39 | logger = self.logger_service.logger() 40 | 41 | # Convert string paths to Path objects 42 | script_dir = str(Path(__file__).parent.parent.parent.parent.parent) 43 | data_path = script_dir + self.data_path 44 | 45 | final_dataset_path = final_dataset_csv_path 46 | 47 | logger.info("Loading Data from ", data_path) 48 | # Load documents from the specified data path 49 | reader = SimpleDirectoryReader(data_path) 50 | documents = reader.load_data() 51 | 52 | logger.info("Loaded {} documents".format(len(documents))) 53 | 54 | # Initialize the LLM with the specified model 55 | llm = Bedrock(model=model, request_timeout=60.0) 56 | 57 | # Generate the dataset from the documents 58 | dataset_generator = RagDatasetGenerator.from_documents( 59 | documents, 60 | llm=llm, 61 | num_questions_per_chunk=2, 62 | show_progress=True, 63 | ) 64 | 65 | # Generate the RAG dataset 66 | rag_dataset = dataset_generator.generate_dataset_from_nodes() 67 | 68 | # Convert the dataset to a pandas DataFrame and save it as a CSV 69 | df_dataset = rag_dataset.to_pandas() 70 | df_dataset.to_csv(final_dataset_path) 71 | 72 | # Save the dataset as a JSON file 73 | rag_dataset.save_json(final_dataset_json_path) 74 | 75 | logger.success("RAG dataset generated successfully and saved to {}", final_dataset_path) 76 | 77 | @T.override 78 | def run(self) -> base.Locals: 79 | # services 80 | # - logger 81 | logger = self.logger_service.logger() 82 | 83 | # Set up paths 84 | # Ensure the paths are relative to this script's location 85 | script_dir = Path(__file__).resolve().parent.parent 86 | project_root = ( 87 | script_dir.parent.parent.parent 88 | ) # Adjusted to get to the project root as needed 89 | 90 | data_path = str(project_root / self.data_path) 91 | final_dataset_path = str(project_root / self.qa_dataset_path_csv) 92 | final_dataset_json_path = str(project_root / self.qa_dataset_path_json) 93 | 94 | # Generate RAG Dataset 95 | logger.info("Generating RAG dataset from documents in {}", data_path) 96 | self.generate_rag_dataset( 97 | data_path, final_dataset_path, final_dataset_json_path, self.llm_model 98 | ) 99 | 100 | logger.success("RAG dataset generated successfully") 101 | 102 | return locals() 103 | 104 | 105 | if __name__ == "__main__": 106 | from pathlib import Path 107 | 108 | from llmops_project import settings 109 | from llmops_project.io import configs 110 | 111 | script_dir = str(Path(__file__).parent.parent.parent.parent.parent) 112 | config_files = ["/generate_rag_dataset.yaml"] 113 | 114 | file_paths = [script_dir + "/confs/" + file for file in config_files] 115 | 116 | files = [configs.parse_file(file) for file in file_paths] 117 | 118 | config = configs.merge_configs([*files]) # type: ignore 119 | config["job"]["KIND"] = "GenerateRagDatasetJob" # type: ignore 120 | 121 | object_ = configs.to_object(config) # python object 122 | 123 | setting = settings.MainSettings.model_validate(object_) 124 | 125 | with setting.job as runner: 126 | runner.run() 127 | -------------------------------------------------------------------------------- /src/llmops_project/pipelines/monitoring/post_deploy_eval.py: -------------------------------------------------------------------------------- 1 | import json 2 | import time 3 | import typing as T 4 | from pathlib import Path 5 | from typing import Optional 6 | 7 | import mlflow 8 | import pandas as pd 9 | import plotly.graph_objects as go 10 | from llmops_project.pipelines import base 11 | from plotly.subplots import make_subplots 12 | 13 
| 14 | def filter_generations(df): 15 | return df[ 16 | df["response"].apply( 17 | lambda x: "generations" not in json.loads(x) if pd.notnull(x) else True 18 | ) 19 | ] 20 | 21 | 22 | def extract_answer(data): 23 | if data: 24 | data_dict = json.loads(data) 25 | if "result" in data_dict: 26 | return data_dict["result"] 27 | return None 28 | 29 | 30 | def extract_last_message_content(request): 31 | return json.loads(request)["messages"][-1]["content"] 32 | 33 | 34 | def create_gauge_chart(value1, title1, value2, title2): 35 | # Create a subplot figure with two columns 36 | fig = make_subplots(rows=1, cols=2, specs=[[{"type": "indicator"}, {"type": "indicator"}]]) 37 | 38 | # Add the first gauge chart 39 | fig.add_trace( 40 | go.Indicator( 41 | mode="gauge+number", 42 | value=value1, 43 | title={"text": title1}, 44 | gauge={"axis": {"range": [None, 18]}}, 45 | ), 46 | row=1, 47 | col=1, 48 | ) 49 | 50 | # Add the second gauge chart 51 | fig.add_trace( 52 | go.Indicator( 53 | mode="gauge+number", 54 | value=value2, 55 | title={"text": title2}, 56 | gauge={"axis": {"range": [None, 100]}}, 57 | ), 58 | row=1, 59 | col=2, 60 | ) 61 | 62 | # Update layout 63 | fig.update_layout(height=400, width=800) 64 | 65 | # Show figure 66 | # fig.show() 67 | 68 | return fig 69 | 70 | 71 | class MonitoringEvalJob(base.Job): # type: ignore[misc] 72 | """Job to evaluate production traces from the serving endpoint for monitoring.""" 73 | 74 | KIND: T.Literal["MonitoringEvalJob"] = "MonitoringEvalJob" 75 | 76 | trace_experiment_name: str 77 | monitoring_experiment_name: str 78 | filter_string: Optional[str] = None 79 | 80 | @T.override 81 | def run(self) -> base.Locals: 82 | """Run the monitoring evaluation over recent serving traces. 83 | 84 | Returns: 85 | base.Locals: The local variables after running the job.
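        Note:
            filter_string follows the MLflow trace-search syntax; the default
            built by this job has the shape
            "trace.timestamp_ms > 1700000000000" (a millisecond epoch cutoff).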
86 | """ 87 | 88 | # services 89 | # - logger 90 | logger = self.logger_service.logger() 91 | 92 | # - mlflow 93 | client = self.mlflow_service.client() 94 | logger.info("With client: {}", client.tracking_uri) 95 | 96 | experiment = mlflow.get_experiment_by_name(self.trace_experiment_name) 97 | if experiment: 98 | experiment_id = experiment.experiment_id 99 | logger.info(f"Experiment ID: {experiment_id}") 100 | else: 101 | logger.error("Experiment with the traces not found.") 102 | return locals() # Add return statement here 103 | 104 | # Set the filter string to only include runs from the last week 105 | if self.filter_string is None: 106 | one_week_ago = int((time.time() - 7 * 24 * 60 * 60) * 1000) # Convert to milliseconds 107 | filter_string = f"trace.timestamp_ms > {one_week_ago}" 108 | logger.success("Monitoring traces from the last week") 109 | 110 | else: 111 | filter_string = self.filter_string 112 | 113 | # Search all the traces in the experiment that match the filter string 114 | traces_df = mlflow.search_traces( 115 | experiment_ids=[experiment_id], 116 | filter_string=filter_string, 117 | max_results=2000, 118 | ) 119 | 120 | # Filter error traces 121 | traces_df = traces_df[traces_df["status"] != "TraceStatus.ERROR"] 122 | traces_df = filter_generations(traces_df) 123 | 124 | # Extract the answer and question from the request and response 125 | traces_df["answer"] = traces_df["response"].apply(extract_answer) 126 | traces_df["question"] = traces_df["request"].apply(extract_last_message_content) 127 | 128 | # Create a DataFrame with the inputs and predictions 129 | eval_df = traces_df[["question", "answer"]] 130 | eval_df = eval_df.rename(columns={"question": "inputs", "answer": "predictions"}) 131 | 132 | # remove predictions with None values 133 | eval_df = eval_df.dropna() 134 | 135 | # Get the current week number 136 | current_week = time.strftime("CW%U") 137 | 138 | mlflow.set_experiment(self.monitoring_experiment_name) 139 | 140 | logger.info( 141 | "Monitoring results to be logged in experiment: {}", self.monitoring_experiment_name 142 | ) 143 | 144 | answer_relevance = mlflow.metrics.genai.answer_relevance( # Compares input with predictions to check if its relevant (good for monitoring) 145 | model="bedrock:/anthropic.claude-3-haiku-20240307-v1:0", 146 | parameters={ 147 | "temperature": 0, 148 | "anthropic_version": "bedrock-2023-05-31", 149 | }, 150 | ) 151 | 152 | with mlflow.start_run(run_name=current_week): 153 | results = mlflow.evaluate( # type: ignore 154 | data=eval_df[["inputs", "predictions"]], 155 | predictions="predictions", 156 | model_type="text", 157 | evaluators=["default"], 158 | extra_metrics=[answer_relevance], 159 | ) 160 | 161 | toxicity_score = results.metrics["toxicity/v1/mean"] 162 | # Calculate non-toxicity score 163 | non_toxicity_score = "{:.2f}".format((1 - toxicity_score) * 100) 164 | readability_score = "{:.2f}".format( 165 | results.metrics["flesch_kincaid_grade_level/v1/mean"] 166 | ) 167 | logger.info("Non Toxicity Score: {}", non_toxicity_score) 168 | logger.info("Readability Score: {}", readability_score) 169 | 170 | guage = create_gauge_chart( 171 | float(readability_score), 172 | "English Readability score", 173 | float(non_toxicity_score), 174 | "Non Toxicity Score", 175 | ) 176 | mlflow.log_figure(guage, "gauge_chart.png") 177 | 178 | logger.success("Model Monitoring completed successfully.") 179 | 180 | return locals() 181 | 182 | 183 | if __name__ == "__main__": 184 | from pathlib import Path 185 | 186 | from llmops_project 
import settings 187 | from llmops_project.io import configs 188 | 189 | script_dir = str(Path(__file__).parent.parent.parent.parent.parent) 190 | config_files = ["/monitoring.yaml"] 191 | 192 | file_paths = [script_dir + "/confs/" + file for file in config_files] 193 | 194 | files = [configs.parse_file(file) for file in file_paths] 195 | 196 | config = configs.merge_configs([*files]) # type: ignore 197 | config["job"]["KIND"] = "MonitoringEvalJob" # type: ignore 198 | 199 | object_ = configs.to_object(config) # python object 200 | 201 | setting = settings.MainSettings.model_validate(object_) 202 | 203 | with setting.job as runner: 204 | runner.run() 205 | -------------------------------------------------------------------------------- /src/llmops_project/pipelines/monitoring/pre_deploy_eval.py: -------------------------------------------------------------------------------- 1 | import typing as T 2 | from pathlib import Path 3 | 4 | import mlflow 5 | import pandas as pd 6 | from llmops_project.pipelines import base 7 | from mlflow import MlflowClient 8 | 9 | 10 | class EvaluateModelJob(base.Job): # type: ignore[misc] 11 | """Job to Evaluate the challenger model based on a QA dataset. 12 | 13 | Attributes: 14 | KIND (Literal["EvaluateModelJob"]): The kind of job. 15 | qa_dataset_path (str): Path to the QA dataset. 16 | registry_model_name (str): Name of the model in the registry. 17 | alias (str): Alias of the model version. 18 | vector_store_path (str): Path to the vector store. 19 | metric_tresholds (dict[str, float]): Dictionary of metric thresholds. 20 | """ 21 | 22 | KIND: T.Literal["EvaluateModelJob"] = "EvaluateModelJob" 23 | 24 | qa_dataset_path: str 25 | registry_model_name: str 26 | alias: str 27 | vector_store_path: str 28 | metric_tresholds: dict[str, float] 29 | 30 | def load_qa_dataset(self, data_path: str) -> pd.DataFrame: 31 | """Load the QA dataset from the specified path. 32 | 33 | Args: 34 | data_path (str): Path to the QA dataset. 35 | 36 | Returns: 37 | pd.DataFrame: The loaded QA dataset. 38 | """ 39 | df = pd.read_csv(data_path) 40 | df = df.copy() 41 | df = df.rename( 42 | columns={ 43 | "query": "inputs", 44 | "reference_answer": "ground_truth", 45 | "reference_contexts": "context", 46 | } 47 | ) 48 | return df 49 | 50 | def generate_python_function_from_model( 51 | self, model_name: str, model_alias: str, vector_db_path: str 52 | ) -> T.Callable[[pd.DataFrame], T.List[str]]: 53 | """Generate a Python function from the model. 54 | 55 | Args: 56 | model_name (str): Name of the model. 57 | model_alias (str): Alias of the model version. 58 | vector_db_path (str): Path to the vector store. 59 | 60 | Returns: 61 | Callable[[pd.DataFrame], T.List[str]]: A function that takes a DataFrame of inputs and returns a list of predictions. 62 | """ 63 | model_uri = f"models:/{model_name}@{model_alias}" 64 | model = mlflow.langchain.load_model(model_uri) 65 | 66 | def model_qa(inputs: pd.DataFrame) -> T.List[str]: 67 | answers = [] 68 | for _, row in inputs.iterrows(): 69 | question = { 70 | "messages": [ 71 | {"role": "user", "content": f"{row['inputs']}"}, 72 | ], 73 | "vector_store_path": vector_db_path, 74 | } 75 | answer = model.invoke(question) 76 | answers.append(answer["result"]) 77 | return answers 78 | 79 | return model_qa 80 | 81 | def evaluate_model(self, eval_df: pd.DataFrame) -> mlflow.models.EvaluationResult: 82 | """Evaluate the model using the evaluation DataFrame. 83 | 84 | Args: 85 | eval_df (pd.DataFrame): DataFrame containing the evaluation data.
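                A hedged example of the expected shape (column names taken
                from this job's run method):
                pd.DataFrame({"inputs": [...], "ground_truth": [...],
                "predictions": [...]}).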
86 | 87 | Returns: 88 | mlflow.models.EvaluationResult: The evaluation results. 89 | """ 90 | with mlflow.start_run(): 91 | results = mlflow.evaluate( # type: ignore 92 | data=eval_df[["inputs", "ground_truth", "predictions"]], 93 | targets="ground_truth", 94 | predictions="predictions", 95 | model_type="question-answering", 96 | evaluators=["default"], 97 | ) 98 | return results 99 | 100 | def set_tag_for_model_evals( 101 | self, beats_baseline: bool, model_name: str, current_alias: str = "champion" 102 | ) -> None: 103 | """Set a tag for the model evaluations. 104 | 105 | Args: 106 | beats_baseline (bool): Whether the model meets the evaluation criteria. 107 | model_name (str): Name of the model. 108 | current_alias (str, optional): Alias of the current model version. Defaults to "champion". 109 | """ 110 | client = MlflowClient() 111 | model_version = client.get_model_version_by_alias(name=model_name, alias=current_alias) 112 | client.set_model_version_tag( 113 | name=model_name, 114 | version=model_version.version, 115 | key="meets_evaluation_criteria", 116 | value=beats_baseline, 117 | ) 118 | 119 | @T.override 120 | def run(self) -> base.Locals: 121 | """Run the job to evaluate the model. 122 | 123 | Returns: 124 | base.Locals: The local variables after running the job. 125 | """ 126 | # services 127 | logger = self.logger_service.logger() 128 | 129 | # Set up paths 130 | script_dir = str(Path(__file__).resolve().parent.parent.parent.parent.parent) 131 | 132 | logger.info("Script Directory: {}", script_dir) 133 | data_path = str(script_dir + self.qa_dataset_path) 134 | 135 | logger.info("Loading QA dataset from {}", data_path) 136 | eval_df = self.load_qa_dataset(data_path) 137 | model = self.generate_python_function_from_model( 138 | self.registry_model_name, self.alias, self.vector_store_path 139 | ) 140 | logger.info('Using Vector Store at "{}"', self.vector_store_path) 141 | 142 | logger.info("Running Predictions on the QA Dataset") 143 | eval_df["predictions"] = model(eval_df) 144 | 145 | logger.info("Evaluating the model") 146 | results = self.evaluate_model(eval_df) 147 | result_metrics = results.metrics 148 | 149 | metrics = [ 150 | result_metrics["flesch_kincaid_grade_level/v1/mean"], 151 | result_metrics["ari_grade_level/v1/mean"], 152 | ] 153 | 154 | logger.info("Model Evaluation Metrics: {}", result_metrics) 155 | 156 | thresholds = [ 157 | self.metric_tresholds["flesch_kincaid_grade_level_mean"], 158 | self.metric_tresholds["ari_grade_level_mean"], 159 | ] 160 | 161 | beats_baseline = True 162 | for metric, threshold in zip(metrics, thresholds): 163 | if metric < threshold: 164 | beats_baseline = False 165 | break 166 | 167 | logger.info(f"Model meets evaluation criteria: {beats_baseline}") 168 | 169 | self.set_tag_for_model_evals( 170 | beats_baseline, model_name=self.registry_model_name, current_alias=self.alias 171 | ) 172 | logger.success("Model evaluation complete") 173 | 174 | return locals() 175 | 176 | 177 | if __name__ == "__main__": 178 | from pathlib import Path 179 | 180 | from llmops_project import settings 181 | from llmops_project.io import configs 182 | 183 | script_dir = str(Path(__file__).parent.parent.parent.parent.parent) 184 | config_files = ["/monitoring.yaml"] 185 | 186 | file_paths = [script_dir + "/confs/" + file for file in config_files] 187 | 188 | files = [configs.parse_file(file) for file in file_paths] 189 | 190 | config = configs.merge_configs([*files]) # type: ignore 191 | config["job"]["KIND"] = "EvaluateModelJob" # type: ignore 192 | 193 
| object_ = configs.to_object(config) # python object 194 | 195 | setting = settings.MainSettings.model_validate(object_) 196 | 197 | with setting.job as runner: 198 | runner.run() 199 | -------------------------------------------------------------------------------- /src/llmops_project/scripts.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Scripts for the CLI application.""" 3 | 4 | # ruff: noqa: E402 5 | 6 | # %% WARNINGS 7 | 8 | import warnings 9 | 10 | # disable annoying mlflow warnings 11 | warnings.filterwarnings(action="ignore", category=UserWarning) 12 | 13 | # %% IMPORTS 14 | 15 | import argparse 16 | import json 17 | import sys 18 | 19 | from llmops_project import settings 20 | from llmops_project.io import configs 21 | 22 | # %% PARSERS 23 | 24 | parser = argparse.ArgumentParser(description="Run an AI/ML job from YAML/JSON configs.") 25 | parser.add_argument("files", nargs="*", help="Config files for the job (local path only).") 26 | parser.add_argument("-e", "--extras", nargs="*", default=[], help="Config strings for the job.") 27 | parser.add_argument("-s", "--schema", action="store_true", help="Print settings schema and exit.") 28 | 29 | # %% SCRIPTS 30 | 31 | 32 | def main(argv: list[str] | None = None) -> int: 33 | """Main script for the application.""" 34 | args = parser.parse_args(argv) 35 | if args.schema: 36 | schema = settings.MainSettings.model_json_schema() 37 | json.dump(schema, sys.stdout, indent=4) 38 | return 0 39 | files = [configs.parse_file(file) for file in args.files] 40 | strings = [configs.parse_string(string) for string in args.extras] 41 | if len(files) == 0 and len(strings) == 0: 42 | raise RuntimeError("No configs provided.") 43 | config = configs.merge_configs([*files, *strings]) 44 | object_ = configs.to_object(config) # python object 45 | setting = settings.MainSettings.model_validate(object_) 46 | with setting.job as runner: 47 | runner.run() 48 | return 0 49 | -------------------------------------------------------------------------------- /src/llmops_project/settings.py: -------------------------------------------------------------------------------- 1 | """Define settings for the application.""" 2 | 3 | # %% IMPORTS 4 | 5 | import pydantic as pdt 6 | import pydantic_settings as pdts 7 | 8 | from llmops_project import pipelines 9 | 10 | # %% SETTINGS 11 | 12 | 13 | class Settings(pdts.BaseSettings, strict=True, frozen=True, extra="allow"): # type: ignore[misc] 14 | """Base class for application settings. 15 | 16 | Use settings to provide high-level preferences. 17 | i.e., to separate settings from provider (e.g., CLI). 18 | """ 19 | 20 | 21 | class MainSettings(Settings): # type: ignore[misc] 22 | """Main settings of the application. 23 | 24 | Parameters: 25 | job (jobs.JobKind): job to run. 
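    Example (the shape the CLI builds from YAML configs, with a
    discriminated job section):

        job:
          KIND: "DeploymentJob"
          # ... job-specific fields from confs/*.yaml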
26 | """ 27 | 28 | job: pipelines.JobKind = pdt.Field(..., discriminator="KIND") 29 | -------------------------------------------------------------------------------- /static/autoscaling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/static/autoscaling.png -------------------------------------------------------------------------------- /static/experiment_tracking.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/static/experiment_tracking.png -------------------------------------------------------------------------------- /static/guage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/static/guage.png -------------------------------------------------------------------------------- /static/llmops-rag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/static/llmops-rag.png -------------------------------------------------------------------------------- /static/llmops.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/static/llmops.png -------------------------------------------------------------------------------- /static/llmopsmindmap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/static/llmopsmindmap.png -------------------------------------------------------------------------------- /static/model_version.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/static/model_version.png -------------------------------------------------------------------------------- /static/monitoring.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/static/monitoring.png -------------------------------------------------------------------------------- /static/rag_lifecycle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/static/rag_lifecycle.png -------------------------------------------------------------------------------- /static/tracing-top.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/static/tracing-top.gif -------------------------------------------------------------------------------- /static/vector_db.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/static/vector_db.png -------------------------------------------------------------------------------- /tasks/__init__.py: -------------------------------------------------------------------------------- 1 | """Task collections for the project.""" 2 | 3 | # mypy: ignore-errors 4 | 5 | # %% IMPORTS 6 | 7 | from invoke import Collection 8 | 9 | from . import ( 10 | checks, 11 | cleans, 12 | commits, 13 | containers, 14 | docs, 15 | formats, 16 | installs, 17 | mlflow, 18 | packages, 19 | projects, 20 | serve, 21 | ) 22 | 23 | # %% NAMESPACES 24 | 25 | ns = Collection() 26 | 27 | # %% COLLECTIONS 28 | 29 | ns.add_collection(checks) 30 | ns.add_collection(cleans) 31 | ns.add_collection(commits) 32 | ns.add_collection(containers) 33 | ns.add_collection(docs) 34 | ns.add_collection(formats) 35 | ns.add_collection(installs) 36 | ns.add_collection(mlflow) 37 | ns.add_collection(packages) 38 | ns.add_collection(serve) 39 | ns.add_collection(projects, default=True) 40 | -------------------------------------------------------------------------------- /tasks/checks.py: -------------------------------------------------------------------------------- 1 | """Check tasks for pyinvoke.""" 2 | 3 | # %% IMPORTS 4 | 5 | from invoke.context import Context 6 | from invoke.tasks import task 7 | 8 | # %% TASKS 9 | 10 | 11 | @task 12 | def poetry(ctx: Context) -> None: 13 | """Check poetry config files.""" 14 | ctx.run("poetry check --lock", pty=True) 15 | 16 | 17 | @task 18 | def format(ctx: Context) -> None: 19 | """Check the formats with ruff.""" 20 | ctx.run("poetry run ruff format --check src/ tasks/ tests/", pty=True) 21 | 22 | 23 | @task 24 | def type(ctx: Context) -> None: 25 | """Check the types with mypy.""" 26 | ctx.run("poetry run mypy src/ tasks/ tests/", pty=True) 27 | 28 | 29 | @task 30 | def code(ctx: Context) -> None: 31 | """Check the codes with ruff.""" 32 | ctx.run("poetry run ruff check src/ tasks/ tests/", pty=True) 33 | 34 | 35 | @task 36 | def test(ctx: Context) -> None: 37 | """Check the tests with pytest.""" 38 | ctx.run( 39 | "poetry run pytest " 40 | "tests/pipelines/feature_engineering/test_create_vector_db.py " # Feature Engineering 41 | "tests/pipelines/feature_engineering/test_ingest_documents.py " # Feature Engineering 42 | "tests/pipelines/monitoring/test_generate_rag_dataset.py " # Monitoring 43 | "tests/pipelines/deployment/test_register_model.py " # Deployment 44 | "tests/pipelines/monitoring/test_pre_deploy_eval.py " # Monitoring 45 | "tests/pipelines/deployment/test_deploy_model.py " # Deployment 46 | "tests/io/test_services.py " # IO 47 | "tests/io/test_configs.py " # IO 48 | "tests/pipelines/test_base.py ", # Base 49 | # "--numprocesses='auto'" 50 | pty=True, 51 | ) 52 | 53 | 54 | @task 55 | def security(ctx: Context) -> None: 56 | """Check the security with bandit.""" 57 | ctx.run("poetry run bandit --recursive --configfile=pyproject.toml src/", pty=True) 58 | 59 | 60 | @task 61 | def coverage(ctx: Context) -> None: 62 | """Check the coverage with coverage.""" 63 | ctx.run( 64 | "poetry run pytest --cov=src/ --cov-fail-under=20 " 65 | "tests/pipelines/feature_engineering/test_create_vector_db.py " # Feature Engineering 66 | "tests/pipelines/feature_engineering/test_ingest_documents.py " # Feature Engineering 67 | "tests/pipelines/monitoring/test_generate_rag_dataset.py " # Monitoring 68 | "tests/pipelines/deployment/test_register_model.py " # Deployment 69 | 
"tests/pipelines/monitoring/test_pre_deploy_eval.py " # Monitoring 70 | "tests/pipelines/deployment/test_deploy_model.py " # Deployment 71 | "tests/io/test_services.py " # IO 72 | "tests/io/test_configs.py " # IO 73 | "tests/pipelines/test_base.py ", # Base 74 | # "--numprocesses='auto'" 75 | pty=True, 76 | ) 77 | 78 | 79 | @task(pre=[poetry, format, type, code, security, coverage], default=True) 80 | def all(_: Context) -> None: 81 | """Run all check tasks.""" 82 | -------------------------------------------------------------------------------- /tasks/cleans.py: -------------------------------------------------------------------------------- 1 | """Clean tasks for pyinvoke.""" 2 | 3 | # %% IMPORTS 4 | 5 | from invoke.context import Context 6 | from invoke.tasks import task 7 | 8 | # %% TASKS 9 | 10 | # %% - Tools 11 | 12 | 13 | @task 14 | def mypy(ctx: Context) -> None: 15 | """Clean the mypy tool.""" 16 | ctx.run("rm -rf .mypy_cache/") 17 | 18 | 19 | @task 20 | def ruff(ctx: Context) -> None: 21 | """Clean the ruff tool.""" 22 | ctx.run("rm -rf .ruff_cache/") 23 | 24 | 25 | @task 26 | def pytest(ctx: Context) -> None: 27 | """Clean the pytest tool.""" 28 | ctx.run("rm -rf .pytest_cache/") 29 | 30 | 31 | @task 32 | def coverage(ctx: Context) -> None: 33 | """Clean the coverage tool.""" 34 | ctx.run("rm -f .coverage*") 35 | 36 | 37 | # %% - Folders 38 | 39 | 40 | @task 41 | def dist(ctx: Context) -> None: 42 | """Clean the dist folder.""" 43 | ctx.run("rm -f dist/*") 44 | 45 | 46 | @task 47 | def docs(ctx: Context) -> None: 48 | """Clean the docs folder.""" 49 | ctx.run("rm -rf docs/*") 50 | 51 | 52 | @task 53 | def cache(ctx: Context) -> None: 54 | """Clean the cache folder.""" 55 | ctx.run("rm -rf .cache/") 56 | 57 | 58 | @task 59 | def mlruns(ctx: Context) -> None: 60 | """Clean the mlruns folder.""" 61 | ctx.run("rm -rf mlruns/*") 62 | 63 | 64 | @task 65 | def outputs(ctx: Context) -> None: 66 | """Clean the outputs folder.""" 67 | ctx.run("rm -rf outputs/*") 68 | 69 | 70 | # %% - Sources 71 | 72 | 73 | @task 74 | def venv(ctx: Context) -> None: 75 | """Clean the venv folder.""" 76 | ctx.run("rm -rf .venv/") 77 | 78 | 79 | @task 80 | def poetry(ctx: Context) -> None: 81 | """Clean poetry lock file.""" 82 | ctx.run("rm -f poetry.lock") 83 | 84 | 85 | @task 86 | def python(ctx: Context) -> None: 87 | """Clean python caches and bytecodes.""" 88 | ctx.run("find . -type f -name '*.py[co]' -delete") 89 | ctx.run(r"find . 
-type d -name __pycache__ -exec rm -r {} \+") 90 | 91 | 92 | # %% PROJECTS 93 | 94 | 95 | @task 96 | def requirements(ctx: Context) -> None: 97 | """Clean the project requirements file.""" 98 | ctx.run("rm -f requirements.txt") 99 | 100 | 101 | @task 102 | def environment(ctx: Context) -> None: 103 | """Clean the project environment file.""" 104 | ctx.run("rm -f python_env.yaml") 105 | 106 | 107 | # %% - Combines 108 | 109 | 110 | @task(pre=[mypy, ruff, pytest, coverage]) 111 | def tools(_: Context) -> None: 112 | """Run all tools tasks.""" 113 | 114 | 115 | @task(pre=[dist, docs, cache, mlruns, outputs]) 116 | def folders(_: Context) -> None: 117 | """Run all folders tasks.""" 118 | 119 | 120 | @task(pre=[venv, poetry, python]) 121 | def sources(_: Context) -> None: 122 | """Run all sources tasks.""" 123 | 124 | 125 | @task(pre=[requirements, environment]) 126 | def projects(_: Context) -> None: 127 | """Run all projects tasks.""" 128 | 129 | 130 | @task(pre=[tools, folders], default=True) 131 | def all(_: Context) -> None: 132 | """Run all tools and folders tasks.""" 133 | 134 | 135 | @task(pre=[all, sources, projects]) 136 | def reset(_: Context) -> None: 137 | """Run all tools, folders, sources, and projects tasks.""" 138 | -------------------------------------------------------------------------------- /tasks/commits.py: -------------------------------------------------------------------------------- 1 | """Commits tasks for pyinvoke.""" 2 | 3 | # %% IMPORTS 4 | 5 | from invoke.context import Context 6 | from invoke.tasks import task 7 | 8 | # %% TASKS 9 | 10 | 11 | @task 12 | def info(ctx: Context) -> None: 13 | """Print a guide for messages.""" 14 | ctx.run("poetry run cz info", pty=True) 15 | 16 | 17 | @task 18 | def bump(ctx: Context) -> None: 19 | """Bump the version of the package.""" 20 | ctx.run("poetry run cz bump", pty=True) 21 | 22 | 23 | @task 24 | def commit(ctx: Context) -> None: 25 | """Commit all changes with a message.""" 26 | ctx.run("poetry run cz commit", pty=True) 27 | 28 | 29 | @task(pre=[commit], default=True) 30 | def all(_: Context) -> None: 31 | """Run all commit tasks.""" 32 | -------------------------------------------------------------------------------- /tasks/containers.py: -------------------------------------------------------------------------------- 1 | """Container tasks for pyinvoke.""" 2 | 3 | # %% IMPORTS 4 | 5 | from invoke.context import Context 6 | from invoke.tasks import task 7 | 8 | from . import packages 9 | 10 | # %% CONFIGS 11 | 12 | IMAGE_TAG = "latest" 13 | 14 | # %% TASKS 15 | 16 | 17 | @task 18 | def compose(ctx: Context) -> None: 19 | """Start up docker compose.""" 20 | ctx.run("docker compose up") 21 | 22 | 23 | @task(pre=[packages.build]) 24 | def build(ctx: Context, tag: str = IMAGE_TAG) -> None: 25 | """Build the container image.""" 26 | ctx.run(f"docker build --tag={ctx.project.repository}:{tag} .") 27 | 28 | 29 | @task 30 | def run(ctx: Context, tag: str = IMAGE_TAG) -> None: 31 | """Run the container image.""" 32 | ctx.run(f"docker run --rm {ctx.project.repository}:{tag}") 33 | 34 | 35 | @task(pre=[build, run], default=True) 36 | def all(_: Context) -> None: 37 | """Run all container tasks.""" 38 | -------------------------------------------------------------------------------- /tasks/docs.py: -------------------------------------------------------------------------------- 1 | """Docs tasks for pyinvoke.""" 2 | 3 | # %% IMPORTS 4 | 5 | from invoke.context import Context 6 | from invoke.tasks import task 7 | 8 | from . 
import cleans 9 | 10 | # %% CONFIGS 11 | 12 | DOC_FORMAT = "google" 13 | OUTPUT_DIR = "docs/" 14 | 15 | # %% TASKS 16 | 17 | 18 | @task 19 | def serve(ctx: Context, format: str = DOC_FORMAT, port: int = 8088) -> None: 20 | """Serve the API docs with pdoc.""" 21 | ctx.run(f"poetry run pdoc --docformat={format} --port={port} src/{ctx.project.package}") 22 | 23 | 24 | @task 25 | def api(ctx: Context, format: str = DOC_FORMAT, output_dir: str = OUTPUT_DIR) -> None: 26 | """Generate the API docs with pdoc.""" 27 | ctx.run( 28 | f"poetry run pdoc --docformat={format} --output-directory={output_dir} src/{ctx.project.package}" 29 | ) 30 | 31 | 32 | @task(pre=[cleans.docs, api], default=True) 33 | def all(_: Context) -> None: 34 | """Run all docs tasks.""" 35 | -------------------------------------------------------------------------------- /tasks/formats.py: -------------------------------------------------------------------------------- 1 | """Format tasks for pyinvoke.""" 2 | 3 | # %% IMPORTS 4 | 5 | from invoke.context import Context 6 | from invoke.tasks import task 7 | 8 | # %% TASKS 9 | 10 | 11 | @task 12 | def imports(ctx: Context) -> None: 13 | """Format python imports with ruff.""" 14 | ctx.run("poetry run ruff check --select I --fix") 15 | 16 | 17 | @task 18 | def sources(ctx: Context) -> None: 19 | """Format python sources with ruff.""" 20 | ctx.run("poetry run ruff format src/ tasks/ tests/") 21 | 22 | 23 | @task(pre=[imports, sources], default=True) 24 | def all(_: Context) -> None: 25 | """Run all format tasks.""" 26 | -------------------------------------------------------------------------------- /tasks/installs.py: -------------------------------------------------------------------------------- 1 | """Install tasks for pyinvoke.""" 2 | 3 | # %% IMPORTS 4 | 5 | from invoke.context import Context 6 | from invoke.tasks import task 7 | 8 | # %% TASKS 9 | 10 | 11 | @task 12 | def poetry(ctx: Context) -> None: 13 | """Install poetry packages.""" 14 | ctx.run("poetry install") 15 | 16 | 17 | @task 18 | def pre_commit(ctx: Context) -> None: 19 | """Install pre-commit hooks on git.""" 20 | ctx.run("poetry run pre-commit install --hook-type pre-push") 21 | ctx.run("poetry run pre-commit install --hook-type commit-msg") 22 | 23 | 24 | @task(pre=[poetry, pre_commit], default=True) 25 | def all(_: Context) -> None: 26 | """Run all install tasks.""" 27 | -------------------------------------------------------------------------------- /tasks/mlflow.py: -------------------------------------------------------------------------------- 1 | """Mlflow tasks for pyinvoke.""" 2 | 3 | # %% IMPORTS 4 | 5 | from invoke.context import Context 6 | from invoke.tasks import task 7 | 8 | # %% TASKS 9 | 10 | 11 | @task 12 | def doctor(ctx: Context) -> None: 13 | """Run mlflow doctor.""" 14 | ctx.run("poetry run mlflow doctor") 15 | 16 | 17 | @task 18 | def serve( 19 | ctx: Context, host: str = "127.0.0.1", port: str = "5000", backend_uri: str = "./mlruns" 20 | ) -> None: 21 | """Start the mlflow server.""" 22 | ctx.run( 23 | f"poetry run mlflow server --host={host} --port={port} --backend-store-uri={backend_uri}" 24 | ) 25 | 26 | 27 | @task(pre=[doctor, serve], default=True) 28 | def all(_: Context) -> None: 29 | """Run all mlflow tasks.""" 30 | -------------------------------------------------------------------------------- /tasks/packages.py: -------------------------------------------------------------------------------- 1 | """Package tasks for pyinvoke.""" 2 | 3 | # %% IMPORTS 4 | 5 | from invoke.context 
import Context 6 | from invoke.tasks import task 7 | 8 | from . import cleans 9 | 10 | # %% CONFIGS 11 | 12 | BUILD_FORMAT = "wheel" 13 | 14 | # %% TASKS 15 | 16 | 17 | @task(pre=[cleans.dist]) 18 | def build(ctx: Context, format: str = BUILD_FORMAT) -> None: 19 | """Build the python package.""" 20 | ctx.run(f"poetry build --format={format}") 21 | 22 | 23 | @task(pre=[build], default=True) 24 | def all(_: Context) -> None: 25 | """Run all package tasks.""" 26 | -------------------------------------------------------------------------------- /tasks/projects.py: -------------------------------------------------------------------------------- 1 | """Project tasks for pyinvoke.""" 2 | 3 | # mypy: disable-error-code="arg-type" 4 | 5 | # %% IMPORTS 6 | 7 | import json 8 | 9 | from invoke.context import Context 10 | from invoke.tasks import call, task 11 | 12 | # %% CONFIGS 13 | 14 | PYTHON_VERSION = ".python-version" 15 | REQUIREMENTS = "requirements.txt" 16 | ENVIRONMENT = "python_env.yaml" 17 | 18 | # %% TASKS 19 | 20 | 21 | @task 22 | def requirements(ctx: Context) -> None: 23 | """Export the project requirements file.""" 24 | ctx.run(f"poetry export --without-urls --without-hashes --output={REQUIREMENTS}") 25 | 26 | 27 | @task(pre=[requirements]) 28 | def environment(ctx: Context) -> None: 29 | """Export the project environment file.""" 30 | with open(PYTHON_VERSION, "r") as reader: 31 | python = reader.read().strip() # version 32 | configuration: dict[str, object] = {"python": python} 33 | with open(REQUIREMENTS, "r") as reader: 34 | dependencies: list[str] = [] 35 | for line in reader: 36 | dependency = line.split(" ")[0] 37 | if "pywin32" not in dependency: 38 | dependencies.append(dependency) 39 | configuration["dependencies"] = dependencies 40 | with open(ENVIRONMENT, "w") as writer: 41 | # Safe as YAML is a superset of JSON 42 | json.dump(configuration, writer, indent=4) 43 | writer.write("\n") # add new line at the end 44 | 45 | 46 | @task 47 | def run(ctx: Context, job: str) -> None: 48 | """Run an mlflow project from the MLproject file.""" 49 | ctx.run( 50 | f"poetry run mlflow run --experiment-name={ctx.project.repository}" 51 | f" --run-name={job.capitalize()} -P job={job} ." 52 | ) 53 | 54 | 55 | @task(pre=[environment, call(run, job="main")], default=True) 56 | def all(_: Context) -> None: 57 | """Run all project tasks.""" 58 | -------------------------------------------------------------------------------- /tasks/serve.py: -------------------------------------------------------------------------------- 1 | """Serve tasks for pyinvoke.""" 2 | 3 | # %% IMPORTS 4 | 5 | from invoke.context import Context 6 | from invoke.tasks import task 7 | 8 | # %% TASKS 9 | 10 | 11 | @task 12 | def serve(ctx: Context) -> None: 13 | """Run the serving endpoint.""" 14 | ctx.run("poetry run python serving_endpoint/server.py", pty=True) 15 | 16 | 17 | @task(pre=[serve], default=True) 18 | def all(_: Context) -> None: 19 | """Run all serve tasks.""" 20 | -------------------------------------------------------------------------------- /tests/confs/invalid/0. invalid.yaml: -------------------------------------------------------------------------------- 1 | job: 2 | KIND: UnknownJob -------------------------------------------------------------------------------- /tests/confs/valid/0. 
feature_engineering.yaml: -------------------------------------------------------------------------------- 1 | job: 2 | KIND: FeatureEngineeringJob 3 | embedding_model: "amazon.titan-embed-text-v1" 4 | vector_store_path: "http://localhost:6333" 5 | document_path: "${tests_path:}/data/documents/" 6 | collection_name: "hr-documents" -------------------------------------------------------------------------------- /tests/confs/valid/1. deployment.yaml: -------------------------------------------------------------------------------- 1 | job: 2 | KIND: DeploymentJob 3 | staging_alias: "champion" 4 | production_alias: "production" 5 | registry_model_name: "pytest-rag-chatbot-with-guardrails" 6 | llm_confs: "/confs/rag_chain_config.yaml" 7 | llm_model_code_path: "/src/llmops_project/models/chatbot_with_guardrails.py" 8 | vector_store_path: "http://localhost:6333" -------------------------------------------------------------------------------- /tests/confs/valid/2. monitoring.yaml: -------------------------------------------------------------------------------- 1 | job: 2 | KIND: MonitoringEvalJob 3 | vector_store_path: "http://localhost:6333" 4 | registry_model_name: "rag-chatbot" 5 | qa_dataset_path: "${tests_path:}/data/datasets/rag_dataset.csv" 6 | alias: "champion" 7 | 8 | metric_tresholds: 9 | flesch_kincaid_grade_level_mean: 5.1 # metric mean must be bigger than this 10 | ari_grade_level_mean: 4.1 # metric mean must be bigger than this 11 | 12 | trace_experiment_name: "rag_chatbot_experiment" 13 | monitoring_experiment_name: "monitoring" 14 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | """Configuration for the tests.""" 2 | 3 | # %% IMPORTS 4 | 5 | import os 6 | import typing as T 7 | 8 | import omegaconf 9 | import pytest 10 | from _pytest import logging as pl 11 | from llmops_project.io import services 12 | 13 | # %% CONFIGS 14 | 15 | LIMIT = 1500 16 | N_SPLITS = 3 17 | TEST_SIZE = 24 * 7 # 1 week 18 | 19 | # %% FIXTURES 20 | 21 | # %% - Paths 22 | 23 | 24 | @pytest.fixture(scope="session") 25 | def tests_path() -> str: 26 | """Return the path of the tests folder.""" 27 | file = os.path.abspath(__file__) 28 | parent = os.path.dirname(file) 29 | return parent 30 | 31 | 32 | @pytest.fixture(scope="session") 33 | def data_path(tests_path: str) -> str: 34 | """Return the path of the data folder.""" 35 | return os.path.join(tests_path, "data") 36 | 37 | 38 | @pytest.fixture(scope="session") 39 | def confs_path(tests_path: str) -> str: 40 | """Return the path of the confs folder.""" 41 | return os.path.join(tests_path, "confs") 42 | 43 | 44 | @pytest.fixture(scope="session") 45 | def inputs_path(data_path: str) -> str: 46 | """Return the path of the inputs dataset.""" 47 | return os.path.join(data_path, "inputs_sample.parquet") 48 | 49 | 50 | @pytest.fixture(scope="session") 51 | def targets_path(data_path: str) -> str: 52 | """Return the path of the targets dataset.""" 53 | return os.path.join(data_path, "targets_sample.parquet") 54 | 55 | 56 | @pytest.fixture(scope="session") 57 | def outputs_path(data_path: str) -> str: 58 | """Return the path of the outputs dataset.""" 59 | return os.path.join(data_path, "outputs_sample.parquet") 60 | 61 | 62 | @pytest.fixture(scope="session") 63 | def session_tmp_path(tmp_path_factory) -> str: 64 | """Create a session-scoped temporary directory.""" 65 | return tmp_path_factory.mktemp("session_tmp") 66 | 67 | 68 | @pytest.fixture(scope="session") 69 | def 
tmp_outputs_path(session_tmp_path: str) -> str: 70 | """Return a session-scoped tmp path for the outputs dataset.""" 71 | return os.path.join(session_tmp_path, "outputs.parquet") 72 | 73 | 74 | @pytest.fixture(scope="session") 75 | def tmp_models_explanations_path(session_tmp_path: str) -> str: 76 | """Return a session-scoped tmp path for the model explanations dataset.""" 77 | return os.path.join(session_tmp_path, "models_explanations.parquet") 78 | 79 | 80 | @pytest.fixture(scope="session") 81 | def tmp_samples_explanations_path(session_tmp_path: str) -> str: 82 | """Return a session-scoped tmp path for the samples explanations dataset.""" 83 | return os.path.join(session_tmp_path, "samples_explanations.parquet") 84 | 85 | 86 | # %% - Configs 87 | 88 | 89 | @pytest.fixture(scope="session") 90 | def extra_config() -> str: 91 | """Extra config for scripts.""" 92 | # use OmegaConf resolver: ${tmp_path:} 93 | config = """ 94 | { 95 | "job": { 96 | "alerts_service": { 97 | "enable": false, 98 | }, 99 | "mlflow_service": { 100 | "tracking_uri": "${tmp_path:}/tracking/", 101 | "registry_uri": "${tmp_path:}/registry/", 102 | } 103 | } 104 | } 105 | """ 106 | return config 107 | 108 | 109 | # %% - Resolvers 110 | 111 | 112 | @pytest.fixture(scope="session", autouse=True) 113 | def tests_path_resolver(tests_path: str) -> str: 114 | """Register the tests path resolver with OmegaConf.""" 115 | 116 | def resolver() -> str: 117 | """Get tests path.""" 118 | return tests_path 119 | 120 | omegaconf.OmegaConf.register_new_resolver("tests_path", resolver, use_cache=True, replace=False) 121 | return tests_path 122 | 123 | 124 | @pytest.fixture(scope="session", autouse=True) 125 | def tmp_path_resolver(session_tmp_path: str) -> str: 126 | """Register the session-scoped tmp path resolver with OmegaConf.""" 127 | 128 | def resolver() -> str: 129 | """Get session tmp data path.""" 130 | return session_tmp_path 131 | 132 | omegaconf.OmegaConf.register_new_resolver("tmp_path", resolver, use_cache=False, replace=True) 133 | return session_tmp_path 134 | 135 | 136 | # %% - Services 137 | 138 | 139 | @pytest.fixture(scope="session", autouse=True) 140 | def logger_service() -> T.Generator[services.LoggerService, None, None]: 141 | """Return and start the logger service.""" 142 | service = services.LoggerService(colorize=False, diagnose=True) 143 | service.start() 144 | yield service 145 | service.stop() 146 | 147 | 148 | @pytest.fixture 149 | def logger_caplog( 150 | caplog: pl.LogCaptureFixture, logger_service: services.LoggerService 151 | ) -> T.Generator[pl.LogCaptureFixture, None, None]: 152 | """Extend pytest caplog fixture with the logger service (loguru).""" 153 | # https://loguru.readthedocs.io/en/stable/resources/migration.html#replacing-caplog-fixture-from-pytest-library 154 | logger = logger_service.logger() 155 | handler_id = logger.add( 156 | caplog.handler, 157 | level=0, 158 | format="{message}", 159 | filter=lambda record: record["level"].no >= caplog.handler.level, 160 | enqueue=False, # Set to 'True' if your test is spawning child processes. 
161 | ) 162 | yield caplog 163 | logger.remove(handler_id) 164 | 165 | 166 | # @pytest.fixture(scope="session", autouse=True) 167 | # def alerts_service() -> T.Generator[services.AlertsService, None, None]: 168 | # """Return and start the alerter service.""" 169 | # service = services.AlertsService(enable=False) 170 | # service.start() 171 | # yield service 172 | # service.stop() 173 | 174 | 175 | @pytest.fixture(scope="session", autouse=True) 176 | def mlflow_service(session_tmp_path: str) -> T.Generator[services.MlflowService, None, None]: 177 | """Return and start the mlflow service.""" 178 | service = services.MlflowService( 179 | tracking_uri=f"{session_tmp_path}/tracking/", 180 | registry_uri=f"{session_tmp_path}/registry/", 181 | experiment_name="Experiment-Testing", 182 | registry_name="Registry-Testing", 183 | ) 184 | service.start() 185 | yield service 186 | service.stop() 187 | 188 | 189 | # %% - Signatures 190 | -------------------------------------------------------------------------------- /tests/documents/sample_hr_manual.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/callmesora/llmops-python-package/b4db451c043f1975db0c0437c00e664bbab9a3cd/tests/documents/sample_hr_manual.pdf -------------------------------------------------------------------------------- /tests/io/test_configs.py: -------------------------------------------------------------------------------- 1 | # %% IMPORTS 2 | 3 | import os 4 | 5 | import omegaconf as oc 6 | from llmops_project.io import configs 7 | 8 | # %% PARSERS 9 | 10 | 11 | def test_parse_file(tmp_path: str) -> None: 12 | # given 13 | text = """ 14 | a: 1 15 | b: True 16 | c: [3, 4] 17 | """ 18 | path = os.path.join(tmp_path, "config.yml") 19 | with open(path, "w", encoding="utf-8") as writer: 20 | writer.write(text) 21 | # when 22 | config = configs.parse_file(path) 23 | # then 24 | assert config == { 25 | "a": 1, 26 | "b": True, 27 | "c": [3, 4], 28 | }, "File config should be parsed correctly!" 29 | 30 | 31 | def test_parse_string() -> None: 32 | # given 33 | text = """{"a": 1, "b": 2, "data": [3, 4]}""" 34 | # when 35 | config = configs.parse_string(text) 36 | # then 37 | assert config == { 38 | "a": 1, 39 | "b": 2, 40 | "data": [3, 4], 41 | }, "String config should be parsed correctly!" 42 | 43 | 44 | # %% MERGERS 45 | 46 | 47 | def test_merge_configs() -> None: 48 | # given 49 | confs = [oc.OmegaConf.create({"x": i, i: i}) for i in range(3)] 50 | # when 51 | config = configs.merge_configs(confs) 52 | # then 53 | assert config == { 54 | 0: 0, 55 | 1: 1, 56 | 2: 2, 57 | "x": 2, 58 | }, "Configs should be merged correctly!" 59 | 60 | 61 | # %% CONVERTERS 62 | 63 | 64 | def test_to_object() -> None: 65 | # given 66 | values = { 67 | "a": 1, 68 | "b": True, 69 | "c": [3, 4], 70 | } 71 | config = oc.OmegaConf.create(values) 72 | # when 73 | object_ = configs.to_object(config) 74 | # then 75 | assert object_ == values, "Object should be the same!" 76 | assert isinstance(object_, dict), "Object should be a dict!" 
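# NOTE (editor's sketch): the helpers exercised above compose into the usual
# load path for job configs: parse each file, merge them in order, convert to
# a plain object. This assumes only the configs API tested here; the file
# paths are illustrative.
#
#   from llmops_project.io import configs
#
#   files = [configs.parse_file(path) for path in ("confs/base.yaml", "confs/dev.yaml")]
#   merged = configs.merge_configs(files)  # later configs override earlier keys
#   settings = configs.to_object(merged)   # plain Python dict, as asserted above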
77 | -------------------------------------------------------------------------------- /tests/io/test_services.py: -------------------------------------------------------------------------------- 1 | # %% IMPORTS 2 | 3 | import _pytest.logging as pl 4 | import mlflow 5 | from llmops_project.io import services 6 | 7 | # %% SERVICES 8 | 9 | 10 | def test_logger_service( 11 | logger_service: services.LoggerService, logger_caplog: pl.LogCaptureFixture 12 | ) -> None: 13 | # given 14 | service = logger_service 15 | logger = service.logger() 16 | # when 17 | logger.debug("DEBUG") 18 | logger.error("ERROR") 19 | # then 20 | assert "DEBUG" in logger_caplog.messages, "Debug message should be logged!" 21 | assert "ERROR" in logger_caplog.messages, "Error message should be logged!" 22 | 23 | 24 | def test_mlflow_service(mlflow_service: services.MlflowService) -> None: 25 | # given 26 | service = mlflow_service 27 | run_config = mlflow_service.RunConfig( 28 | name="testing", 29 | tags={"service": "mlflow"}, 30 | description="a test run.", 31 | log_system_metrics=True, 32 | ) 33 | # when 34 | client = service.client() 35 | with service.run_context(run_config=run_config) as context: 36 | pass 37 | finished = client.get_run(run_id=context.info.run_id) 38 | # then 39 | # - run 40 | assert run_config.tags is not None, "Run config tags should be set!" 41 | # - mlflow 42 | assert service.tracking_uri == mlflow.get_tracking_uri(), "Tracking URI should be the same!" 43 | assert service.registry_uri == mlflow.get_registry_uri(), "Registry URI should be the same!" 44 | assert mlflow.get_experiment_by_name(service.experiment_name), "Experiment should be set up!" 45 | # - client 46 | assert service.tracking_uri == client.tracking_uri, "Tracking URI should be the same!" 47 | assert service.registry_uri == client._registry_uri, "Registry URI should be the same!" 48 | assert client.get_experiment_by_name(service.experiment_name), "Experiment should be set up!" 49 | # - context 50 | assert context.info.run_name == run_config.name, "Context name should be the same!" 51 | assert ( 52 | run_config.description in context.data.tags.values() 53 | ), "Context desc. should be in tags values!" 54 | assert ( 55 | context.data.tags.items() > run_config.tags.items() 56 | ), "Given tags should be a subset of the context tags!" 57 | assert context.info.status == "RUNNING", "Context should be running!" 58 | # - finished 59 | assert finished.info.status == "FINISHED", "Run should be finished!" 
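# NOTE (editor's sketch): outside the test suite, MlflowService is driven the
# same way as the mlflow_service fixture in conftest.py. The URIs and names
# below are illustrative, not project defaults.
#
#   service = services.MlflowService(
#       tracking_uri="./mlruns/tracking/",
#       registry_uri="./mlruns/registry/",
#       experiment_name="my-experiment",
#       registry_name="my-registry",
#   )
#   service.start()  # points mlflow at the tracking/registry URIs
#   run_config = service.RunConfig(
#       name="my-run",
#       tags={"team": "llmops"},
#       description="an example run.",
#       log_system_metrics=True,
#   )
#   with service.run_context(run_config=run_config) as run:
#       ...  # anything logged here is attached to run.info.run_id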
60 | -------------------------------------------------------------------------------- /tests/pipelines/deployment/test_deploy_model.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from llmops_project.pipelines.deployment.deploy_model import DeployModelJob 3 | 4 | # %% IMPORTS 5 | 6 | 7 | # %% TESTS 8 | 9 | 10 | @pytest.mark.parametrize( 11 | "registry_model_name, staging_alias, production_alias", 12 | [ 13 | ("test_model", "champion", "production"), 14 | pytest.param( 15 | "invalid_model", 16 | "champion", 17 | "production", 18 | marks=pytest.mark.xfail(reason="Invalid model name", raises=Exception), 19 | ), 20 | ], 21 | ) 22 | def test_deploy_model_job( 23 | mlflow_service, 24 | logger_service, 25 | registry_model_name: str, 26 | staging_alias: str, 27 | production_alias: str, 28 | ): 29 | job = DeployModelJob( 30 | registry_model_name=registry_model_name, 31 | staging_alias=staging_alias, 32 | production_alias=production_alias, 33 | mlflow_service=mlflow_service, 34 | logger_service=logger_service, 35 | ) 36 | 37 | with job as runner: 38 | result = runner.run() 39 | 40 | assert set(result.keys()) == { 41 | "self", 42 | "logger", 43 | "client", 44 | } 45 | 46 | model_version = result["client"].get_model_version_by_alias( 47 | name=registry_model_name, alias=production_alias 48 | ) 49 | tags = model_version.tags 50 | assert tags["passed_tests"] == "True" 51 | -------------------------------------------------------------------------------- /tests/pipelines/deployment/test_register_model.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from llmops_project.pipelines.deployment.register_model import LogAndRegisterModelJob 3 | 4 | # %% IMPORTS 5 | 6 | 7 | # %% TESTS 8 | 9 | 10 | @pytest.mark.parametrize( 11 | "registry_model_name, staging_alias, llm_model_code_path, llm_confs, vector_store_path", 12 | [ 13 | ( 14 | "test_model", 15 | "champion", 16 | "/src/llmops_project/models/chatbot_with_guardrails.py", 17 | "/confs/rag_chain_config.yaml", 18 | "http://localhost:6333", 19 | ), 20 | pytest.param( 21 | "invalid_model", 22 | "champion", 23 | "/invalid/path/to/model/code", 24 | "/invalid/path/to/config", 25 | "/invalid/path/to/vector/store", 26 | marks=pytest.mark.xfail(reason="Invalid paths", raises=Exception), 27 | ), 28 | ], 29 | ) 30 | def test_log_and_register_model_job( 31 | mlflow_service, 32 | logger_service, 33 | registry_model_name: str, 34 | staging_alias: str, 35 | llm_model_code_path: str, 36 | llm_confs: str, 37 | vector_store_path: str, 38 | ): 39 | # Given: A LogAndRegisterModelJob instance with the provided parameters 40 | job = LogAndRegisterModelJob( 41 | registry_model_name=registry_model_name, 42 | staging_alias=staging_alias, 43 | llm_model_code_path=llm_model_code_path, 44 | llm_confs=llm_confs, 45 | vector_store_path=vector_store_path, 46 | mlflow_service=mlflow_service, 47 | logger_service=logger_service, 48 | ) 49 | 50 | # When: The job is run 51 | with job as runner: 52 | out = runner.run() 53 | 54 | # Then: Verify the expected results 55 | assert set(out) == { 56 | "model_specs", 57 | "self", 58 | "llm_code_path", 59 | "logger", 60 | "client", 61 | "config_path", 62 | "run_id", 63 | "vector_store_path", 64 | "input_example", 65 | "script_dir", 66 | } 67 | 68 | # Verify if model was registered by checking if the model version exists 69 | 70 | latest_version = out["client"].get_model_version_by_alias(registry_model_name, staging_alias) 71 | 72 | tags = 
out["client"].get_model_version(registry_model_name, latest_version.version).tags 73 | 74 | assert "passed_tests" in tags, "Tag 'passed_tests' does not exist." 75 | -------------------------------------------------------------------------------- /tests/pipelines/feature_engineering/test_create_vector_db.py: -------------------------------------------------------------------------------- 1 | from unittest import mock 2 | 3 | import pytest 4 | from langchain_aws import BedrockEmbeddings 5 | from langchain_qdrant import QdrantVectorStore 6 | from llmops_project.io import services 7 | from llmops_project.io.vector_db import QdrantVectorDB 8 | from llmops_project.pipelines.feature_engineering.create_vector_db import CreateVectorDBJob 9 | 10 | # %% IMPORTS 11 | 12 | 13 | # %% TESTS 14 | 15 | 16 | @pytest.fixture 17 | def mock_bedrock_embeddings(): 18 | return mock.Mock(spec=BedrockEmbeddings) 19 | 20 | 21 | @pytest.fixture 22 | def mock_qdrant_vector_db(): 23 | return mock.Mock(spec=QdrantVectorDB) 24 | 25 | 26 | @pytest.mark.parametrize( 27 | "embedding_model, collection_name, vector_store_path", 28 | [ 29 | ("amazon.titan-embed-text-v1", "test_collection", "http://localhost:6333"), 30 | pytest.param( 31 | "amazon.titan-embed-text-v1", 32 | "test_collection", 33 | "http://localhost:6334", 34 | marks=pytest.mark.xfail(reason="Invalid localhost port", raises=Exception), 35 | ), 36 | ], 37 | ) 38 | def test_create_vector_db_job( 39 | logger_service: services.LoggerService, 40 | mlflow_service: services.MlflowService, 41 | embedding_model: str, 42 | collection_name: str, 43 | vector_store_path: str, 44 | ): 45 | job = CreateVectorDBJob( 46 | embedding_model=embedding_model, 47 | collection_name=collection_name, 48 | vector_store_path=vector_store_path, 49 | logger_service=logger_service, 50 | ) 51 | 52 | with job as runner: 53 | out = runner.run() 54 | 55 | assert set(out) == {"self", "logger", "embeddings", "vector_db", "script_dir"} 56 | 57 | # Vector Db 58 | assert out["vector_db"].embeddings.model_id == embedding_model 59 | assert out["vector_db"].collection_name == collection_name 60 | 61 | assert out["embeddings"].model_id == embedding_model 62 | 63 | try: 64 | QdrantVectorStore.from_existing_collection( 65 | embedding=out["embeddings"], 66 | collection_name=collection_name, 67 | url=vector_store_path, 68 | ) 69 | except Exception as e: 70 | pytest.fail(f"Failed to create QdrantVectorStore: {e}") 71 | -------------------------------------------------------------------------------- /tests/pipelines/feature_engineering/test_ingest_documents.py: -------------------------------------------------------------------------------- 1 | from unittest import mock 2 | 3 | import pytest 4 | from langchain_aws import BedrockEmbeddings 5 | from llmops_project.io import services 6 | from llmops_project.io.vector_db import QdrantVectorDB 7 | from llmops_project.pipelines.feature_engineering.ingest_documents import IngestAndUpdateVectorDBJob 8 | 9 | # %% IMPORTS 10 | 11 | 12 | # %% TESTS 13 | 14 | 15 | @pytest.fixture 16 | def mock_bedrock_embeddings(): 17 | return mock.Mock(spec=BedrockEmbeddings) 18 | 19 | 20 | @pytest.fixture 21 | def mock_qdrant_vector_db(): 22 | return mock.Mock(spec=QdrantVectorDB) 23 | 24 | 25 | @pytest.mark.parametrize( 26 | "embedding_model, collection_name, vector_store_path, document_path", 27 | [ 28 | ( 29 | "amazon.titan-embed-text-v1", 30 | "test_collection", 31 | "http://localhost:6333", 32 | "/tests/documents/", 33 | ), 34 | pytest.param( 35 | "invalid_model", 36 | 
"test_collection", 37 | "http://localhost:6333", 38 | "/tests/documents/", 39 | marks=pytest.mark.xfail(reason="Invalid embedding model", raises=Exception), 40 | ), 41 | pytest.param( 42 | "amazon.titan-embed-text-v1", 43 | "test_collection", 44 | "http://localhost:6333", 45 | "/invalid_path", 46 | marks=pytest.mark.xfail(reason=" Directory not found", raises=FileNotFoundError), 47 | ), 48 | ], 49 | ) 50 | def test_ingest_and_update_vector_db_job( 51 | logger_service: services.LoggerService, 52 | embedding_model: str, 53 | collection_name: str, 54 | vector_store_path: str, 55 | document_path: str, 56 | ): 57 | job = IngestAndUpdateVectorDBJob( 58 | embedding_model=embedding_model, 59 | collection_name=collection_name, 60 | vector_store_path=vector_store_path, 61 | document_path=document_path, 62 | logger_service=logger_service, 63 | ) 64 | 65 | with job as runner: 66 | result = runner.run() 67 | 68 | assert set(result.keys()) == { 69 | "self", 70 | "logger", 71 | "embeddings", 72 | "vector_db", 73 | "script_dir", 74 | "document_path", 75 | } 76 | 77 | # Try Querying the Qdrant Vector Store 78 | assert result["vector_db"].embeddings.model_id == embedding_model 79 | assert result["vector_db"].collection_name == collection_name 80 | 81 | query_results = result["vector_db"].query_database("What is the content of the documents?") 82 | for res in query_results: 83 | assert set(res.keys()) == {"score", "text", "source"} 84 | assert res["score"] is not None 85 | assert isinstance(res["text"], str) 86 | assert isinstance(res["source"], str) 87 | -------------------------------------------------------------------------------- /tests/pipelines/monitoring/test_generate_rag_dataset.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | from llmops_project.pipelines.monitoring.generate_rag_dataset import GenerateRagDatasetJob 5 | 6 | 7 | @pytest.mark.parametrize( 8 | "data_path, qa_dataset_path_csv, qa_dataset_path_json, llm_model", 9 | [ 10 | ( 11 | "/tests/documents/", 12 | "data/qa_dataset.csv", 13 | "data/qa_dataset.json", 14 | "anthropic.claude-3-haiku-20240307-v1:0", 15 | ), 16 | pytest.param( 17 | "/invalid_path", 18 | "data/qa_dataset.csv", 19 | "data/qa_dataset.json", 20 | "anthropic.claude-3-haiku-20240307-v1:0", 21 | marks=pytest.mark.xfail(reason="Invalid data path", raises=Exception), 22 | ), 23 | ], 24 | ) 25 | def test_generate_rag_dataset_job( 26 | logger_service, 27 | data_path: str, 28 | qa_dataset_path_csv: str, 29 | qa_dataset_path_json: str, 30 | llm_model: str, 31 | ): 32 | # Given: A GenerateRagDatasetJob instance with the provided parameters 33 | job = GenerateRagDatasetJob( 34 | data_path=data_path, 35 | qa_dataset_path_csv=qa_dataset_path_csv, 36 | qa_dataset_path_json=qa_dataset_path_json, 37 | llm_model=llm_model, 38 | logger_service=logger_service, 39 | ) 40 | 41 | # When: The job is run 42 | with job as runner: 43 | out = runner.run() 44 | 45 | # Then: Verify the expected results 46 | assert set(out) == { 47 | "self", 48 | "data_path", 49 | "final_dataset_path", 50 | "final_dataset_json_path", 51 | "logger", 52 | "script_dir", 53 | "project_root", 54 | } 55 | 56 | # Verify if the CSV and JSON files are created 57 | assert Path(out["final_dataset_path"]).exists(), "CSV file was not created." 58 | assert Path(out["final_dataset_json_path"]).exists(), "JSON file was not created." 
59 | -------------------------------------------------------------------------------- /tests/pipelines/monitoring/test_pre_deploy_eval.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from llmops_project.pipelines.monitoring.pre_deploy_eval import EvaluateModelJob 3 | 4 | # %% TESTS 5 | 6 | 7 | @pytest.mark.parametrize( 8 | "qa_dataset_path, registry_model_name, alias, vector_store_path, metric_tresholds, expect_failure", 9 | [ 10 | ( 11 | "/data/qa_dataset.csv", 12 | "test_model", 13 | "champion", 14 | "http://localhost:6333", 15 | {"flesch_kincaid_grade_level_mean": 5.0, "ari_grade_level_mean": 5.0}, 16 | False, 17 | ), 18 | pytest.param( 19 | "/invalid/path/to/qa_dataset.csv", 20 | "test_model", 21 | "champion", 22 | "/invalid/path/to/vector_store", 23 | {"flesch_kincaid_grade_level_mean": 5.0, "ari_grade_level_mean": 5.0}, 24 | True, 25 | marks=pytest.mark.xfail(reason="Invalid paths", raises=Exception), 26 | ), 27 | ], 28 | ) 29 | def test_evaluate_model_job( 30 | mlflow_service, 31 | logger_service, 32 | qa_dataset_path: str, 33 | registry_model_name: str, 34 | alias: str, 35 | vector_store_path: str, 36 | metric_tresholds: dict, 37 | expect_failure: bool, 38 | ): 39 | # Given: An EvaluateModelJob instance with the provided parameters 40 | job = EvaluateModelJob( 41 | qa_dataset_path=qa_dataset_path, 42 | registry_model_name=registry_model_name, 43 | alias=alias, 44 | vector_store_path=vector_store_path, 45 | metric_tresholds=metric_tresholds, 46 | mlflow_service=mlflow_service, 47 | logger_service=logger_service, 48 | ) 49 | 50 | # When: The job is run 51 | with job as runner: 52 | out = runner.run() 53 | 54 | # Then: Verify the expected results 55 | if expect_failure: 56 | assert "eval_df" not in out, "Evaluation DataFrame should not be present." 57 | else: 58 | assert "eval_df" in out, "Evaluation DataFrame not found in output." 59 | assert "results" in out, "Results not found in output." 60 | 61 | # Verify the output variables 62 | assert set(out) == { 63 | "logger", 64 | "script_dir", 65 | "data_path", 66 | "eval_df", 67 | "model", 68 | "results", 69 | "result_metrics", 70 | "metrics", 71 | "thresholds", 72 | "beats_baseline", 73 | "self", 74 | "threshold", 75 | "metric", 76 | } 77 | -------------------------------------------------------------------------------- /tests/pipelines/test_base.py: -------------------------------------------------------------------------------- 1 | # %% IMPORTS 2 | from llmops_project.io import services 3 | from llmops_project.pipelines import base 4 | 5 | # %% JOBS 6 | 7 | 8 | def test_job( 9 | logger_service: services.LoggerService, 10 | mlflow_service: services.MlflowService, 11 | ) -> None: 12 | # given 13 | class MyJob(base.Job): 14 | KIND: str = "MyJob" 15 | 16 | def run(self) -> base.Locals: 17 | a, b = 1, "test" 18 | return locals() 19 | 20 | job = MyJob(logger_service=logger_service, mlflow_service=mlflow_service) 21 | # when 22 | with job as runner: 23 | out = runner.run() 24 | # then 25 | # - inputs 26 | assert hasattr(job, "logger_service"), "Job should have an Logger service!" 27 | assert hasattr(job, "mlflow_service"), "Job should have an Mlflow service!" 28 | # - outputs 29 | assert set(out) == {"self", "a", "b"}, "Run should return local variables!" 
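# NOTE (editor's sketch): the KIND field on each Job subclass is the value a
# config file selects on; settings.py declares the job field as a pydantic
# discriminated union with discriminator="KIND". A config targeting the
# hypothetical MyJob class above would therefore read (YAML shown as a
# comment):
#
#   job:
#     KIND: MyJob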
30 | -------------------------------------------------------------------------------- /tests/test_scripts.py: -------------------------------------------------------------------------------- 1 | # %% IMPORTS 2 | 3 | 4 | # %% FUNCTIONS 5 | 6 | 7 | # %% SCRIPTS 8 | 9 | 10 | # def test_schema(capsys: pc.CaptureFixture[str]) -> None: 11 | # # given 12 | # args = ["prog", "--schema"] 13 | # # when 14 | # scripts.main(args) 15 | # captured = capsys.readouterr() 16 | # # then 17 | # assert captured.err == "", "Captured error should be empty!" 18 | # assert json.loads(captured.out), "Captured output should be a JSON!" 19 | 20 | 21 | # @pytest.mark.parametrize( 22 | # "scenario", 23 | # [ 24 | # "valid", 25 | # pytest.param( 26 | # "invalid", 27 | # marks=pytest.mark.xfail( 28 | # reason="Invalid config.", 29 | # raises=pdt.ValidationError, 30 | # ), 31 | # ), 32 | # ], 33 | # ) 34 | # def test_main(scenario: str, confs_path: str, extra_config: str) -> None: 35 | # # given 36 | # folder = os.path.join(confs_path, scenario) 37 | # confs = list(sorted(os.listdir(folder))) 38 | # # when 39 | # for conf in confs: # one job per config 40 | # config = os.path.join(folder, conf) 41 | # argv = [config, "-e", extra_config] 42 | # status = scripts.main(argv=argv) 43 | # # then 44 | # assert status == 0, f"Job should succeed for config: {config}" 45 | 46 | 47 | # def test_main__no_configs() -> None: 48 | # # given 49 | # argv: list[str] = [] 50 | # # when 51 | # with pytest.raises(RuntimeError) as error: 52 | # scripts.main(argv) 53 | # # then 54 | # assert error.match("No configs provided."), "RuntimeError should be raised!" 55 | 56 | 57 | # %% 58 | --------------------------------------------------------------------------------