├── .env.example ├── .env.example.docker ├── .github ├── dependabot.yml └── workflows │ └── test-deploy.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── Dockerfile ├── Dockerfile.ollama ├── LICENSE ├── Makefile ├── README.md ├── assets ├── embeddings_img.png ├── icon.svg ├── promptfoo_img.png └── streamlit_img.png ├── docker-compose.yml ├── mkdocs.yml ├── pyproject.toml ├── scripts └── gen_doc_stubs.py ├── src ├── __init__.py ├── api │ ├── __init__.py │ ├── api.py │ ├── api_route.py │ └── log_config.py ├── evaluation │ ├── __init__.py │ ├── configs │ │ ├── __init__.py │ │ ├── config_baseline.py │ │ ├── config_compare_prompts.yaml │ │ ├── config_json.py │ │ ├── config_json.yaml │ │ ├── config_simple.yaml │ │ └── redteam_config.yaml │ ├── context.py │ ├── data │ │ ├── test_json.csv │ │ └── test_simple.csv │ ├── instructions.txt │ └── metrics │ │ ├── __init__.py │ │ ├── data_types.py │ │ ├── information_extraction │ │ ├── __init__.py │ │ ├── entity_level.py │ │ ├── exact_match_json.py │ │ ├── missing_fields.py │ │ └── similarity_json.py │ │ ├── order_aware │ │ ├── __init__.py │ │ └── reciprocal_rank.py │ │ ├── order_unaware │ │ ├── __init__.py │ │ ├── f1_at_k.py │ │ ├── precision_at_k.py │ │ └── recall_at_k.py │ │ ├── ragas_metrics │ │ ├── __init__.py │ │ ├── ragas_answer_correctness.py │ │ ├── ragas_answer_relevancy.py │ │ ├── ragas_answer_similarity.py │ │ ├── ragas_context_entity_recall.py │ │ ├── ragas_context_precision.py │ │ ├── ragas_context_recall.py │ │ ├── ragas_context_utilization.py │ │ ├── ragas_faithfulness.py │ │ └── ragas_harmfulness.py │ │ └── utils.py ├── main_backend.py ├── main_frontend.py ├── ml │ ├── __init__.py │ ├── ai.py │ └── llm.py ├── pages │ ├── 0_chat.py │ ├── 1_embeddings.py │ ├── 2_azure_rag.py │ ├── 3_fastapi_azure_rag.py │ └── __init__.py ├── settings_env.py └── utils.py ├── tests ├── __init__.py ├── readme.txt ├── test_llm_endpoint.py ├── test_rag.py └── test_settings.py └── uv.lock /.env.example: -------------------------------------------------------------------------------- 1 | # -- DEV MODE if true, log debugs and traces 2 | DEV_MODE=True 3 | 4 | # Ollama and ollamazure models to emulate openai or azure_openai 5 | # run make run-ollama or make run-ollamazure to emulate openai or azure_openai locally 6 | OLLAMA_MODEL_NAME=qwen2.5:0.5b 7 | OLLAMA_EMBEDDING_MODEL_NAME=all-minilm:l6-v2 8 | 9 | INFERENCE_DEPLOYMENT_NAME=ollama_chat/qwen2.5:0.5b 10 | INFERENCE_BASE_URL=http://localhost:11434 11 | INFERENCE_API_KEY=t 12 | 13 | EMBEDDINGS_DEPLOYMENT_NAME=ollama/all-minilm:l6-v2 14 | EMBEDDINGS_BASE_URL=http://localhost:11434 15 | EMBEDDINGS_API_KEY=t 16 | 17 | # -- FASTAPI 18 | FASTAPI_HOST=localhost 19 | FASTAPI_PORT=8080 20 | # -- Streamlit 21 | STREAMLIT_PORT=8501 22 | 23 | ####################### EVALUATION ############################ 24 | # (Optional) If you want to use Promptfoo and ragas, the eval tool 25 | ENABLE_EVALUATION=False 26 | EVALUATOR_DEPLOYMENT_NAME=ollama_chat/qwen2.5:0.5b 27 | EVALUATOR_BASE_URL="http://localhost:11434/" 28 | EVALUATOR_API_KEY="t" 29 | 30 | 31 | ####################### AI SEARCH ############################ 32 | # (Optional) If you want to use Azure Search AI 33 | ENABLE_AZURE_SEARCH=False 34 | AZURE_SEARCH_TOP_K=3 35 | AZURE_SEARCH_API_KEY="" 36 | AZURE_SEARCH_INDEX_NAME="" 37 | AZURE_SEARCH_INDEXER_NAME="" 38 | AZURE_SEARCH_SERVICE_ENDPOINT="" 39 | SEMENTIC_CONFIGURATION_NAME="" 40 | # -- AZURE BLOB STORAGE 41 | AZURE_STORAGE_ACCOUNT_NAME="" 42 | AZURE_STORAGE_ACCOUNT_KEY="" 
43 | AZURE_CONTAINER_NAME="" 44 | -------------------------------------------------------------------------------- /.env.example.docker: -------------------------------------------------------------------------------- 1 | # -- DEV MODE if true, log debugs and traces 2 | DEV_MODE=True 3 | 4 | # Ollama and ollamazure models to emulate openai or azure_openai 5 | # run make run-ollama or make run-ollamazure to emulate openai or azure_openai locally 6 | OLLAMA_MODEL_NAME=qwen2.5:0.5b 7 | OLLAMA_EMBEDDING_MODEL_NAME=all-minilm:l6-v2 8 | 9 | INFERENCE_DEPLOYMENT_NAME=ollama_chat/qwen2.5:0.5b 10 | INFERENCE_BASE_URL=http://ollama:11434 11 | INFERENCE_API_KEY=t 12 | 13 | EMBEDDINGS_DEPLOYMENT_NAME=ollama/all-minilm:l6-v2 14 | EMBEDDINGS_BASE_URL=http://ollama:11434 15 | EMBEDDINGS_API_KEY=t 16 | 17 | # -- FASTAPI 18 | FASTAPI_HOST=localhost 19 | FASTAPI_PORT=8080 20 | # -- Streamlit 21 | STREAMLIT_PORT=8501 22 | 23 | ####################### EVALUATION ############################ 24 | # (Optional) If you want to use Promptfoo and ragas, the eval tool 25 | ENABLE_EVALUATION=False 26 | EVALUATOR_DEPLOYMENT_NAME=ollama_chat/qwen2.5:0.5b 27 | EVALUATOR_BASE_URL="http://localhost:11434/" 28 | EVALUATOR_API_KEY="t" 29 | 30 | 31 | ####################### AI SEARCH ############################ 32 | # (Optional) If you want to use Azure Search AI 33 | ENABLE_AZURE_SEARCH=False 34 | AZURE_SEARCH_TOP_K=3 35 | AZURE_SEARCH_API_KEY="" 36 | AZURE_SEARCH_INDEX_NAME="" 37 | AZURE_SEARCH_INDEXER_NAME="" 38 | AZURE_SEARCH_SERVICE_ENDPOINT="" 39 | SEMENTIC_CONFIGURATION_NAME="" 40 | # -- AZURE BLOB STORAGE 41 | AZURE_STORAGE_ACCOUNT_NAME="" 42 | AZURE_STORAGE_ACCOUNT_KEY="" 43 | AZURE_CONTAINER_NAME="" 44 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file 5 | 6 | version: 2 7 | updates: 8 | 9 | # Enable version updates for GitHub Actions 10 | - package-ecosystem: "github-actions" 11 | # Workflow files stored in the default location of `.github/workflows` 12 | # You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`. 13 | directory: "/" 14 | schedule: 15 | interval: "weekly" 16 | 17 | - package-ecosystem: "uv" 18 | # Workflow files stored in the default location of `.github/workflows` 19 | # You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`. 
20 | directory: "/" 21 | schedule: 22 | interval: "weekly" 23 | -------------------------------------------------------------------------------- /.github/workflows/test-deploy.yaml: -------------------------------------------------------------------------------- 1 | name: 'Test-Deploy' 2 | 3 | on: 4 | pull_request: 5 | push: 6 | 7 | jobs: 8 | 9 | pre-commit: 10 | runs-on: ubuntu-22.04 11 | steps: 12 | - uses: actions/checkout@v4 13 | - uses: actions/setup-python@v5 14 | with: 15 | python-version: '3.11' 16 | - uses: pre-commit/action@v3.0.1 17 | 18 | test-and-build: 19 | runs-on: ubuntu-22.04 20 | steps: 21 | - uses: actions/checkout@v4 22 | with: 23 | fetch-depth: 0 24 | # I prefer to use my Makefile to test the commands instead of calling uv actions. 25 | - name: Install Python dependencies 26 | shell: bash 27 | run: | 28 | make install-dev 29 | 30 | - name: Install and run ollama 31 | shell: bash 32 | run: | 33 | cp .env.example .env 34 | make install-ollama 35 | make download-ollama-model 36 | make run-ollama & 37 | 38 | - name: Run tests 39 | shell: bash 40 | run: | 41 | make test 42 | test-docker-compose: 43 | runs-on: ubuntu-22.04 44 | if: github.event_name == 'pull_request' 45 | steps: 46 | - uses: actions/checkout@v4 47 | with: 48 | fetch-depth: 0 49 | - name: Run tests 50 | shell: bash 51 | run: | 52 | cp .env.example .env 53 | - uses: adambirds/docker-compose-action@v1.5.0 54 | with: 55 | compose-file: "./docker-compose.yml" 56 | 57 | deploy-Github-Pages: 58 | # Add a dependency to the build job 59 | needs: [pre-commit, test-and-build] 60 | if: ${{ needs.pre-commit.result == 'success' && needs.test-and-build.result == 'success' && github.ref == 'refs/heads/main'}} 61 | 62 | # Grant GITHUB_TOKEN the permissions required to make a Pages deployment 63 | permissions: 64 | contents: write 65 | 66 | # Deploy to the github-pages environment 67 | environment: 68 | name: github-pages 69 | url: ${{ steps.deployment.outputs.page_url }} 70 | 71 | # Specify runner + deployment step 72 | runs-on: ubuntu-latest 73 | steps: 74 | - uses: actions/checkout@v4 75 | with: 76 | fetch-depth: 0 77 | 78 | - name: Install Python dependencies 79 | shell: bash 80 | run: | 81 | make install-dev 82 | 83 | - name: Deploy Github Pages 84 | shell: bash 85 | run: | 86 | make deploy-doc-gh 87 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 110 | .pdm.toml 111 | .pdm-python 112 | .pdm-build/ 113 | 114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .envd 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
162 | .idea/ 163 | 164 | # environment var 165 | .env 166 | *.secrets 167 | 168 | # node 169 | node_modules/ 170 | 171 | # github actions local 172 | bin/act 173 | 174 | #gitlab ci local 175 | .gitlab-ci-local/ 176 | .gitlab-ci-local-env 177 | .gitlab-ci-local.yml 178 | 179 | # Windows specific files 180 | *Zone.Identifier 181 | 182 | 183 | .cache* 184 | 185 | **/*tmp*.py 186 | **/*temp*.py 187 | 188 | 189 | 190 | src/evaluation/data/* 191 | !src/evaluation/data/test_json.csv 192 | !src/evaluation/data/test_simple.csv 193 | 194 | .qodo 195 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | # ruff is for python files and is used with ruff.toml 4 | 5 | # jupyter_jupyterlab_server_config.py is based on a large autogenerated file. 6 | # There is no value in checking it. 7 | exclude: "notebook.ipynb" 8 | 9 | 10 | repos: 11 | - repo: https://github.com/pre-commit/pre-commit-hooks 12 | rev: v4.3.0 13 | hooks: 14 | - id: check-added-large-files 15 | args: ['--maxkb=3000'] 16 | - id: check-toml 17 | - id: check-yaml 18 | - id: check-json 19 | - id: detect-private-key 20 | - id: end-of-file-fixer 21 | # - id: requirements-txt-fixer 22 | # exclude: ^requirements/.*$ 23 | - id: trailing-whitespace # This hook trims trailing whitespace 24 | - id: check-merge-conflict # Check for files that contain merge conflict strings 25 | - id: name-tests-test 26 | args: [ --pytest-test-first ] 27 | 28 | - repo: https://github.com/astral-sh/ruff-pre-commit 29 | rev: v0.11.0 30 | hooks: 31 | # Run the linter. 32 | - id: ruff 33 | args: [ --fix ] 34 | # Run the formatter. 35 | - id: ruff-format 36 | 37 | - repo: https://github.com/Yelp/detect-secrets 38 | rev: v1.5.0 39 | hooks: 40 | - id: detect-secrets 41 | args: ['--exclude-files', 'notebook.ipynb'] 42 | 43 | - repo: https://github.com/compilerla/conventional-pre-commit 44 | rev: v1.2.0 45 | hooks: 46 | - id: conventional-pre-commit 47 | stages: 48 | - commit-msg 49 | 50 | - repo: https://github.com/astral-sh/uv-pre-commit 51 | rev: 0.6.6 52 | hooks: 53 | # Update the uv lockfile 54 | - id: uv-lock 55 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to make participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to a positive environment for our 15 | community include: 16 | 17 | * Demonstrating empathy and kindness toward other people 18 | * Being respectful of differing opinions, viewpoints, and experiences 19 | * Giving and gracefully accepting constructive feedback 20 | * Accepting responsibility and apologizing to those affected by our mistakes, 21 | and learning from the experience 22 | * Focusing on what is best not just for us as individuals, but for the 23 | overall community 24 | 25 | Examples of unacceptable behavior include: 26 | 27 | * The use of sexualized language or imagery, and sexual attention or 28 | advances 29 | * Trolling, insulting or derogatory comments, and personal or political attacks 30 | * Public or private harassment 31 | * Publishing others' private information, such as a physical or email 32 | address, without their explicit permission 33 | * Other conduct which could reasonably be considered inappropriate in a 34 | professional setting 35 | 36 | ## Our Responsibilities 37 | 38 | Project maintainers are responsible for clarifying and enforcing our standards of 39 | acceptable behavior and will take appropriate and fair corrective action in 40 | response to any instances of unacceptable behavior. 41 | 42 | Project maintainers have the right and responsibility to remove, edit, or reject 43 | comments, commits, code, wiki edits, issues, and other contributions that are 44 | not aligned to this Code of Conduct, or to ban 45 | temporarily or permanently any contributor for other behaviors that they deem 46 | inappropriate, threatening, offensive, or harmful. 47 | 48 | ## Scope 49 | 50 | This Code of Conduct applies within all community spaces, and also applies when 51 | an individual is officially representing the community in public spaces. 52 | Examples of representing our community include using an official e-mail address, 53 | posting via an official social media account, or acting as an appointed 54 | representative at an online or offline event. 55 | 56 | ## Enforcement 57 | 58 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 59 | reported to the community leaders responsible for enforcement at <>. 60 | All complaints will be reviewed and investigated promptly and fairly. 61 | 62 | All community leaders are obligated to respect the privacy and security of the 63 | reporter of any incident. 64 | 65 | ## Attribution 66 | 67 | This Code of Conduct is adapted from the [Contributor Covenant](https://contributor-covenant.org/), version 68 | [1.4](https://www.contributor-covenant.org/version/1/4/code-of-conduct/code_of_conduct.md) and 69 | [2.0](https://www.contributor-covenant.org/version/2/0/code_of_conduct/code_of_conduct.md), 70 | and was generated by [contributing-gen](https://github.com/bttger/contributing-gen). 71 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to this project 2 | 3 | First off, thanks for taking the time to contribute! ❤️ 4 | 5 | ## 1. Code of Conduct 6 | 7 | This project and everyone participating in it is governed by the [Code of Conduct](CODE_OF_CONDUCT.md). 8 | By participating, you are expected to uphold this code. Please report unacceptable behavior. 9 | 10 | 11 | ## 2. Team members: 12 | - Amine Djeghri 13 | 14 | ## 3. 
Best practices 💡 15 | - Docstring your functions and classes, it is even more important as it is used to generate the documentation with Mkdocs 16 | - If you use an IDE (like pycharm), define src the "source" folder and test the "test" folder so your IDE can help you auto import files 17 | - Use the `make` commands to run your code, it is easier and faster than writing the full command (and check the Makefile for all available commands 😉) 18 | - Run [Use the pre-commit hooks](https://pre-commit.com/) to ensure your code is formatted correctly and is of good quality 19 | - [UV](https://docs.astral.sh/uv/ ) is powerful (multi-thread, package graph solving, rust backend, etc.) use it as much as you can. 20 | - If you have a lot of data, use Polars for faster and more efficient dataframe processing. 21 | - If you have CPU intensive tasks, use multiprocessing with python's pool map. 22 | 23 | - Exceptions: 24 | - Always log the exceptions and errors (use loguru) and then raise them 25 | ```py 26 | except Exception as e: 27 | logger.error(e) # Log the original error with a personalized message or with e (only the message will be logged) 28 | raise e # All the stack trace will be logged 29 | ``` 30 | - Sometimes, you don't need to raise the exception (in a loop for example) to not interrupt the execution. 31 | - Use if else instead of catching and raising the exception when possible (log and raise also) 32 | ```py 33 | if not os.path.exists(file_path): 34 | logger.error(f"File not found: {file_path}. The current directory is: {os.getcwd()}") 35 | raise FileNotFoundError(f"The file {file_path} does not exist.") 36 | ``` 37 | ## 4. How to contribute 38 | ### 4.1 File structure (🌳 Tree) 39 | Check the readme file. 40 | 41 | ### 4.2 Steps for Installation (Contributors and maintainers) 42 | 43 | - The first step is [to install, read and test the project as a user](README.md#-steps-for-installation-users) 44 | - Then you can either [develop in a container](#22-or-develop-in-a-container) or [develop locally](#21-local-development) 45 | 46 | #### a. Local development 47 | - Requires Debian (Ubuntu 22.04) or MacOS. 48 | - Python will be installed using uv. 49 | - git clone the repository 50 | 51 | - To install the dev dependencies (pre-commit, pytest, ruff...), run ``make install-dev`` 52 | - run ``make pre-commit install`` to install pre-commit hooks 53 | - To install the GitHub actions locally, run ``make install-act`` 54 | - To install the gitlab ci locally, run ``make install-ci`` 55 | 56 | #### b. or Develop in a container 57 | - If you have a .venv folder locally, you need to delete it, otherwise it will create a conflict since the project is mounted in the container. 58 | - You can run a docker image containing the project with ``make docker-prod`` (or ``make docker-dev`` if you want the project to be mounted in the container). 59 | - A venv is created inside the container and the dependencies are installed. 60 | 61 | 62 | ### 4.3. Run the test to see if everything is working 63 | - Test the package with : 64 | - ``make test`` will run all the tests (requires .env file) 65 | 66 | ### 4.4. Pushing your work 67 | - Before you start working on an issue, please comment on (or create) the issue and wait for it to be assigned to you. If 68 | someone has already been assigned but didn't have the time to work on it lately, please communicate with them and ask if 69 | they're still working on it. This is to avoid multiple people working on the same issue. 
70 | Once you have been assigned an issue, you can start working on it. When you are ready to submit your changes, open a 71 | pull request. For a detailed pull request tutorial, see this guide. 72 | 73 | 1. Create a branch from the dev branch and respect the naming convention: `feature/your-feature-name` 74 | or `bugfix/your-bug-name`. 75 | 2. Before committing your code: 76 | - Run ``make test`` to run the tests 77 | - Run ``make pre-commit`` to check the code style & linting. 78 | - Run ``make deploy-doc-local`` to update the documentation locally and test the website. 79 | - (optional) Commit Messages: This project uses [Gitmoji](https://gitmoji.dev/) for commit messages. It helps to 80 | understand the purpose of the commit through emojis. For example, a commit message with a bug fix can be prefixed with 81 | 🐛. There are also [Emojis in GitHub](https://github.com/ikatyang/emoji-cheat-sheet/blob/master/README.md) 82 | - Manually merge the dev branch into your branch to avoid and resolve any conflicts. Merging strategy: merge dev → 83 | your_branch 84 | - After merging, run ``make test`` and ``make pre-commit`` again to ensure that the tests are still passing. 85 | - Update the version in the ``pyproject.toml`` file 86 | - If your project is a Python package, run ``make build-package`` to build the package and create the wheel in the `dist` folder 87 | 3. Run CI/CD Locally: Depending on the platform you use: 88 | - GitHub Actions: run `make install-act` then `make act` for GitHub Actions 89 | 4. Create a pull request. If the GitHub Actions pass, the PR will be accepted and merged to dev. 90 | 91 | ### 4.5. (For repository maintainers) Merging strategies & GitHub Actions guidelines 92 | 93 | - Once the dev branch is tested, the pipeline is green, and the PR has been accepted, you can merge with a 'merge' 94 | strategy. 95 | - DEV → MAIN: Then, you should create a merge from dev to main with the Squash strategy. 96 | - MAIN → RELEASE: The status of the ticket will then change to 'done'. 97 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Run ``make docker-dev`` from the root of the project 2 | 3 | 4 | # Define an argument for the Python version, defaulting to 3.11 if not provided. 5 | ARG PYTHON_VERSION=3.11.4 6 | FROM python:${PYTHON_VERSION}-slim 7 | LABEL authors="amine" 8 | 9 | # Prevents Python from writing pyc files. 10 | ENV PYTHONDONTWRITEBYTECODE=1 11 | # output is written directly to stdout or stderr without delay, making logs appear immediately in the console or in log files. 12 | ENV PYTHONUNBUFFERED=1 13 | 14 | # keep this in case some commands use sudo (tesseract for example). This docker doesn't need a password 15 | #RUN apt-get update && apt-get install -y sudo && apt-get clean && rm -rf /var/lib/apt/lists/* 16 | 17 | RUN apt update -y 18 | RUN apt upgrade -y 19 | RUN apt-get install build-essential -y 20 | RUN apt-get install curl -y 21 | RUN apt autoremove -y 22 | RUN apt autoclean -y 23 | 24 | # Set environment variables 25 | ENV APP_DIR=/generative-ai-project-template 26 | # Set working directory 27 | WORKDIR $APP_DIR 28 | 29 | # Copy the dependency files and install them before copying the project, so this layer is not rebuilt every time the code changes 30 | COPY pyproject.toml uv.lock Makefile $APP_DIR 31 | 32 | RUN make install-dev 33 | 34 | COPY . 
$APP_DIR 35 | 36 | # Define default entrypoint if needed (Optional) 37 | CMD ["/bin/bash"] 38 | -------------------------------------------------------------------------------- /Dockerfile.ollama: -------------------------------------------------------------------------------- 1 | FROM ollama/ollama:latest 2 | 3 | # Update and install necessary packages 4 | RUN apt update -y && \ 5 | apt upgrade -y && \ 6 | apt-get install build-essential -y 7 | 8 | 9 | COPY Makefile .env.example.docker ./ 10 | RUN mv .env.example.docker .env 11 | RUN make install-ollama 12 | RUN make run-ollama && make download-ollama-model 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Amine Djeghri 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | ENV_FILE_PATH := .env 2 | -include $(ENV_FILE_PATH) # keep the '-' to ignore this file if it doesn't exist.(Used in gitlab ci) 3 | 4 | # Colors 5 | GREEN=\033[0;32m 6 | YELLOW=\033[0;33m 7 | NC=\033[0m 8 | 9 | UV := "$$HOME/.local/bin/uv" # keep the quotes incase the path contains spaces 10 | 11 | # installation 12 | install-uv: 13 | @echo "${YELLOW}=========> installing uv ${NC}" 14 | @if [ -f $(UV) ]; then \ 15 | echo "${GREEN}uv exists at $(UV) ${NC}"; \ 16 | $(UV) self update; \ 17 | else \ 18 | echo "${YELLOW}Installing uv${NC}"; \ 19 | curl -LsSf https://astral.sh/uv/install.sh | env UV_INSTALL_DIR="$$HOME/.local/bin" sh ; \ 20 | fi 21 | 22 | install-prod:install-uv 23 | @echo "${YELLOW}=========> Installing dependencies...${NC}" 24 | @$(UV) sync --no-group dev --no-group docs 25 | @echo "${GREEN}Dependencies installed.${NC}" 26 | 27 | install-dev:install-uv 28 | @echo "${YELLOW}=========> Installing dependencies...\n \ 29 | Development dependencies (dev & docs) will be installed by default in install-dev.${NC}" 30 | @$(UV) sync 31 | @echo "${GREEN}Dependencies installed.${NC}" 32 | 33 | STREAMLIT_PORT ?= 8501 34 | run-frontend: 35 | @echo "Running frontend" 36 | cd src; $(UV) run streamlit run main_frontend.py --server.port $(STREAMLIT_PORT) --server.headless True; 37 | 38 | run-backend: 39 | @echo "Running backend" 40 | cd src; $(UV) run main_backend.py; 41 | 42 | run-app: 43 | make run-frontend run-backend -j2 44 | 45 | pre-commit-install: 46 | @echo "${YELLOW}=========> Installing pre-commit...${NC}" 47 | $(UV) run pre-commit install 48 | 49 | pre-commit:pre-commit-install 50 | @echo "${YELLOW}=========> Running pre-commit...${NC}" 51 | $(UV) run pre-commit run --all-files 52 | 53 | 54 | ####### local CI / CD ######## 55 | # uv caching : 56 | prune-uv: 57 | @echo "${YELLOW}=========> Prune uv cache...${NC}" 58 | @$(UV) cache prune 59 | # clean uv caching 60 | clean-uv-cache: 61 | @echo "${YELLOW}=========> Cleaning uv cache...${NC}" 62 | @$(UV) cache clean 63 | 64 | # Github actions locally 65 | install-act: 66 | @echo "${YELLOW}=========> Installing github actions act to test locally${NC}" 67 | curl --proto '=https' --tlsv1.2 -sSf https://raw.githubusercontent.com/nektos/act/master/install.sh | bash 68 | @echo -e "${YELLOW}Github act version is :" 69 | @./bin/act --version 70 | 71 | act: 72 | @echo "${YELLOW}Running Github Actions locally...${NC}" 73 | @./bin/act --env-file .env --secret-file .secrets 74 | 75 | 76 | # clear GitHub and Gitlab CI local caches 77 | clear_ci_cache: 78 | @echo "${YELLOW}Clearing CI cache...${NC}" 79 | @echo "${YELLOW}Clearing Github ACT local cache...${NC}" 80 | rm -rf ~/.cache/act ~/.cache/actcache 81 | 82 | ######## Ollama 83 | install-ollama: 84 | @echo "${YELLOW}=========> Installing ollama first...${NC}" 85 | @if [ "$$(uname)" = "Darwin" ]; then \ 86 | echo "Detected macOS. Installing Ollama with Homebrew..."; \ 87 | brew install --force --cask ollama; \ 88 | elif [ "$$(uname)" = "Linux" ]; then \ 89 | echo "Detected Linux. Installing Ollama with curl..."; \ 90 | curl -fsSL https://ollama.com/install.sh | sh; \ 91 | else \ 92 | echo "Unsupported OS. Please install Ollama manually."; \ 93 | exit 1; \ 94 | fi 95 | 96 | #check-ollama-running: 97 | # @echo "${YELLOW}Checking if Ollama server is running...${NC}" 98 | # @if ! 
nc -z 127.0.0.1 11434; then \ 99 | # echo "${YELLOW}Ollama server is not running. Starting it now...${NC}"; \ 100 | # $(MAKE) run-ollama & \ 101 | # sleep 5; \ 102 | # fi 103 | 104 | run-ollama: 105 | @echo "${YELLOW}Running Ollama...${NC}" 106 | @ollama serve & 107 | @sleep 5 108 | @echo "${GREEN}Ollama server is running in background.${NC}" 109 | 110 | download-ollama-model: 111 | @echo "${YELLOW}Downloading local model ${OLLAMA_MODEL_NAME} and ${OLLAMA_EMBEDDING_MODEL_NAME}...${NC}" 112 | @ollama pull ${OLLAMA_EMBEDDING_MODEL_NAME} 113 | @ollama pull ${OLLAMA_MODEL_NAME} 114 | 115 | 116 | chat-ollama: 117 | @echo "${YELLOW}Running ollama...${NC}" 118 | @ollama run ${OLLAMA_MODEL_NAME} 119 | 120 | ######## Tests ######## 121 | test: 122 | # pytest runs from the root directory 123 | @echo "${YELLOW}Running tests...${NC}" 124 | @$(UV) run pytest tests 125 | 126 | test-ollama: 127 | curl -X POST http://localhost:11434/api/generate -H "Content-Type: application/json" -d '{"model": "${OLLAMA_MODEL_NAME}", "prompt": "Hello", "stream": false}' 128 | 129 | test-inference-llm: 130 | # llm that generates answers (used in chat, rag and promptfoo) 131 | @echo "${YELLOW}=========> Testing LLM client...${NC}" 132 | @$(UV) run pytest tests/test_llm_endpoint.py -k test_inference_llm --disable-warnings 133 | 134 | 135 | run-langfuse: 136 | @echo "${YELLOW}Running langfuse...${NC}" 137 | @if [ "$$(uname)" = "Darwin" ]; then \ 138 | echo "Detected macOS: running postgresql with Homebrew..."; \ 139 | colima start; \ 140 | brew services start postgresql@17; \ 141 | elif [ "$$(uname)" = "Linux" ]; then \ 142 | echo "Detected Linux: running postgresql with systemctl..."; \ 143 | else \ 144 | echo "Unsupported OS. Please start postgres manually."; \ 145 | exit 1; \ 146 | fi 147 | 148 | 149 | 150 | ########### Docker & deployment 151 | CONTAINER_NAME = generative-ai-project-template 152 | export PROJECT_ROOT = $(shell pwd) 153 | docker-build: 154 | @echo "${YELLOW}Building docker image...${NC}" 155 | docker build -t $(CONTAINER_NAME) --progress=plain . 
156 | docker-prod: docker-build 157 | @echo "${YELLOW}Running docker for production...${NC}" 158 | docker run -it --rm --name $(CONTAINER_NAME)-prod $(CONTAINER_NAME) /bin/bash 159 | 160 | # Developing in a container 161 | docker-dev: docker-build 162 | @echo "${YELLOW}Running docker for development...${NC}" 163 | # Docker replaces the contents of the /app directory when you mount a project directory 164 | # need fix : the .venv directory is unfortunately not retained in the container ( we need to solve it to retain it) 165 | docker run -it --rm -v $(PROJECT_ROOT):/app -v /app/.venv --name $(CONTAINER_NAME)-dev $(CONTAINER_NAME) /bin/bash 166 | 167 | # run docker-compose 168 | docker-compose: 169 | @echo "${YELLOW}Running docker-compose...${NC}" 170 | docker-compose up --build 171 | 172 | 173 | # This build the documentation based on current code 'src/' and 'docs/' directories 174 | # This is to run the documentation locally to see how it looks 175 | deploy-doc-local: 176 | @echo "${YELLOW}Deploying documentation locally...${NC}" 177 | @$(UV) run mkdocs build && $(UV) run mkdocs serve 178 | 179 | # Deploy it to the gh-pages branch in your GitHub repository (you need to setup the GitHub Pages in github settings to use the gh-pages branch) 180 | deploy-doc-gh: 181 | @echo "${YELLOW}Deploying documentation in github actions..${NC}" 182 | @$(UV) run mkdocs build && $(UV) run mkdocs gh-deploy 183 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 |

Generative AI Project Template

4 | 5 | 6 | [![python](https://img.shields.io/badge/python-3.11+-blue?logo=python)](https://www.python.org/downloads/release/python-3110/) 7 | [![Debian](https://img.shields.io/badge/Debian-A81D33?logo=debian&logoColor=fff)](https://www.debian.org/) 8 | [![macOS](https://img.shields.io/badge/macOS-000000?logo=apple&logoColor=F0F0F0)](#) 9 | 10 | [![Hugging Face](https://img.shields.io/badge/Hugging%20Face-FFD21E?logo=huggingface&logoColor=000)](#) 11 | [![OpenAI](https://img.shields.io/badge/OpenAI-%23412991?logo=openai&logoColor=white)](https://pytorch.org/get-started/locally/) 12 | [![Microsoft Azure](https://custom-icon-badges.demolab.com/badge/Microsoft%20Azure-0089D6?logo=msazure&logoColor=white)](#) 13 | 14 | [![streamlit](https://img.shields.io/badge/-Streamlit-FF4B4B?style=flat&logo=streamlit&logoColor=white)](#) 15 | [![FastAPI](https://img.shields.io/badge/FastAPI-009485.svg?logo=fastapi&logoColor=white)](#) 16 | 17 | [![Style: Ruff](https://img.shields.io/badge/style-ruff-41B5BE?style=flat)](https://github.com/charliermarsh/ruff) 18 | [![MkDocs](https://img.shields.io/badge/MkDocs-526CFE?logo=materialformkdocs&logoColor=fff)](#) 19 | [![mkdocs-material](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/juftin/mkdocs-material/66d65cf/src/templates/assets/images/badge.json)]() 20 | [![Docker](https://img.shields.io/badge/Docker-2496ED?logo=docker&logoColor=fff)](#) 21 | [![GitLab CI](https://img.shields.io/badge/GitLab%20CI-FC6D26?logo=gitlab&logoColor=fff)](#) 22 | [![GitHub Actions](https://img.shields.io/badge/GitHub_Actions-2088FF?logo=github-actions&logoColor=white)](#) 23 | 24 | Template for a new AI Cloud project. 25 | 26 | Click on [Use this template](https://github.com/aminedjeghri/ai-cloud-project-template/generate) to start your own project! 27 | 28 | 29 | 30 | This project is a generative ai template. It contains the following features: LLMs, information extraction, chat, rag & evaluation. 31 | It uses LLMs(local or cloud),streamlit (with and without fastapi) & Promptfoo as an evaluation and redteam framework for your AI system. 32 | 33 | | Test embeddings | Test chat | 34 | |-------------------------------------------------------|------------------------------------------------------| 35 | | | | 36 | 37 | **Engineering tools:** 38 | 39 | - [x] Use UV to manage packages 40 | - [x] pre-commit hooks: use ``ruff`` to ensure the code quality & ``detect-secrets`` to scan the secrets in the code. 41 | - [x] Logging using loguru (with colors) 42 | - [x] Pytest for unit tests 43 | - [x] Dockerized project (Dockerfile & docker-compose). 44 | - [x] Streamlit (frontend) & FastAPI (backend) 45 | - [x] Make commands to handle everything for you: install, run, test 46 | 47 | **AI tools:** 48 | 49 | - [x] LLM running locally with Ollama or in the cloud with any LLM provider (LiteLLM) 50 | - [x] Information extraction and Question answering from documents 51 | - [x] Chat to test the AI system 52 | - [x] Efficient async code using asyncio. 53 | - [x] AI Evaluation framework: using Promptfoo, Ragas & more... 
54 | 55 | **CI/CD & Maintenance tools:** 56 | 57 | - [x] CI/CD pipelines: ``.github/workflows`` for GitHub (Testing the AI system, local models with Ollama and the dockerized app) 58 | - [x] Local CI/CD pipelines: GitHub Actions using ``github act`` 59 | - [x] GitHub Actions for deploying to GitHub Pages with mkdocs gh-deploy 60 | - [x] Dependabot ``.github/dependabot.yml`` for automatic dependency and security updates 61 | 62 | **Documentation tools:** 63 | 64 | - [x] Wiki creation and setup of documentation website using Mkdocs 65 | - [x] GitHub Pages deployment using mkdocs gh-deploy plugin 66 | 67 | 68 | Upcoming features: 69 | - [ ] add RAG again 70 | - [ ] optimize caching in CI/CD 71 | - [ ] [Pull requests templates](https://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/creating-a-pull-request-template-for-your-repository) 72 | - [ ] Additional MLOps templates: https://github.com/fmind/mlops-python-package 73 | - [ ] Add MLFlow 74 | - [ ] add Langfuse 75 | 76 | 77 | ## 1. Getting started 78 | This project contains two parts: 79 | 80 | - The AI app: contains an AI system (local or cloud), a frontend (streamlit), with an optional backend(fastapi). 81 | - (optional)The Evaluation Tool: The evaluation tool is used to evaluate the performance and safety of the AI system. It uses promptfoo & RAGAS, Python 3.11 and NVM are needed, but no need to install them by yourself since the project will handle that for you. 82 | 83 | The following files are used in the contribution pipeline: 84 | 85 | - ``.env.example``: example of the .env file. 86 | - ``.env`` : contains the environment variables used by the app. 87 | - ``Makefile``: contains the commands to run the app locally. 88 | - ``Dockerfile``: the dockerfile used to build the project inside a container. It uses the Makefile commands to run the app. 89 | - ``.pre-commit-config.yaml``: pre-commit hooks configuration file 90 | - ``pyproject.toml``: contains the pytest, ruff & other configurations. 91 | - ``src/api/log_config.py`` and ``src/main_backend.py``: uvicorn (fastapi) logging configuration. 92 | - ``src/utils.py``: logger (using logguru) and settings using pydantic. 93 | the frontend. 94 | - `.github/workflows/**.yml`: GitHub actions configuration files. 95 | - `.gitlab-ci.yml`: Gitlab CI configuration files. 96 | - ``.gitignore``: contains the files to ignore in the project. 97 | 98 | Tree: 99 | 100 | ``` 101 | 102 | ├── .env.example # example of the .env file 103 | ├── .env # contains the environment variables 104 | ├── Dockerfile # the dockerfile used to build the project inside a container. It uses the Makefile commands to run the app. 105 | ├── docker-compose.yml # docker-compose configuration file (used to run the frontend and backend in docker) 106 | ├── Makefile # contains the commands to run the app (like running the frontend, tests, installing packages, docker...) 
107 | ├── assets 108 | ├── pyproject.toml # uv, dependencies, pytest, ruff & other configurations for the package 109 | ├── uv.lock # uv lock file 110 | ├── .pre-commit-config.yaml # pre-commit hooks configuration file 111 | ├── .gitignore # contains the files to ignore in the project 112 | ├── .github 113 | │ ├── dependabot.yml # dependabot configuration file 114 | │ └── workflows # GitHub actions configuration files 115 | │ └── test-deploy.yaml 116 | ├── mkdocs.yml # mkdocs configuration file 117 | ├── scripts 118 | │   └── gen_doc_stubs.py # mkdocs : generate documentation stubs 119 | ├── src 120 | │   ├── api 121 | │   ├── evaluation 122 | │   ├── main_backend.py 123 | │   ├── main_frontend.py 124 | │   ├── ml 125 | │   ├── settings.py 126 | │   └── utils.py # logger (using logguru) and settings using pydantic. 127 | ├── CODE_OF_CONDUCT.md 128 | ├── CONTRIBUTING.md 129 | ├── README.md 130 | ├── LICENSE 131 | └── tests 132 | ``` 133 | 134 | 135 | 136 | ### 1.1. Local Prerequisites 137 | 138 | - Ubuntu 22.04 or MacOS 139 | - git clone the repository 140 | - UV & Python 3.11 (will be installed by the Makefile) 141 | - Create a ``.env`` file *(take a look at the ``.env.example`` file)* 142 | 143 | 144 | ### 1.2 ⚙️ Steps for Installation (Users) 145 | #### App (AI, FastAPI, Streamlit) 146 | You can run the app in a docker container or locally. 147 | #### Docker: 148 | - The `docker-compose.yml` file is used to run the app in a docker container. It will install the following services: frontend, backend and ollama. Your can comment out ollama if you don't need it. 149 | - The `docker-compose.yml` will use the `.env.example.docker` file to configure the environment variables. Per default, it uses ollama docker container. 150 | - Run this command : `make docker-compose` then go to [http://localhost:8501](http://localhost:8501) 151 | 152 | #### Local : 153 | 1. To install the app, run `make install-prod`. 154 | 2. Choose one of the following options: 155 | - **Local model**: we use Ollama and litellm to run local models. The default model is `qwen2.5:0.5b` which is a very lightweight model but can be changed. 156 | - Create a ``.env`` file *(You can copy and paste the ``.env.example`` file with `cp .env.example .env`)* 157 | - Install Ollama (for openai) `make install-ollama` 158 | - Download the model, run `make download-ollama-model`. It will download the model present in the `OLLAMA_MODEL_NAME` var in the ``.env`` file (default is `qwen2.5:0.5b`). 159 | - Run ollama to emulate openai : `make run-ollama` 160 | - Run `make test-ollama`. You should see an output with a response. 161 | - Discuss with the model : `make chat-ollama` 162 | - **Cloud model:** 163 | - Create/update the ``.env`` file *(You can copy and paste the ``.env.example`` file with `cp .env.example .env`)* 164 | - Follow the litellm [naming convention](https://docs.litellm.ai/docs/providers). 165 | 166 | 3. Run `make test-inference-llm` to check if your LLM responds. 167 | 4. Run the app: 168 | - To run the app with Streamlit (and without fastapi), run `make run-frontend` 169 | - To run the app with both Streamlit and FastAPI, run `make run-app` 170 | 171 | ### 1.3 ⚙️ Steps for Installation (Contributors and maintainers) 172 | Check the [CONTRIBUTING.md](CONTRIBUTING.md) file for more information. 173 | 174 | ## 2. Contributing 175 | Check the [CONTRIBUTING.md](CONTRIBUTING.md) file for more information. 
176 | -------------------------------------------------------------------------------- /assets/embeddings_img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmineDjeghri/generative-ai-project-template/627bd5460e6720d3b10598918e4b9896576e5e83/assets/embeddings_img.png -------------------------------------------------------------------------------- /assets/icon.svg: -------------------------------------------------------------------------------- 1 |
2 | -------------------------------------------------------------------------------- /assets/promptfoo_img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmineDjeghri/generative-ai-project-template/627bd5460e6720d3b10598918e4b9896576e5e83/assets/promptfoo_img.png -------------------------------------------------------------------------------- /assets/streamlit_img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmineDjeghri/generative-ai-project-template/627bd5460e6720d3b10598918e4b9896576e5e83/assets/streamlit_img.png -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | x-common-environment: 2 | &common-environment 3 | OLLAMA_MODEL_NAME: ${OLLAMA_MODEL_NAME:-qwen2.5:0.5b} 4 | OLLAMA_EMBEDDING_MODEL_NAME: ${OLLAMA_EMBEDDING_MODEL_NAME:-all-minilm:l6-v2} 5 | INFERENCE_DEPLOYMENT_NAME: ${INFERENCE_DEPLOYMENT_NAME:-ollama_chat/qwen2.5:0.5b} 6 | INFERENCE_BASE_URL: http://ollama:11434 7 | INFERENCE_API_KEY: ${INFERENCE_API_KEY:-t} 8 | EMBEDDINGS_DEPLOYMENT_NAME: ${EMBEDDINGS_DEPLOYMENT_NAME:-ollama/all-minilm:l6-v2} 9 | EMBEDDINGS_BASE_URL: http://ollama:11434 10 | EMBEDDINGS_API_KEY: ${EMBEDDINGS_API_KEY:-t} 11 | DEV_MODE: ${DEV_MODE:-True} 12 | FASTAPI_HOST: ${FASTAPI_HOST:-localhost} 13 | FASTAPI_PORT: ${FASTAPI_PORT:-8080} 14 | STREAMLIT_PORT: ${STREAMLIT_PORT:-8501} 15 | 16 | services: 17 | frontend: 18 | build: 19 | context: . 20 | dockerfile: Dockerfile 21 | container_name: frontend 22 | command: make run-frontend 23 | ports: 24 | - "8501:8501" 25 | env_file: ".env.example.docker" 26 | environment: 27 | <<: *common-environment 28 | 29 | # backend: 30 | # build: 31 | # context: . 32 | # dockerfile: Dockerfile 33 | # container_name: backend 34 | # command: make run-backend 35 | # ports: 36 | # - "8080:8080" 37 | # env_file: ".env.example.docker" 38 | # environment: 39 | # <<: *common-environment 40 | 41 | ollama: 42 | build: 43 | context: . 44 | dockerfile: Dockerfile.ollama 45 | ports: 46 | - 11434:11434 47 | volumes: 48 | - ../ollama/:/root/.ollama 49 | container_name: ollama 50 | pull_policy: always 51 | tty: true 52 | restart: always 53 | environment: 54 | <<: *common-environment 55 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: Generative AI Project Template 2 | site_description: Documentation 3 | site_author: Amine Djeghri 4 | docs_dir: . 
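# note: docs_dir "." works together with the mkdocs-same-dir plugin declared in the plugins
# section below, which lets MkDocs build the documentation from the repository root instead
# of a separate docs/ directory.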
5 | # site_url: #TODO: Fill when deployment CI added 6 | #site_dir: public 7 | #repo_url: 8 | #edit_uri: blob/main/docs/ 9 | 10 | 11 | theme: 12 | 13 | name: "material" # https://squidfunk.github.io/mkdocs-material/getting-started/ 14 | language: en 15 | features: # https://squidfunk.github.io/mkdocs-material/setup/ 16 | - search.suggest 17 | - search.highlight 18 | - search.share 19 | - navigation.instant 20 | - navigation.instant.progress 21 | - navigation.tracking 22 | - navigation.tabs 23 | - navigation.tabs.sticky 24 | # - navigation.sections 25 | - navigation.path 26 | - navigation.indexes 27 | - navigation.top 28 | - toc.follow 29 | - content.code.copy 30 | - content.code.annotate 31 | palette: 32 | # Palette toggle for light mode 33 | - scheme: default 34 | toggle: 35 | icon: material/brightness-7 36 | name: Switch to dark mode 37 | 38 | # Palette toggle for dark mode 39 | - scheme: slate 40 | toggle: 41 | icon: material/brightness-4 42 | name: Switch to light mode 43 | plugins: 44 | - mkdocstrings: 45 | default_handler: python 46 | import: 47 | - https://docs.python-requests.org/en/master/objects.inv 48 | load_external_modules: true 49 | handlers: 50 | python: 51 | paths: [., source] 52 | - gen-files: 53 | scripts: 54 | - scripts/gen_doc_stubs.py 55 | - search 56 | - same-dir 57 | - exclude: 58 | glob: 59 | - node_modules/** 60 | - .venv/** 61 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "ai-cloud-project-template" 3 | version = "0.1.0" 4 | description = "Add your description here" 5 | readme = "README.md" 6 | requires-python = "==3.13.*" 7 | dependencies = [ 8 | # AI dependencies 9 | "openai==1.66.3", 10 | "litellm==1.63.14", 11 | "ollama==0.4.7", 12 | "ragas==0.2.14", 13 | "instructor==1.7.7", 14 | "azure-search-documents==11.5.2", 15 | "azure-storage-blob==12.25.0", 16 | # observability 17 | "langfuse==2.60.1", 18 | # backend & frontend 19 | "python-multipart==0.0.9", 20 | "fastapi[standard]==0.115.5", 21 | "streamlit==1.40.1", 22 | "uvicorn==0.32.1", 23 | # Engineering 24 | "pydantic==2.10.6", 25 | "pydantic-settings>=2.8.1", 26 | "loguru==0.7.3", 27 | "rich==13.9.4", 28 | 29 | ] 30 | 31 | ############### uv configuration 32 | # uses also the depenencies in the [project.dependencies] section 33 | [tool.uv] 34 | managed = true 35 | default-groups = ["dev", "docs"] 36 | 37 | [dependency-groups] 38 | # if you add new dependencies here, make sure to add them to [tool.uv] default-groups up above 39 | dev = [ 40 | "pytest == 8.3.0", 41 | "pytest-asyncio == 0.24.0", 42 | "pre-commit == 4.0.1", 43 | "jupyter==1.1.1", 44 | "ruff==0.8.1" 45 | ] 46 | docs = [ 47 | "mkdocs == 1.6.1", 48 | "mkdocs-material>=9.5.41", 49 | "mkdocstrings>=0.26.2", 50 | "mkdocs-mermaid2-plugin>=1.1.1", 51 | "mkdocs-gen-files>=0.5.0", 52 | "mkdocstrings-python", 53 | "mkdocs-same-dir", 54 | "mkdocs-exclude" 55 | ] 56 | 57 | 58 | # scripts : inside the root folder, you can run `uv run --directory . hi` or 59 | [project.scripts] 60 | hello = "tmp" # will read from __init__.py. 
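# Illustrative only: the "tmp" target above is a placeholder; a console script entry normally
# points at "module:function". A hypothetical example (no such function is defined here):
# hello = "utils:main"  # would expose `uv run hello`, calling main() in src/utils.py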
61 | 62 | # pytest configuration 63 | [tool.pytest.ini_options] 64 | pythonpath = ["src"] 65 | 66 | 67 | # ruff configuration 68 | [tool.ruff] 69 | extend-exclude = ["*.ipynb"] 70 | line-length = 100 71 | 72 | # Enable all `pydocstyle` rules, limiting to those that adhere to the Google convention via `convention = "google"` 73 | [tool.ruff.lint] 74 | select = ["D", "F401"] 75 | ignore = ["D100", "D101", "D102", "D103", "D104", "D107", "D417"] 76 | 77 | [tool.ruff.lint.pydocstyle] 78 | # Use Google-style docstrings. 79 | convention = "google" 80 | -------------------------------------------------------------------------------- /scripts/gen_doc_stubs.py: -------------------------------------------------------------------------------- 1 | """This script walks through the source code and generates a markdown file for each python file. 2 | 3 | The goal is then for the markdown files to be used by mkdocs to call the plugin mkdocstring. 4 | """ 5 | 6 | from pathlib import Path 7 | 8 | import mkdocs_gen_files 9 | 10 | src_root = Path("src") 11 | for path in src_root.glob("**/*.py"): 12 | if "__init__" in str(path): 13 | print("Skipping", path) 14 | continue 15 | doc_path = Path("package", path.relative_to(src_root)).with_suffix(".md") 16 | 17 | if "seqly" not in str(path) and "__init__" not in str(path): 18 | with mkdocs_gen_files.open(doc_path, "w") as f: 19 | ident = ".".join(path.with_suffix("").parts) 20 | print("::: " + ident, file=f) 21 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmineDjeghri/generative-ai-project-template/627bd5460e6720d3b10598918e4b9896576e5e83/src/__init__.py -------------------------------------------------------------------------------- /src/api/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmineDjeghri/generative-ai-project-template/627bd5460e6720d3b10598918e4b9896576e5e83/src/api/__init__.py -------------------------------------------------------------------------------- /src/api/api.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # add the parent directory to system path so we can run api_server.py from the src directory 4 | import sys 5 | 6 | sys.path.append(os.path.dirname(os.path.dirname("../"))) 7 | 8 | from fastapi import FastAPI 9 | from fastapi.responses import JSONResponse 10 | 11 | from utils import logger, settings 12 | 13 | from api.api_route import router, TagEnum 14 | 15 | app = FastAPI() 16 | 17 | # ROUTERS 18 | routers = [router] 19 | for router in routers: 20 | app.include_router(router) 21 | 22 | 23 | @app.get("/", tags=[TagEnum.general]) 24 | async def root(): 25 | logger.debug("Server is up and running!") 26 | 27 | logger.debug(f"Settings: {settings}") 28 | 29 | return JSONResponse(content="FastAPI server is up and running!") 30 | -------------------------------------------------------------------------------- /src/api/api_route.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | from fastapi import APIRouter 4 | from fastapi.responses import JSONResponse 5 | 6 | from ml.ai import get_rag_response 7 | from utils import logger 8 | 9 | 10 | class TagEnum(str, Enum): 11 | """API tags.""" 12 | 13 | general = "general" 14 | tag_example = "tag_example" 15 | 16 | 17 | router = 
APIRouter(prefix="/prefix_example", tags=[TagEnum.tag_example]) 18 | 19 | 20 | @router.get("/example/") 21 | async def get_conversation_by_id(conversation_id: str): 22 | return JSONResponse(content="example response : 1234") 23 | 24 | 25 | @router.get("/form/") 26 | async def get_conversation_by_id(question: str): 27 | logger.debug(f"question: {question}") 28 | res = get_rag_response(question) 29 | return JSONResponse(content=res) 30 | -------------------------------------------------------------------------------- /src/api/log_config.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | # This file is used to configure the logging for the application (check the main of api_server.py) 4 | # It is used with the uvicorn/fastapi function to configure the logging 5 | # There is an environment variable named DEV_MODE that is used to configure the logging level 6 | 7 | 8 | LOGGING_CONFIG: dict[str, Any] = { 9 | "version": 1, 10 | "disable_existing_loggers": False, 11 | "formatters": { 12 | "default": { 13 | "()": "uvicorn.logging.DefaultFormatter", 14 | "fmt": "%(asctime)s - %(name)s - %(levelprefix)s - [%(filename)s:%(funcName)s:%(lineno)d] - %(message)s", 15 | "use_colors": None, 16 | }, 17 | "access": { 18 | "()": "uvicorn.logging.AccessFormatter", 19 | "fmt": '%(asctime)s - %(name)s - %(levelprefix)s - %(client_addr)s - "%(request_line)s" %(status_code)s', # noqa: E501 20 | }, 21 | "access_file": { 22 | "()": "uvicorn.logging.AccessFormatter", 23 | "fmt": '%(asctime)s - %(name)s - %(levelprefix)s - %(client_addr)s - "%(request_line)s" %(status_code)s', # noqa: E501 24 | "use_colors": False, 25 | }, 26 | }, 27 | "handlers": { 28 | "file_handler": { 29 | "formatter": "access_file", 30 | "class": "logging.handlers.RotatingFileHandler", 31 | "filename": "./app.log", 32 | "mode": "a+", 33 | "maxBytes": 10 * 1024 * 1024, 34 | "backupCount": 0, 35 | }, 36 | "default": { 37 | "formatter": "default", 38 | "class": "logging.StreamHandler", 39 | "stream": "ext://sys.stderr", 40 | }, 41 | "access": { 42 | "formatter": "access", 43 | "class": "logging.StreamHandler", 44 | "stream": "ext://sys.stdout", 45 | }, 46 | }, 47 | "loggers": { 48 | "uvicorn": {"handlers": ["default"], "level": "INFO", "propagate": False}, 49 | "uvicorn.error": {"level": "INFO"}, 50 | "uvicorn.access": { 51 | "handlers": ["access", "file_handler"], 52 | "level": "INFO", 53 | "propagate": False, 54 | }, 55 | }, 56 | } 57 | -------------------------------------------------------------------------------- /src/evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmineDjeghri/generative-ai-project-template/627bd5460e6720d3b10598918e4b9896576e5e83/src/evaluation/__init__.py -------------------------------------------------------------------------------- /src/evaluation/configs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmineDjeghri/generative-ai-project-template/627bd5460e6720d3b10598918e4b9896576e5e83/src/evaluation/configs/__init__.py -------------------------------------------------------------------------------- /src/evaluation/configs/config_baseline.py: -------------------------------------------------------------------------------- 1 | def call_api(query, options, context): 2 | result = { 3 | "output": "output", 4 | } 5 | 6 | return result 7 | 
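# --- Illustrative sketch only (not part of the original template) ---
# promptfoo calls a Python provider's call_api(prompt, options, context) and expects a dict
# with an "output" key; the baseline above deliberately returns a fixed string so that real
# prompts/providers can be compared against it. Assuming litellm and the INFERENCE_* variables
# from .env are available, a provider that forwards the query to the configured model could
# look roughly like this:
#
#   import os
#   from litellm import completion
#
#   def call_api(query, options, context):
#       response = completion(
#           model=os.environ["INFERENCE_DEPLOYMENT_NAME"],
#           messages=[{"role": "user", "content": query}],
#           api_base=os.environ["INFERENCE_BASE_URL"],
#           api_key=os.environ["INFERENCE_API_KEY"],
#       )
#       # litellm mirrors the OpenAI response shape
#       return {"output": response.choices[0].message.content}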
-------------------------------------------------------------------------------- /src/evaluation/configs/config_compare_prompts.yaml: -------------------------------------------------------------------------------- 1 | # Learn more about building a configuration: https://promptfoo.dev/docs/configuration/guide 2 | description: "My eval" 3 | 4 | prompts: 5 | # the {{query}} and {{context}} are the columns in the dataset (test_simple.csv) 6 | - | 7 | You are an internal corporate chatbot. 8 | Respond to this query: {{query}} 9 | - Here is some context that you can use to write your response: {{context}} 10 | 11 | - | 12 | You are an assistant that gives wrong information. 13 | Respond to this query: {{query}} 14 | - Here is some context that you can use to write your response: {{context}} 15 | 16 | providers: 17 | - id: azureopenai:chat:{{env.AZURE_OPENAI_DEPLOYMENT_NAME}} # env variables are in .env 18 | label: '{{env.AZURE_OPENAI_DEPLOYMENT_NAME}}' 19 | 20 | 21 | defaultTest: 22 | options: 23 | provider: azureopenai:chat:{{env.AZURE_OPENAI_DEPLOYMENT_NAME}} 24 | assert: 25 | - type: select-best 26 | value: choose the right response 27 | 28 | tests: 29 | - file://../data/test_simple.csv 30 | -------------------------------------------------------------------------------- /src/evaluation/configs/config_json.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from evaluation.metrics.utils import safe_eval 4 | from utils import logger 5 | 6 | logger.debug("Loading promptfoo_hooks.py") 7 | 8 | 9 | def get_var(var_name, prompt, other_vars): 10 | """Function used by default by promptfoo call from the column 'context' of the dataset used in config_json.yml (test_json.csv). 11 | 12 | This function returns the context that will be used in the following call_api function 13 | The context can be. For example, the retrieved list of documents 14 | this is an example, and we will return the context that is defined in the csv file 15 | other_vars contains the vars of the csv file. prompt contains the prompt in the config_json.yml 16 | 17 | Args: 18 | prompt (str): The prompt used in the configuration file (prompts section of config_json.yml). 19 | other_vars (dict): A dictionary containing variables from a CSV file. 20 | 21 | """ 22 | context = [ 23 | "The USA Supreme Court ruling on abortion has sparked intense debates and discussions not only within the country but also around the world.", 24 | "Many countries look to the United States as a leader in legal and social issues, so the decision could potentially influence the policies and attitudes towards abortion in other nations.", 25 | "The ruling may impact international organizations and non-governmental groups that work on reproductive rights and women's health issues.", 26 | ] 27 | return {"output": json.dumps(context, ensure_ascii=False)} 28 | 29 | 30 | def call_api(prompt, options, context) -> dict[str, str]: 31 | """Function used by default by promptfoo. Check the config_json.yml. 32 | 33 | Args: 34 | prompt (str): The prompt used in the configuration file (prompts section of config_json.yml). 
35 | options: 36 | context (dict): A dictionary containing the other_vars and context return by the previous function get_var 37 | 38 | 39 | """ 40 | query = safe_eval(context["vars"]["query"]) 41 | output = {list(query.keys())[0]: "test"} 42 | result = { 43 | "output": json.dumps(output, ensure_ascii=False), 44 | } 45 | 46 | return result 47 | -------------------------------------------------------------------------------- /src/evaluation/configs/config_json.yaml: -------------------------------------------------------------------------------- 1 | # Learn more about building a configuration: https://promptfoo.dev/docs/configuration/guide 2 | description: "My eval" 3 | 4 | prompts: 5 | # the {{query}} and {{context}} are the columns in the dataset (test_json.csv) 6 | - | 7 | You are an internal corporate chatbot. 8 | Respond to this query: {{query}} which is a json in form of {field:query} and return a json object in the form of {field:answer} 9 | - Keep the name of fields and just add the answer to it 10 | - Do not include the '''json ''' in your response. Just the json content. 11 | - Here is some context that you can use to write your response: {{context}} 12 | providers: 13 | - id: azureopenai:chat:{{env.AZURE_OPENAI_DEPLOYMENT_NAME}} # env variables are in .env 14 | label: '{{env.AZURE_OPENAI_DEPLOYMENT_NAME}}' 15 | 16 | # - id: openai:chat:phi3:3.8b-mini-4k-instruct-q4_K_M 17 | # config: 18 | # apiHost: localhost:11434/v1/ 19 | # apiKey: ollama 20 | ## - id: ollama:chat:phi3:3.8b-mini-4k-instruct-q4_K_M # env variables are in .env 21 | # - id: file://../configs/config_json.py 22 | # label: '{{ env.AZURE_OPENAI_DEPLOYMENT_NAME }}' 23 | 24 | 25 | defaultTest: 26 | assert: 27 | 28 | # retrieval metrics: evaluating retrieved contexts against relevant contexts 29 | # Order unaware retrieval metrics 30 | - type: python 31 | value: file://../metrics/order_unaware/precision_at_k.py 32 | metric: PrecisionK 33 | - type: python 34 | value: file://../metrics/order_unaware/recall_at_k.py 35 | metric: RecallK 36 | - type: python 37 | value: file://../metrics/order_unaware/f1_at_k.py 38 | metric: F1K 39 | # Order aware retrieval metrics 40 | - type: python 41 | value: file://../metrics/order_aware/reciprocal_rank.py 42 | metric: Mean Reciprocal Rank 43 | 44 | # end-task: evaluating ground truth vs generated answer 45 | - type: python 46 | value: file://../metrics/information_extraction/missing_fields.py 47 | metric: Missing Fields 48 | - type: python 49 | value: file://../metrics/information_extraction/exact_match_json.py 50 | metric: Exact Match JSON 51 | - type: python 52 | value: file://../metrics/information_extraction/similarity_json.py 53 | metric: Similarity JSON 54 | - type: equals 55 | value: '{{ground_truth}}' 56 | - type: contains-json 57 | - type: is-json 58 | - type: python 59 | value: file://../metrics/ragas_metrics/ragas_answer_similarity.py 60 | metric: Ragas Answer Similarity 61 | # - type: python 62 | # value: file://../metrics/ragas_metrics/ragas_answer_correctness.py 63 | # metric: Ragas Answer Correctness 64 | 65 | # evaluating answer vs retrieved context: 66 | # - type: python 67 | # value: file://../metrics/ragas_metrics/ragas_answer_relevancy.py 68 | # metric: Ragas Answer Relevancy 69 | 70 | # retrieval metrics: evaluating retrieved contexts against ground truth 71 | - type: python 72 | value: file://../metrics/ragas_metrics/ragas_context_recall.py 73 | metric: Ragas Context Recall 74 | - type: python 75 | value: file://../metrics/ragas_metrics/ragas_context_precision.py 76 
| metric: Ragas Context Precision 77 | # - type: python 78 | # value: file://../metrics/ragas_metrics/ragas_context_entity_recall.py 79 | # metric: Ragas Context Entity Recall 80 | # - type: python 81 | # value: file://../metrics/ragas_metrics/ragas_context_utilization.py 82 | # metric: Ragas Context Utilization 83 | 84 | 85 | 86 | ## latency needs cache to be off 87 | # - type: latency 88 | # threshold: 5000 89 | tests: 90 | # - vars: 91 | # language: [ Spanish, French ] 92 | # input: [ 'Hello world' ] 93 | # - file://data/tests_2.csv 94 | 95 | # - file://./data/tests_2.csv 96 | - file://../data/test_json.csv 97 | -------------------------------------------------------------------------------- /src/evaluation/configs/config_simple.yaml: -------------------------------------------------------------------------------- 1 | # Learn more about building a configuration: https://promptfoo.dev/docs/configuration/guide 2 | description: "My eval" 3 | 4 | prompts: 5 | # the {{query}} and {{context}} are the columns in the dataset (test_simple.csv) 6 | - | 7 | You are an internal corporate chatbot. 8 | Respond to this query: {{query}} 9 | - Here is some context that you can use to write your response: {{context}} 10 | 11 | 12 | providers: 13 | - id: openai:chat:{{env.OPENAI_DEPLOYMENT_NAME}} # env variables are in .env 14 | label: '{{env.OPENAI_DEPLOYMENT_NAME}}' 15 | 16 | 17 | 18 | defaultTest: 19 | assert: 20 | # retrieval metrics: evaluating retrieved contexts against relevant contexts 21 | # Order unaware retrieval metrics 22 | - type: python 23 | value: file://../metrics/order_unaware/precision_at_k.py 24 | metric: PrecisionK 25 | - type: python 26 | value: file://../metrics/order_unaware/recall_at_k.py 27 | metric: RecallK 28 | - type: python 29 | value: file://../metrics/order_unaware/f1_at_k.py 30 | metric: F1K 31 | # Order aware retrieval metrics 32 | - type: python 33 | value: file://../metrics/order_aware/reciprocal_rank.py 34 | metric: Mean Reciprocal Rank 35 | 36 | # end-task: evaluating ground truth vs generated answer 37 | - type: equals 38 | value: '{{ground_truth}}' 39 | # - type: python 40 | # value: file://../metrics/ragas_metrics/ragas_answer_similarity.py 41 | # metric: Ragas Answer Similarity 42 | # - type: python 43 | # value: file://../metrics/ragas_metrics/ragas_answer_correctness.py 44 | # metric: Ragas Answer Correctness 45 | 46 | # evaluating answer vs retrieved context: 47 | # - type: python 48 | # value: file://../metrics/ragas_metrics/ragas_answer_relevancy.py 49 | # metric: Ragas Answer Relevancy 50 | 51 | # retrieval metrics: evaluating retrieved contexts against ground truth 52 | # - type: python 53 | # value: file://../metrics/ragas_metrics/ragas_context_recall.py 54 | # metric: Ragas Context Recall 55 | # - type: python 56 | # value: file://../metrics/ragas_metrics/ragas_context_precision.py 57 | # metric: Ragas Context Precision 58 | # - type: python 59 | # value: file://../metrics/ragas_metrics/ragas_context_entity_recall.py 60 | # metric: Ragas Context Entity Recall 61 | # - type: python 62 | # value: file://../metrics/ragas_metrics/ragas_context_utilization.py 63 | # metric: Ragas Context Utilization 64 | 65 | 66 | 67 | ## latency needs cache to be off 68 | # - type: latency 69 | # threshold: 5000 70 | tests: 71 | - file://../data/test_simple.csv 72 | -------------------------------------------------------------------------------- /src/evaluation/configs/redteam_config.yaml: -------------------------------------------------------------------------------- 1 | # 
yaml-language-server: $schema=https://promptfoo.dev/config-schema.json 2 | 3 | # Red teaming configuration 4 | 5 | # Docs: https://promptfoo.dev/docs/red-team/configuration 6 | description: "My first red team" 7 | 8 | # Providers (a.k.a. targets): they generate the outputs under test 9 | providers: 10 | # Red team targets. To talk directly to your application, use a custom provider. 11 | # See https://promptfoo.dev/docs/red-team/configuration/#providers 12 | - id: azureopenai:chat:{{env.AZURE_OPENAI_DEPLOYMENT_NAME}} 13 | label: '{{env.AZURE_OPENAI_DEPLOYMENT_NAME}}' 14 | 15 | 16 | # Other redteam settings 17 | redteam: 18 | # attacker provider: generates adversarial outputs. Some providers such as Anthropic may disable your account for generating harmful test cases. We recommend using the default OpenAI provider. 19 | provider: 20 | id: azureopenai:chat:phi3:3.8b-mini-4k-instruct-q4_K_M 21 | 22 | purpose: "travel test app" 23 | # Default number of inputs to generate for each plugin. 24 | # The total number of tests will be (numTests * plugins.length * (1 + strategies.length)) 25 | numTests: 1 # Each plugin generates 1 adversarial input. 26 | 27 | # To control the number of tests for each plugin, use: 28 | # - id: plugin-name 29 | # numTests: 10 30 | plugins: 31 | - hallucination # Model generating false or misleading information 32 | - harmful:hate # Content that promotes hate or discrimination 33 | # 34 | # 35 | # # Attack methods for applying adversarial inputs 36 | # strategies: 37 | # - jailbreak # Attempts to bypass security measures through iterative prompt refinement 38 | # - prompt-injection # Malicious inputs designed to manipulate the model's behavior 39 | 40 | #defaultTest: 41 | # options: 42 | # # grader/evaluator: evaluates the generated outputs when llm-rubric or moderation metrics are used 43 | # provider: 44 | # id: azureopenai:chat:{{env.AZURE_OPENAI_DEPLOYMENT_NAME}} 45 | -------------------------------------------------------------------------------- /src/evaluation/context.py: -------------------------------------------------------------------------------- 1 | # Learn more about using dynamic variables: https://promptfoo.dev/docs/configuration/guide/#import-vars-from-separate-files 2 | def get_var(var_name, prompt, other_vars): 3 | # This is where you can fetch documents from a database, call an API, etc. 4 | # ... 5 | 6 | if var_name == "context": 7 | # Return value based on the variable name and test context 8 | return {"output": f"... 
Documents for {other_vars['inquiry']} in prompt: {prompt} ..."} 9 | 10 | # Default variable value 11 | return {"output": "Document A, Document B, Document C, ..."} 12 | 13 | # Handle potential errors 14 | # return { 'error': 'Error message' } 15 | -------------------------------------------------------------------------------- /src/evaluation/data/test_json.csv: -------------------------------------------------------------------------------- 1 | query,ground_truth,context,relevant_context 2 | "{""global_implications"":""What are the global implications of the USA Supreme Court ruling on abortion?""}","{ ""global_implications"": ""The USA Supreme Court ruling on abortion has sparked intense debates and discussions globally, influencing the discourse on reproductive rights and women's health.""}","file://../configs/config_json.py","[ 3 | 'The USA Supreme Court ruling on abortion has sparked intense debates and discussions not only within the country but also around the world.', 4 | 'Many countries look to the United States as a leader in legal and social issues, so the decision could potentially influence the policies and attitudes towards abortion in other nations.', 5 | 'The ruling may impact international organizations and non-governmental groups that work on reproductive rights and women\'s health issues.', 6 | 'Depending on the outcome, there could be shifts in funding, advocacy efforts, and collaborations with American counterparts, leading to ripple effects in the global fight for reproductive justice.', 7 | 'Several European countries have announced plans to review their own abortion laws in light of the US ruling, with some considering more restrictive measures.', 8 | 'A new international coalition, \'Global Choice\', has been formed by 15 countries to promote and protect abortion rights worldwide in response to the US decision.']" 9 | -------------------------------------------------------------------------------- /src/evaluation/data/test_simple.csv: -------------------------------------------------------------------------------- 1 | query,ground_truth,answer,context,relevant_context 2 | What are the global implications of the USA Supreme Court ruling on abortion?,"The global implications of the USA Supreme Court ruling on abortion are significant. The ruling has led to limited or no access to abortion for one in three women and girls of reproductive age in states where abortion access is restricted. These states also have weaker maternal health support, higher maternal death rates, and higher child poverty rates. Additionally, the ruling has had an impact beyond national borders due to the USA's geopolitical and cultural influence globally. Organizations and activists worldwide are concerned that the ruling may inspire anti-abortion legislative and policy attacks in other countries. The ruling has also hindered progressive law reform and the implementation of abortion guidelines in certain African countries. Furthermore, the ruling has created a chilling effect in international policy spaces, empowering anti-abortion actors to undermine human rights protections.","The global implications of the USA Supreme Court ruling on abortion can be significant, as it sets a precedent for other countries and influences the global discourse on reproductive rights. Here are some potential implications: 3 | 4 | 1. Influence on other countries: The Supreme Court's ruling can serve as a reference point for other countries grappling with their own abortion laws. 
It can provide legal arguments and reasoning that advocates for reproductive rights can use to challenge restrictive abortion laws in their respective jurisdictions. 5 | 6 | 2. Strengthening of global reproductive rights movements: A favorable ruling by the Supreme Court can energize and empower reproductive rights movements worldwide. It can serve as a rallying point for activists and organizations advocating for women's rights, leading to increased mobilization and advocacy efforts globally. 7 | 8 | 3. Counteracting anti-abortion movements: Conversely, a ruling that restricts abortion rights can embolden anti-abortion movements globally. It can provide legitimacy to their arguments and encourage similar restrictive measures in other countries, potentially leading to a rollback of existing reproductive rights. 9 | 10 | 4. Impact on international aid and policies: The Supreme Court's ruling can influence international aid and policies related to reproductive health. It can shape the priorities and funding decisions of donor countries and organizations, potentially leading to increased support for reproductive rights initiatives or conversely, restrictions on funding for abortion-related services. 11 | 12 | 5. Shaping international human rights standards: The ruling can contribute to the development of international human rights standards regarding reproductive rights. It can influence the interpretation and application of existing human rights treaties and conventions, potentially strengthening the recognition of reproductive rights as fundamental human rights globally. 13 | 14 | 6. Global health implications: The Supreme Court's ruling can have implications for global health outcomes, particularly in countries with restrictive abortion laws. It can impact the availability and accessibility of safe and legal abortion services, potentially leading to an increase in unsafe abortions and related health complications. 
15 | 16 | It is important to note that the specific implications will depend on the nature of the Supreme Court ruling and the subsequent actions taken by governments, activists, and organizations both within and outside the United States.","[ 17 | 'The USA Supreme Court ruling on abortion has sparked intense debates and discussions not only within the country but also around the world.', 18 | 'Many countries look to the United States as a leader in legal and social issues, so the decision could potentially influence the policies and attitudes towards abortion in other nations.', 19 | 'The ruling may impact international organizations and non-governmental groups that work on reproductive rights and women\'s health issues.']","[ 20 | 'The USA Supreme Court ruling on abortion has sparked intense debates and discussions not only within the country but also around the world.', 21 | 'Many countries look to the United States as a leader in legal and social issues, so the decision could potentially influence the policies and attitudes towards abortion in other nations.', 22 | 'The ruling may impact international organizations and non-governmental groups that work on reproductive rights and women\'s health issues.', 23 | 'Depending on the outcome, there could be shifts in funding, advocacy efforts, and collaborations with American counterparts, leading to ripple effects in the global fight for reproductive justice.', 24 | 'Several European countries have announced plans to review their own abortion laws in light of the US ruling, with some considering more restrictive measures.', 25 | 'A new international coalition, \'Global Choice\', has been formed by 15 countries to promote and protect abortion rights worldwide in response to the US decision.']" 26 | -------------------------------------------------------------------------------- /src/evaluation/instructions.txt: -------------------------------------------------------------------------------- 1 | - Update the Makefile to change the configuration (src/evaluation/configs/..) 
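- Example invocation (illustrative only; the actual Makefile target, working directory and config path may differ):
  npx promptfoo@latest eval -c src/evaluation/configs/config_simple.yaml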
2 | -------------------------------------------------------------------------------- /src/evaluation/metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmineDjeghri/generative-ai-project-template/627bd5460e6720d3b10598918e4b9896576e5e83/src/evaluation/metrics/__init__.py -------------------------------------------------------------------------------- /src/evaluation/metrics/data_types.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from pydantic import BaseModel 4 | 5 | 6 | class GradingResult(BaseModel): 7 | pass_: bool # 'pass' is a reserved keyword in Python 8 | score: float 9 | reason: str 10 | component_results: Optional[list["GradingResult"]] = None 11 | named_scores: Optional[dict[str, float]] = None # Appear as metrics in the UI 12 | -------------------------------------------------------------------------------- /src/evaluation/metrics/information_extraction/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmineDjeghri/generative-ai-project-template/627bd5460e6720d3b10598918e4b9896576e5e83/src/evaluation/metrics/information_extraction/__init__.py -------------------------------------------------------------------------------- /src/evaluation/metrics/information_extraction/entity_level.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmineDjeghri/generative-ai-project-template/627bd5460e6720d3b10598918e4b9896576e5e83/src/evaluation/metrics/information_extraction/entity_level.py -------------------------------------------------------------------------------- /src/evaluation/metrics/information_extraction/exact_match_json.py: -------------------------------------------------------------------------------- 1 | from pydantic import ValidationError 2 | 3 | from evaluation.metrics.utils import create_dynamic_model, convert_to_json 4 | 5 | 6 | def get_assert(output: str, context): 7 | """Evaluates the precision at k.""" 8 | threshold = 0.99 9 | llm_answer, true_answer = convert_to_json(output, context, threshold) 10 | 11 | try: 12 | model_true_answer = create_dynamic_model(true_answer) 13 | true_answer = model_true_answer(**true_answer) 14 | 15 | llm_answer = model_true_answer(**llm_answer) 16 | 17 | if llm_answer == true_answer: 18 | score = 1.0 19 | reason = f"{score} > {threshold} = {score > threshold}" 20 | else: 21 | dict_a = llm_answer.model_dump() 22 | dict_b = true_answer.model_dump() 23 | differences = [key for key in dict_b.keys() if dict_a.get(key) != dict_b.get(key)] 24 | 25 | score = round(float(1 - (len(differences) / len(llm_answer.model_fields))), 2) 26 | 27 | reason = f"{score} > {threshold} = {score > threshold}. Number of differences: {len(differences)}. 
Differences: {differences}" 28 | 29 | except ValidationError as e: 30 | total_fields = len(llm_answer.model_fields) 31 | errors_count = len(e.errors()) 32 | score = round(float(1 - (errors_count / total_fields)), 2) 33 | reason = str(e) 34 | 35 | return { 36 | "pass": score > threshold, 37 | "score": score, 38 | "reason": reason, 39 | } 40 | -------------------------------------------------------------------------------- /src/evaluation/metrics/information_extraction/missing_fields.py: -------------------------------------------------------------------------------- 1 | from pydantic import ValidationError 2 | 3 | from evaluation.metrics.data_types import GradingResult 4 | from evaluation.metrics.utils import create_dynamic_model, convert_to_json 5 | 6 | 7 | def get_assert(output: str, context) -> GradingResult: 8 | """Evaluates the precision at k.""" 9 | threshold = 0.99 10 | 11 | llm_answer, true_answer = convert_to_json(output, context, threshold) 12 | 13 | try: 14 | model_true_answer = create_dynamic_model(true_answer) 15 | # true_answer = model_true_answer(**true_answer) 16 | 17 | llm_answer = model_true_answer(**llm_answer) 18 | null_fields = [key for key, value in llm_answer.model_dump().items() if value is None] 19 | 20 | score = round(float(1 - (len(null_fields) / len(llm_answer.model_fields))), 2) 21 | 22 | reason = ( 23 | f"{score} > {threshold} = {score > threshold}. Number of null fields: {len(null_fields)}. " 24 | f"null_fields: {null_fields}" 25 | ) 26 | except ValidationError as e: 27 | error = validation_error_message(e) 28 | total_fields = len(llm_answer.model_fields) 29 | errors_count = len(error.errors()) 30 | score = float(1 - (errors_count / total_fields)) 31 | reason = str(error) 32 | 33 | return { 34 | "pass": score > threshold, 35 | "score": score, 36 | "reason": reason, 37 | } 38 | 39 | 40 | def validation_error_message(error: ValidationError) -> ValidationError: 41 | for err in error.errors(): 42 | err.pop("input") 43 | err.pop("url") 44 | 45 | return error 46 | -------------------------------------------------------------------------------- /src/evaluation/metrics/information_extraction/similarity_json.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from pydantic import ValidationError, BaseModel 3 | 4 | from evaluation.metrics.utils import ( 5 | create_dynamic_model, 6 | convert_to_json, 7 | ) 8 | from utils import llmaaj_embedding_client 9 | 10 | 11 | def get_assert(output: str, context): 12 | """Evaluates the precision at k.""" 13 | threshold = 0.99 14 | llm_answer, true_answer = convert_to_json(output, context, threshold) 15 | 16 | try: 17 | model_true_answer = create_dynamic_model(true_answer) 18 | true_answer = model_true_answer(**true_answer) 19 | 20 | llm_answer = model_true_answer(**llm_answer) 21 | 22 | if llm_answer == true_answer: 23 | score = 1.0 24 | reason = f"{score} > {threshold} = {score > threshold}" 25 | else: 26 | dict_a = llm_answer.model_dump() 27 | dict_b = true_answer.model_dump() 28 | differences = [key for key in dict_b.keys() if dict_a.get(key) != dict_b.get(key)] 29 | 30 | num_similar_fields = len(llm_answer.model_fields) - len(differences) 31 | 32 | result, similarity = compare_pydantic_objects(llm_answer, true_answer, differences) 33 | score = round( 34 | float((num_similar_fields + similarity) / len(llm_answer.model_fields)), 35 | 2, 36 | ) 37 | 38 | reason = f"{score} > {threshold} = {score > threshold}. Number of differences: {len(differences)}. 
Differences: {result}" 39 | 40 | except ValidationError as e: 41 | total_fields = len(llm_answer.model_fields) 42 | errors_count = len(e.errors()) 43 | score = round(float(1 - (errors_count / total_fields)), 2) 44 | reason = str(e) 45 | 46 | return { 47 | "pass": score > threshold, 48 | "score": score, 49 | "reason": reason, 50 | } 51 | 52 | 53 | def compare_pydantic_objects( 54 | obj1: BaseModel, obj2: BaseModel, differences: list = None 55 | ) -> dict[str, float]: 56 | """Compare two Pydantic objects using cosine similarity.""" 57 | result = {} 58 | total_similarity = 0 59 | similarity = 0 60 | if not differences: 61 | differences = obj1.model_fields 62 | 63 | for field in differences: 64 | value1 = getattr(obj1, field) 65 | value2 = getattr(obj2, field) 66 | if value1 != value2: 67 | if value1 and value2: 68 | embedding1 = llmaaj_embedding_client.embed_query(text=str(value1)) 69 | embedding2 = llmaaj_embedding_client.embed_query(text=str(value2)) 70 | similarity = round(cosine_similarity(embedding1, embedding2), 2) 71 | else: 72 | similarity = 0 73 | else: 74 | similarity = 1 75 | 76 | result[field] = similarity 77 | total_similarity += similarity 78 | return result, total_similarity 79 | 80 | 81 | def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float: 82 | """Calculate cosine similarity between two vectors.""" 83 | return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)) 84 | -------------------------------------------------------------------------------- /src/evaluation/metrics/order_aware/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmineDjeghri/generative-ai-project-template/627bd5460e6720d3b10598918e4b9896576e5e83/src/evaluation/metrics/order_aware/__init__.py -------------------------------------------------------------------------------- /src/evaluation/metrics/order_aware/reciprocal_rank.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | from evaluation.metrics.data_types import GradingResult 4 | from utils import safe_eval 5 | 6 | 7 | # pomptfoo cwd is evaluations 8 | 9 | 10 | # def ragas_context_answer_similarity(input, output, reference, metadata, expected) -> float: 11 | # def get_assert(output: str, context) -> Union[bool, float, Dict[str, Any]]: 12 | def get_assert(output: str, context) -> GradingResult: 13 | """Evaluates the precision at k.""" 14 | retrieved_docs = safe_eval(context["vars"]["context"]) 15 | relevant_docs = safe_eval(context["vars"]["relevant_context"]) 16 | 17 | score = 0 18 | # compute Reciprocal Rank 19 | try: 20 | score = round(1 / (relevant_docs.index(retrieved_docs[0]) + 1), 2) 21 | except ValueError: 22 | score = -1 23 | 24 | # threshold = context["test"]["metadata"]["threshold_ragas_as"] 25 | threshold = 0 26 | 27 | if math.isnan(score): 28 | score = 0.0 29 | 30 | return { 31 | "pass": score > threshold, 32 | "score": score, 33 | "reason": f"{score} > {threshold} = {score > threshold}", 34 | } 35 | -------------------------------------------------------------------------------- /src/evaluation/metrics/order_unaware/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmineDjeghri/generative-ai-project-template/627bd5460e6720d3b10598918e4b9896576e5e83/src/evaluation/metrics/order_unaware/__init__.py -------------------------------------------------------------------------------- 
/src/evaluation/metrics/order_unaware/f1_at_k.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | 4 | from evaluation.metrics.data_types import GradingResult 5 | from evaluation.metrics.order_unaware import precision_at_k, recall_at_k 6 | 7 | 8 | def get_assert(output: str, context) -> GradingResult: 9 | """Calculates F1@k.""" 10 | precision = precision_at_k.get_assert(context=context, output=output)["score"] 11 | recall = recall_at_k.get_assert(context=context, output=output)["score"] 12 | 13 | if precision + recall == 0: 14 | score = 0.0 15 | else: 16 | score = round(float(2 * (precision * recall) / (precision + recall)), 2) 17 | 18 | # threshold = context["test"]["metadata"]["threshold_ragas_as"] 19 | threshold = 0 20 | 21 | if math.isnan(score): 22 | score = 0.0 23 | 24 | return { 25 | "pass": score > threshold, 26 | "score": score, 27 | "reason": f"{score} > {threshold} = {score > threshold}", 28 | } 29 | -------------------------------------------------------------------------------- /src/evaluation/metrics/order_unaware/precision_at_k.py: -------------------------------------------------------------------------------- 1 | import math 2 | import os 3 | 4 | from evaluation.metrics.data_types import GradingResult 5 | from utils import safe_eval 6 | 7 | 8 | def get_assert(output: str, context) -> GradingResult: 9 | """Evaluates the precision at k.""" 10 | retrieved_docs = safe_eval(context["vars"]["context"]) 11 | relevant_docs = safe_eval(context["vars"]["relevant_context"]) 12 | k = int(os.environ.get("K", 3))  # env var values are strings; coerce to int for slicing and division 13 | retrieved_docs_at_k = retrieved_docs[:k] 14 | relevant_count = sum([1 for doc in retrieved_docs_at_k if doc in relevant_docs]) 15 | score = float(relevant_count / k) 16 | 17 | # threshold = context["test"]["metadata"]["threshold_ragas_as"] 18 | threshold = 0 19 | 20 | if math.isnan(score): 21 | score = 0.0 22 | 23 | return { 24 | "pass": score > threshold, 25 | "score": score, 26 | "reason": f"{score} > {threshold} = {score > threshold}", 27 | } 28 | -------------------------------------------------------------------------------- /src/evaluation/metrics/order_unaware/recall_at_k.py: -------------------------------------------------------------------------------- 1 | import math 2 | import os 3 | 4 | from evaluation.metrics.data_types import GradingResult 5 | from utils import safe_eval, time_function 6 | 7 | 8 | @time_function 9 | def get_assert(output: str, context) -> GradingResult: 10 | retrieved_docs = safe_eval(context["vars"]["context"]) 11 | relevant_docs = safe_eval(context["vars"]["relevant_context"]) 12 | k = int(os.environ.get("K", 3))  # env var values are strings; coerce to int 13 | retrieved_docs_at_k = retrieved_docs[:k] 14 | relevant_count = sum([1 for doc in retrieved_docs_at_k if doc in relevant_docs]) 15 | score = round(float(relevant_count / len(relevant_docs)), 2) 16 | 17 | # threshold = context["test"]["metadata"]["threshold_ragas_as"] 18 | threshold = 0 19 | 20 | if math.isnan(score): 21 | score = 0.0 22 | 23 | return { 24 | "pass": score > threshold, 25 | "score": score, 26 | "reason": f"{score} > {threshold} = {score > threshold}", 27 | } 28 | -------------------------------------------------------------------------------- /src/evaluation/metrics/ragas_metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmineDjeghri/generative-ai-project-template/627bd5460e6720d3b10598918e4b9896576e5e83/src/evaluation/metrics/ragas_metrics/__init__.py 
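For intuition, the standalone snippet below reproduces the arithmetic of the retrieval metrics implemented above (precision@k, recall@k, F1@k, and the reciprocal-rank formula from order_aware/reciprocal_rank.py) on made-up document lists; the data and the resulting numbers are illustrative only.

# Illustrative walkthrough of the retrieval metrics (made-up data, not from the repository).
retrieved = ["doc_a", "doc_b", "doc_x"]   # retriever output, in rank order
relevant = ["doc_a", "doc_c"]             # gold relevant documents
k = 3

hits = sum(1 for doc in retrieved[:k] if doc in relevant)       # 1 hit: "doc_a"
precision_at_k = hits / k                                       # 1/3 ≈ 0.33
recall_at_k = hits / len(relevant)                              # 1/2 = 0.50
f1_at_k = (
    0.0
    if precision_at_k + recall_at_k == 0
    else 2 * precision_at_k * recall_at_k / (precision_at_k + recall_at_k)
)                                                               # ≈ 0.40

# Reciprocal rank as computed in reciprocal_rank.py: position of the first
# retrieved document within the relevant list (1-based), inverted.
reciprocal_rank = 1 / (relevant.index(retrieved[0]) + 1)        # "doc_a" is relevant[0] -> 1.0

print(precision_at_k, recall_at_k, f1_at_k, reciprocal_rank)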
-------------------------------------------------------------------------------- /src/evaluation/metrics/ragas_metrics/ragas_answer_correctness.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | from ragas import evaluate, RunConfig 4 | from ragas.metrics import answer_correctness 5 | 6 | from evaluation.metrics.data_types import GradingResult 7 | from evaluation.metrics.utils import to_dataset 8 | from utils import llmaaj_chat_client, llmaaj_embedding_client 9 | 10 | 11 | # def ragas_context_answer_similarity(input, output, reference, metadata, expected) -> float: 12 | # def get_assert(output: str, context) -> Union[bool, float, Dict[str, Any]]: 13 | def get_assert(output: str, context) -> GradingResult: 14 | eval_dataset = to_dataset(output=output, context=context) 15 | 16 | result = evaluate( 17 | eval_dataset, 18 | metrics=[answer_correctness], 19 | llm=llmaaj_chat_client, 20 | embeddings=llmaaj_embedding_client, 21 | run_config=RunConfig(max_workers=64), 22 | ).to_pandas() 23 | # 'score': result['answer_similarity'], 24 | 25 | score = float(result["answer_correctness"]) 26 | # threshold = context["test"]["metadata"]["threshold_ragas_as"] 27 | threshold = 0 28 | 29 | if math.isnan(score): 30 | score = 0.0 31 | 32 | return { 33 | "pass": score > threshold, 34 | "score": score, 35 | "reason": f"{score} > {threshold} = {score > threshold}", 36 | } 37 | 38 | 39 | if __name__ == "__main__": 40 | x = get_assert("blop", {"vars": {"ground_truth": "blop"}}) 41 | 42 | print("XXXX:", x) 43 | -------------------------------------------------------------------------------- /src/evaluation/metrics/ragas_metrics/ragas_answer_relevancy.py: -------------------------------------------------------------------------------- 1 | import math 2 | from ragas import evaluate, RunConfig 3 | from ragas.metrics import answer_relevancy 4 | 5 | from evaluation.metrics.data_types import GradingResult 6 | from evaluation.metrics.utils import to_dataset 7 | from utils import llmaaj_chat_client, llmaaj_embedding_client 8 | 9 | 10 | def get_assert(output: str, context) -> GradingResult: 11 | eval_dataset = to_dataset(output=output, context=context) 12 | 13 | result = evaluate( 14 | eval_dataset, 15 | metrics=[answer_relevancy], 16 | llm=llmaaj_chat_client, 17 | embeddings=llmaaj_embedding_client, 18 | run_config=RunConfig(max_workers=64), 19 | ).to_pandas() 20 | # 'score': result['answer_similarity'], 21 | 22 | score = float(result["answer_relevancy"]) 23 | # threshold = context["test"]["metadata"]["threshold_ragas_as"] 24 | threshold = 0 25 | 26 | if math.isnan(score): 27 | score = 0.0 28 | 29 | return { 30 | "pass": score > threshold, 31 | "score": score, 32 | "reason": f"{score} > {threshold} = {score > threshold}", 33 | } 34 | 35 | 36 | if __name__ == "__main__": 37 | x = get_assert("blop", {"vars": {"ground_truth": "blop"}}) 38 | 39 | print("XXXX:", x) 40 | -------------------------------------------------------------------------------- /src/evaluation/metrics/ragas_metrics/ragas_answer_similarity.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | from ragas import evaluate, RunConfig 4 | from ragas.metrics import answer_similarity 5 | 6 | from evaluation.metrics.data_types import GradingResult 7 | from evaluation.metrics.utils import to_dataset 8 | from utils import llmaaj_chat_client, llmaaj_embedding_client 9 | from utils import time_function 10 | 11 | 12 | @time_function 13 | def get_assert(output: 
str, context) -> GradingResult: 14 | eval_dataset = to_dataset(output=output, context=context) 15 | 16 | result = evaluate( 17 | eval_dataset, 18 | metrics=[ 19 | answer_similarity, 20 | ], 21 | llm=llmaaj_chat_client, 22 | embeddings=llmaaj_embedding_client, 23 | run_config=RunConfig(max_workers=64), 24 | ).to_pandas() 25 | # 'score': result['answer_similarity'], 26 | score = float(result["semantic_similarity"]) 27 | # threshold = context["test"]["metadata"]["threshold_ragas_as"] 28 | threshold = 0 29 | 30 | if math.isnan(score): 31 | score = 0.0 32 | 33 | return { 34 | "pass": score > threshold, 35 | "score": score, 36 | "reason": f"{score} > {threshold} = {score > threshold}", 37 | } 38 | -------------------------------------------------------------------------------- /src/evaluation/metrics/ragas_metrics/ragas_context_entity_recall.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | from ragas import evaluate, RunConfig 4 | from ragas.metrics import context_entity_recall 5 | 6 | from evaluation.metrics.data_types import GradingResult 7 | from evaluation.metrics.utils import to_dataset 8 | from utils import llmaaj_chat_client, llmaaj_embedding_client 9 | 10 | 11 | def get_assert(output: str, context) -> GradingResult: 12 | eval_dataset = to_dataset(output=output, context=context) 13 | 14 | result = evaluate( 15 | eval_dataset, 16 | metrics=[context_entity_recall], 17 | llm=llmaaj_chat_client, 18 | embeddings=llmaaj_embedding_client, 19 | run_config=RunConfig(max_workers=64), 20 | ).to_pandas() 21 | # 'score': result['answer_similarity'], 22 | 23 | score = float(result["context_entity_recall"]) 24 | # threshold = context["test"]["metadata"]["threshold_ragas_as"] 25 | threshold = 0 26 | 27 | if math.isnan(score): 28 | score = 0.0 29 | 30 | return { 31 | "pass": score > threshold, 32 | "score": score, 33 | "reason": f"{score} > {threshold} = {score > threshold}", 34 | } 35 | 36 | 37 | if __name__ == "__main__": 38 | x = get_assert("blop", {"vars": {"ground_truth": "blop"}}) 39 | 40 | print("XXXX:", x) 41 | -------------------------------------------------------------------------------- /src/evaluation/metrics/ragas_metrics/ragas_context_precision.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | from ragas import evaluate, RunConfig 4 | from ragas.metrics import context_precision 5 | 6 | from evaluation.metrics.data_types import GradingResult 7 | from evaluation.metrics.utils import to_dataset 8 | from utils import llmaaj_chat_client, llmaaj_embedding_client 9 | 10 | 11 | def get_assert(output: str, context) -> GradingResult: 12 | eval_dataset = to_dataset(output=output, context=context) 13 | 14 | result = evaluate( 15 | eval_dataset, 16 | metrics=[context_precision], 17 | llm=llmaaj_chat_client, 18 | embeddings=llmaaj_embedding_client, 19 | run_config=RunConfig(max_workers=64), 20 | ).to_pandas() 21 | # 'score': result['answer_similarity'], 22 | 23 | score = float(result["context_precision"]) 24 | # threshold = context["test"]["metadata"]["threshold_ragas_as"] 25 | threshold = 0 26 | 27 | if math.isnan(score): 28 | score = 0.0 29 | 30 | return { 31 | "pass": score > threshold, 32 | "score": score, 33 | "reason": f"{score} > {threshold} = {score > threshold}", 34 | } 35 | 36 | 37 | if __name__ == "__main__": 38 | x = get_assert("blop", {"vars": {"ground_truth": "blop"}}) 39 | 40 | print("XXXX:", x) 41 | 
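All the ragas wrapper modules in this directory repeat the same evaluate-then-threshold boilerplate and differ only in the metric object and the name of the result column. A possible shared helper is sketched below; it is illustrative only (a ragas_assert function does not exist anywhere in the repository) and reuses the same clients and utilities the wrappers already import.

# Illustrative refactor sketch (not part of the repository): one helper for the
# boilerplate shared by the ragas_* wrapper modules above and below.
import math

from ragas import evaluate, RunConfig

from evaluation.metrics.data_types import GradingResult
from evaluation.metrics.utils import to_dataset
from utils import llmaaj_chat_client, llmaaj_embedding_client


def ragas_assert(output: str, context, metric, column: str, threshold: float = 0) -> GradingResult:
    """Run a single ragas metric and convert the result into a promptfoo grading dict."""
    eval_dataset = to_dataset(output=output, context=context)
    result = evaluate(
        eval_dataset,
        metrics=[metric],
        llm=llmaaj_chat_client,
        embeddings=llmaaj_embedding_client,
        run_config=RunConfig(max_workers=64),
    ).to_pandas()

    score = float(result[column])
    if math.isnan(score):
        score = 0.0

    return {
        "pass": score > threshold,
        "score": score,
        "reason": f"{score} > {threshold} = {score > threshold}",
    }

With such a helper, a module like ragas_context_precision.py would reduce to a one-line get_assert, for example: return ragas_assert(output, context, context_precision, "context_precision"). Note that ragas_answer_similarity.py reads its score from the "semantic_similarity" column.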
-------------------------------------------------------------------------------- /src/evaluation/metrics/ragas_metrics/ragas_context_recall.py: -------------------------------------------------------------------------------- 1 | import math 2 | from ragas import evaluate, RunConfig 3 | from ragas.metrics import context_recall 4 | 5 | 6 | from evaluation.metrics.data_types import GradingResult 7 | from evaluation.metrics.utils import to_dataset 8 | from utils import llmaaj_chat_client, llmaaj_embedding_client 9 | from utils import time_function 10 | 11 | 12 | # def ragas_context_answer_similarity(input, output, reference, metadata, expected) -> float: 13 | # def get_assert(output: str, context) -> Union[bool, float, Dict[str, Any]]: 14 | @time_function 15 | def get_assert(output: str, context) -> GradingResult: 16 | eval_dataset = to_dataset(output=output, context=context) 17 | 18 | result = evaluate( 19 | eval_dataset, 20 | metrics=[context_recall], 21 | llm=llmaaj_chat_client, 22 | embeddings=llmaaj_embedding_client, 23 | run_config=RunConfig(max_workers=64), 24 | ).to_pandas() 25 | # 'score': result['answer_similarity'], 26 | 27 | score = float(result["context_recall"]) 28 | # threshold = context["test"]["metadata"]["threshold_ragas_as"] 29 | threshold = 0 30 | 31 | if math.isnan(score): 32 | score = 0.0 33 | 34 | return { 35 | "pass": score > threshold, 36 | "score": score, 37 | "reason": f"{score} > {threshold} = {score > threshold}", 38 | } 39 | 40 | 41 | if __name__ == "__main__": 42 | x = get_assert("blop", {"vars": {"ground_truth": "blop"}}) 43 | 44 | print("XXXX:", x) 45 | -------------------------------------------------------------------------------- /src/evaluation/metrics/ragas_metrics/ragas_context_utilization.py: -------------------------------------------------------------------------------- 1 | import math 2 | from ragas import evaluate, RunConfig 3 | from ragas.metrics import context_utilization 4 | 5 | 6 | from evaluation.metrics.data_types import GradingResult 7 | from evaluation.metrics.utils import to_dataset 8 | from utils import llmaaj_chat_client, llmaaj_embedding_client 9 | 10 | 11 | # def ragas_context_answer_similarity(input, output, reference, metadata, expected) -> float: 12 | # def get_assert(output: str, context) -> Union[bool, float, Dict[str, Any]]: 13 | def get_assert(output: str, context) -> GradingResult: 14 | eval_dataset = to_dataset(output=output, context=context) 15 | 16 | result = evaluate( 17 | eval_dataset, 18 | metrics=[context_utilization], 19 | llm=llmaaj_chat_client, 20 | embeddings=llmaaj_embedding_client, 21 | run_config=RunConfig(max_workers=64), 22 | ).to_pandas() 23 | # 'score': result['answer_similarity'], 24 | 25 | score = float(result["context_utilization"]) 26 | # threshold = context["test"]["metadata"]["threshold_ragas_as"] 27 | threshold = 0 28 | 29 | if math.isnan(score): 30 | score = 0.0 31 | 32 | return { 33 | "pass": score > threshold, 34 | "score": score, 35 | "reason": f"{score} > {threshold} = {score > threshold}", 36 | } 37 | 38 | 39 | if __name__ == "__main__": 40 | x = get_assert("blop", {"vars": {"ground_truth": "blop"}}) 41 | 42 | print("XXXX:", x) 43 | -------------------------------------------------------------------------------- /src/evaluation/metrics/ragas_metrics/ragas_faithfulness.py: -------------------------------------------------------------------------------- 1 | import math 2 | from ragas import evaluate, RunConfig 3 | from ragas.metrics import faithfulness 4 | 5 | 6 | from evaluation.metrics.data_types 
import GradingResult 7 | from evaluation.metrics.utils import to_dataset 8 | from utils import llmaaj_chat_client, llmaaj_embedding_client 9 | 10 | 11 | # def ragas_context_answer_similarity(input, output, reference, metadata, expected) -> float: 12 | # def get_assert(output: str, context) -> Union[bool, float, Dict[str, Any]]: 13 | def get_assert(output: str, context) -> GradingResult: 14 | eval_dataset = to_dataset(output=output, context=context) 15 | result = evaluate( 16 | eval_dataset, 17 | metrics=[faithfulness], 18 | llm=llmaaj_chat_client, 19 | embeddings=llmaaj_embedding_client, 20 | run_config=RunConfig(max_workers=64), 21 | ).to_pandas() 22 | # 'score': result['answer_similarity'], 23 | 24 | score = float(result["faithfulness"]) 25 | # threshold = context["test"]["metadata"]["threshold_ragas_as"] 26 | threshold = 0 27 | 28 | if math.isnan(score): 29 | score = 0.0 30 | 31 | return { 32 | "pass": score > threshold, 33 | "score": score, 34 | "reason": f"{score} > {threshold} = {score > threshold}", 35 | } 36 | 37 | 38 | if __name__ == "__main__": 39 | x = get_assert("blop", {"vars": {"ground_truth": "blop"}}) 40 | 41 | print("XXXX:", x) 42 | -------------------------------------------------------------------------------- /src/evaluation/metrics/ragas_metrics/ragas_harmfulness.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | from ragas import evaluate, RunConfig 4 | from ragas.metrics.critique import harmfulness 5 | 6 | from evaluation.metrics.data_types import GradingResult 7 | from evaluation.metrics.utils import to_dataset 8 | from utils import llmaaj_chat_client, llmaaj_embedding_client 9 | 10 | 11 | def get_assert(output: str, context) -> GradingResult: 12 | eval_dataset = to_dataset(output=output, context=context) 13 | 14 | result = evaluate( 15 | eval_dataset, 16 | metrics=[harmfulness], 17 | llm=llmaaj_chat_client, 18 | embeddings=llmaaj_embedding_client, 19 | run_config=RunConfig(max_workers=64), 20 | ).to_pandas() 21 | # 'score': result['answer_similarity'], 22 | 23 | score = float(result["harmfulness"]) 24 | # threshold = context["test"]["metadata"]["threshold_ragas_as"] 25 | threshold = 0 26 | 27 | if math.isnan(score): 28 | score = 0.0 29 | 30 | return { 31 | "pass": score > threshold, 32 | "score": score, 33 | "reason": f"{score} > {threshold} = {score > threshold}", 34 | } 35 | 36 | 37 | if __name__ == "__main__": 38 | x = get_assert("blop", {"vars": {"ground_truth": "blop"}}) 39 | 40 | print("XXXX:", x) 41 | -------------------------------------------------------------------------------- /src/evaluation/metrics/utils.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import json 3 | from typing import Optional 4 | 5 | from datasets import Dataset 6 | from pydantic import Field, create_model 7 | 8 | from utils import logger 9 | 10 | 11 | def safe_eval(x): 12 | try: 13 | return ast.literal_eval(x) 14 | except ValueError: 15 | raise Exception(f"Value error in safe") 16 | 17 | 18 | def to_dataset(output, context): 19 | # question, ground truth and output can be dict (json information extraction) or str 20 | # dict: for example '{field:question}' , ground_truth is '{field: ground_truth}', output is '{field: answer}' 21 | # or simply strings 22 | question = context["vars"]["query"] 23 | ground_truth = context["vars"]["ground_truth"] 24 | contexts = context["vars"]["context"] 25 | 26 | # todo: add if is json parameter and also in the promptfoo config to support 
27 | # string responses, json responses, 28 | 29 | try: 30 | output = safe_eval(output) 31 | except Exception: 32 | logger.warning(f" safe eval output: {output}") 33 | 34 | try: 35 | question = safe_eval(question) 36 | except Exception: 37 | logger.warning(f" safe eval question: {question}") 38 | 39 | try: 40 | ground_truth = safe_eval(ground_truth) 41 | except Exception: 42 | logger.warning(f" in safe eval ground_truth: {ground_truth}") 43 | 44 | try: 45 | contexts = safe_eval(contexts) 46 | except Exception: 47 | logger.warning(f" in safe eval contexts: {contexts}") 48 | 49 | # context should be a list of strings as input and we transform it to a list of list of str because of ragas 50 | if isinstance(contexts, list): 51 | if isinstance(contexts[0], str): 52 | if isinstance(ground_truth, dict): 53 | # if the output is a json response, we will evaluate each element of the json response to each 54 | # element of the json ground_truth. For each element, we copy the contexts received for the whole json. 55 | contexts = [contexts for _ in range(len(ground_truth))] 56 | else: 57 | contexts = [contexts] 58 | elif isinstance(contexts[0], list) and isinstance(contexts[0][0], str): 59 | pass 60 | else: 61 | raise Exception( 62 | f"Value error in Context should be a list of strings. Context: {contexts}" 63 | ) 64 | else: 65 | raise Exception(f"Value error in Context should be a list of strings. Context: {contexts}") 66 | 67 | # question should be an str and we transform it to a list of string because of ragas 68 | if isinstance(question, dict) and isinstance( 69 | list(question.values())[0], str 70 | ): # format is {field: question} 71 | question = list(question.values()) 72 | elif isinstance(question, str): 73 | question = [question] 74 | elif not isinstance(question, list): 75 | raise Exception(f"Value error in question: {question}") 76 | 77 | # ground_truth should be an str and we transform it to a list of string because of ragas 78 | if isinstance(ground_truth, dict) and isinstance(list(ground_truth.values())[0], str): 79 | ground_truth = list(ground_truth.values()) 80 | elif isinstance(ground_truth, str): 81 | ground_truth = [ground_truth] 82 | elif not isinstance(ground_truth, list): 83 | raise Exception(f"Value error in ground_truth: {ground_truth}") 84 | 85 | # output should be an str and we transform it to a list of string because of ragas 86 | if isinstance(output, dict) and isinstance(list(output.values())[0], str): 87 | output = list(output.values()) 88 | elif isinstance(output, str): 89 | output = [output] 90 | else: 91 | raise Exception(f"Value error in output: {output}") 92 | 93 | # check if all the lists have the same length 94 | lengths = [len(ground_truth), len(contexts), len(question), len(output)] 95 | if len(set(lengths)) != 1: 96 | raise Exception( 97 | f"Output, ground truth, contexts and question should have the same length : " 98 | f"len output {len(output)}, len ground_truth {len(ground_truth)}, len contexts {len(contexts)}, " 99 | f"len question {len(question)}" 100 | ) 101 | 102 | return Dataset.from_dict( 103 | { 104 | "ground_truth": ground_truth, 105 | "answer": output, 106 | "contexts": contexts, 107 | "question": question, 108 | } 109 | ) 110 | 111 | 112 | def to_evaldataset(output, context): 113 | # question, ground truth and output can be dict (json information extraction) or str 114 | # dict: for example '{field:question}' , ground_truth is '{field: ground_truth}', output is '{field: answer}' 115 | # or simply strings 116 | question = context["vars"]["query"] 117 
| ground_truth = context["vars"]["ground_truth"] 118 | contexts = context["vars"]["context"] 119 | 120 | # todo: add if is json parameter and also in the promptfoo config to support 121 | # string responses, json responses, 122 | 123 | try: 124 | output = safe_eval(output) 125 | except Exception: 126 | logger.warning(f" safe eval output: {output}") 127 | 128 | try: 129 | question = safe_eval(question) 130 | except Exception: 131 | logger.warning(f" safe eval question: {question}") 132 | 133 | try: 134 | ground_truth = safe_eval(ground_truth) 135 | except Exception: 136 | logger.warning(f" in safe eval ground_truth: {ground_truth}") 137 | 138 | try: 139 | contexts = safe_eval(contexts) 140 | except Exception: 141 | logger.warning(f" in safe eval contexts: {contexts}") 142 | 143 | # context should be a list of strings as input and we transform it to a list of list of str because of ragas 144 | if isinstance(contexts, list): 145 | if isinstance(contexts[0], str): 146 | if isinstance(ground_truth, dict): 147 | # if the output is a json response, we will evaluate each element of the json response to each 148 | # element of the json ground_truth. For each element, we copy the contexts received for the whole json. 149 | contexts = [contexts for _ in range(len(ground_truth))] 150 | else: 151 | contexts = [contexts] 152 | elif isinstance(contexts[0], list) and isinstance(contexts[0][0], str): 153 | pass 154 | else: 155 | raise Exception( 156 | f"Value error in Context should be a list of strings. Context: {contexts}" 157 | ) 158 | else: 159 | raise Exception(f"Value error in Context should be a list of strings. Context: {contexts}") 160 | 161 | # question should be an str and we transform it to a list of string because of ragas 162 | if isinstance(question, dict) and isinstance( 163 | list(question.values())[0], str 164 | ): # format is {field: question} 165 | question = list(question.values()) 166 | elif isinstance(question, str): 167 | question = [question] 168 | elif not isinstance(question, list): 169 | raise Exception(f"Value error in question: {question}") 170 | 171 | # ground_truth should be an str and we transform it to a list of string because of ragas 172 | if isinstance(ground_truth, dict) and isinstance(list(ground_truth.values())[0], str): 173 | ground_truth = list(ground_truth.values()) 174 | elif isinstance(ground_truth, str): 175 | ground_truth = [ground_truth] 176 | elif not isinstance(ground_truth, list): 177 | raise Exception(f"Value error in ground_truth: {ground_truth}") 178 | 179 | # output should be an str and we transform it to a list of string because of ragas 180 | if isinstance(output, dict) and isinstance(list(output.values())[0], str): 181 | output = list(output.values()) 182 | elif isinstance(output, str): 183 | output = [output] 184 | else: 185 | raise Exception(f"Value error in output: {output}") 186 | 187 | # check if all the lists have the same length 188 | lengths = [len(ground_truth), len(contexts), len(question), len(output)] 189 | if len(set(lengths)) != 1: 190 | raise Exception( 191 | f"Output, ground truth, contexts and question should have the same length : " 192 | f"len output {len(output)}, len ground_truth {len(ground_truth)}, len contexts {len(contexts)}, " 193 | f"len question {len(question)}" 194 | ) 195 | 196 | return Dataset.from_dict( 197 | { 198 | "ground_truth": ground_truth, 199 | "answer": output, 200 | "contexts": contexts, 201 | "question": question, 202 | } 203 | ) 204 | 205 | 206 | def create_dynamic_model(input_dict: dict): 207 | fields = { 
208 | i: (Optional[str], Field(default=None, description=question)) 209 | for i, question in input_dict.items() 210 | } 211 | 212 | return create_model("DynamicModel", **fields) 213 | 214 | 215 | def convert_to_json(output, context, threshold): 216 | try: 217 | if not isinstance(output, dict): 218 | llm_answer = json.loads(output) 219 | else: 220 | llm_answer = output 221 | true_answer = json.loads(context["vars"]["ground_truth"]) 222 | return llm_answer, true_answer 223 | except Exception as exc: 224 | # Callers unpack a (llm_answer, true_answer) tuple, so fail loudly with a clear message 225 | # instead of returning a grading dict that would break that unpacking (threshold is kept for the callers' signature). 226 | raise ValueError( 227 | "answer or ground_truth is not a valid json to be used in this metric" 228 | ) from exc 229 | 230 | -------------------------------------------------------------------------------- /src/main_backend.py: -------------------------------------------------------------------------------- 1 | import uvicorn 2 | 3 | from api.log_config import LOGGING_CONFIG 4 | from utils import logger, settings 5 | 6 | if __name__ == "__main__": 7 | if settings.DEV_MODE: 8 | logger.info("Running app in DEV mode") 9 | reload = True 10 | LOGGING_CONFIG["loggers"]["uvicorn"]["level"] = "DEBUG" 11 | LOGGING_CONFIG["loggers"]["uvicorn.error"]["level"] = "DEBUG" 12 | LOGGING_CONFIG["loggers"]["uvicorn.access"]["level"] = "DEBUG" 13 | else: 14 | logger.info("Running app in PROD mode") 15 | reload = False 16 | uvicorn.run( 17 | app="api.api:app", 18 | host=settings.FASTAPI_HOST, 19 | port=settings.FASTAPI_PORT, 20 | reload=reload, 21 | log_config=LOGGING_CONFIG, 22 | ) 23 | -------------------------------------------------------------------------------- /src/main_frontend.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | 3 | st.write("# Home Page") 4 | 5 | st.write( 6 | """This application template showcases the versatility of Streamlit and FastAPI: you can use Streamlit on its own or pair it with a FastAPI backend for enhanced capabilities. 7 | The following pages showcase the connection with Azure: 8 | 9 | - Without FastAPI: interact directly with Azure Blob Storage from Streamlit to manage files, for example uploading and deleting documents. 10 | 11 | - With FastAPI: the FastAPI backend provides a foundation for interactive and scalable applications (for example, RAG). It queries Azure AI Search to retrieve relevant documents, and Streamlit calls the backend to display the search results. 12 | 13 | - Using only Azure OpenAI, without any RAG: just a chat.
14 | """ 15 | ) 16 | -------------------------------------------------------------------------------- /src/ml/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmineDjeghri/generative-ai-project-template/627bd5460e6720d3b10598918e4b9896576e5e83/src/ml/__init__.py -------------------------------------------------------------------------------- /src/ml/ai.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from pydantic import BaseModel 3 | 4 | from utils import logger, settings, search_client 5 | 6 | 7 | def get_completions( 8 | messages: list, 9 | stream: bool = False, 10 | response_model: BaseModel = None, # Use Instructor library 11 | max_tokens: int = 1000, 12 | temperature: int = 0, 13 | top_p: int = 1, 14 | seed: int = 100, 15 | full_response: bool = False, 16 | client=None, 17 | ) -> str | BaseModel | None: 18 | """Returns a response from the azure openai model. 19 | 20 | Args: 21 | messages: 22 | stream: 23 | response_model: 24 | monitor: 25 | max_tokens: 26 | temperature: 27 | top_p: 28 | seed: 29 | full_response: 30 | client: 31 | 32 | Returns: 33 | response : str | BaseModel | None : 34 | """ 35 | input_dict = { 36 | "model": "aasasa", 37 | "messages": messages, 38 | "max_tokens": max_tokens, 39 | "temperature": temperature, 40 | "top_p": top_p, 41 | "seed": seed, 42 | "stream": stream, 43 | } 44 | # if response_model: 45 | # # if you use local models instead of openai models, the response_model feature may not work 46 | # client = instructor.from_openai(chat_client, mode=instructor.Mode.JSON) 47 | # input_dict["response_model"] = response_model 48 | 49 | if stream: 50 | raise NotImplementedError("Stream is not supported right now. Please set stream to False.") 51 | 52 | # todo: delete this function, use litellm instead. 53 | 54 | 55 | def get_related_document_ai_search(question): 56 | # todo: update to use InfernceLLMConfig 57 | logger.info(f"Azure AI search - find related documents: {question}") 58 | 59 | logger.info("Reformulate QUERY") 60 | system_prompt = "Tu es un modèle qui a pour fonction de convertir des questions utilisateur en phrase affirmative pour faciliter la recherche par similarité dans une base documentaire vectorielle. Modifiez la phrase utilisateur suivante en ce sens et retirez tout ce qui n'est pas pertinent, comment Bonjour, merci etc. 
Si c'est dans une autre langue que le Français, traduis la question en Français:" 61 | 62 | messages = [ 63 | {"role": "system", "content": system_prompt}, 64 | {"role": "user", "content": "Convertis cette phrase en affirmative " + question}, 65 | ] 66 | new_question = get_completions( 67 | messages=messages, 68 | ) 69 | logger.debug(f"{question} ==> {new_question}") 70 | content_docs = [] 71 | results = search_client.search( 72 | search_text=new_question, 73 | query_type="semantic", 74 | query_answer="extractive", 75 | semantic_configuration_name=settings.SEMENTIC_CONFIGURATION_NAME, 76 | top=settings.AZURE_SEARCH_TOP_K or 2, 77 | query_answer_count=settings.AZURE_SEARCH_TOP_K or 2, 78 | include_total_count=True, 79 | query_caption="extractive|highlight-true", 80 | ) 81 | for i, result in enumerate(results): 82 | for cap in result["@search.captions"]: 83 | # data = f"Document {i + 1}: {cap.text} \nRéférence: {result['filename']}\n===" 84 | data = f"Numéro document: {i + 1} - nom document:{result['title']} - text:{cap.text} \n===" 85 | content_docs.append(data) 86 | context = "\n".join(content_docs) 87 | return context 88 | 89 | 90 | def get_rag_response(user_input): 91 | """Return the response after running RAG. 92 | 93 | Args: 94 | user_input: 95 | settings: 96 | conversation_id: 97 | 98 | Returns: 99 | response: 100 | 101 | """ 102 | logger.info(f"Running RAG") 103 | 104 | context = get_related_document_ai_search(user_input) 105 | formatted_user_input = f"question :{user_input}, \n\n contexte : \n{context}." 106 | logger.info(f"RAG - final formatted prompt: {formatted_user_input}") 107 | 108 | response = get_completions( 109 | messages=[ 110 | { 111 | "role": "system", 112 | "content": "Tu est un chatbot qui répond aux questions.", 113 | }, 114 | {"role": "user", "content": formatted_user_input}, 115 | ], 116 | ) 117 | return response 118 | 119 | 120 | def run_azure_ai_search_indexer(): 121 | """Run the azure ai search index. 
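    Sends a POST request to the Azure AI Search REST API to trigger a run of the configured indexer; a 202 status code means the run was accepted.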
122 | 123 | Returns: 124 | res: response 125 | """ 126 | headers = { 127 | "Content-Type": "application/json", 128 | "api-key": settings.AZURE_SEARCH_API_KEY, 129 | } 130 | params = {"api-version": "2024-07-01"} 131 | url = f"{settings.AZURE_SEARCH_SERVICE_ENDPOINT}/indexers('{settings.AZURE_SEARCH_INDEXER_NAME}')/run" 132 | 133 | res = requests.post(url=url, headers=headers, params=params) 134 | logger.debug(f"run_azure_ai_search_index response: {res.status_code}") 135 | return res 136 | 137 | 138 | if __name__ == "__main__": 139 | print(run_azure_ai_search_indexer()) 140 | -------------------------------------------------------------------------------- /src/ml/llm.py: -------------------------------------------------------------------------------- 1 | import ast 2 | from typing import Optional, Type 3 | 4 | import instructor 5 | import litellm 6 | from langfuse.decorators import observe 7 | from litellm import supports_response_schema, acompletion, completion, aembedding, embedding 8 | from pydantic import BaseModel, SecretStr, ConfigDict, model_validator 9 | from typing_extensions import Self 10 | 11 | 12 | from tenacity import ( 13 | retry, 14 | stop_after_attempt, 15 | wait_fixed, 16 | retry_if_exception_type, 17 | ) 18 | 19 | from utils import logger 20 | 21 | 22 | class InferenceLLMConfig(BaseModel): 23 | """Configuration for the inference model.""" 24 | 25 | model_name: str 26 | base_url: str 27 | api_key: SecretStr 28 | api_version: str = "2024-12-01-preview" # used only if model is from azure openai 29 | model_config = ConfigDict(arbitrary_types_allowed=True) 30 | 31 | supports_response_schema: bool = False 32 | 33 | temperature: Optional[float] = None 34 | seed: int = 1729 35 | max_tokens: Optional[int] = None 36 | 37 | @model_validator(mode="after") 38 | def init_client(self) -> Self: 39 | try: 40 | # check if the model supports structured output 41 | self.supports_response_schema = supports_response_schema(self.model_name.split("/")[-1]) 42 | logger.debug( 43 | f"\nModel: {self.model_name} Supports response schema: {self.supports_response_schema}" 44 | ) 45 | except Exception as e: 46 | # logger.exception(f"Error in initializing the LLM : {self}") 47 | logger.error(f"Error in initializing the LLM : {e}") 48 | raise e 49 | 50 | return self 51 | 52 | def load_model(self, prompt: str, schema: Type[BaseModel] = None, *args, **kwargs): 53 | pass 54 | 55 | @observe(as_type="generation") 56 | async def a_generate(self, prompt: str, schema: Type[BaseModel] = None, *args, **kwargs): 57 | messages = [{"role": "user", "content": prompt}] 58 | return await self.a_generate_from_messages( 59 | messages=messages, schema=schema, *args, **kwargs 60 | ) 61 | 62 | @observe(as_type="generation") 63 | @retry( 64 | wait=wait_fixed(60), 65 | stop=stop_after_attempt(6), 66 | retry=retry_if_exception_type( 67 | (litellm.exceptions.RateLimitError, instructor.exceptions.InstructorRetryException) 68 | ), 69 | ) 70 | async def a_generate_from_messages( 71 | self, messages: list, schema: Type[BaseModel] = None, *args, **kwargs 72 | ): 73 | # check if model supports structured output 74 | if schema: 75 | if self.supports_response_schema: 76 | res = await litellm.acompletion( 77 | model=self.model_name, 78 | api_key=self.api_key.get_secret_value(), 79 | base_url=self.base_url, 80 | messages=messages, 81 | response_format=schema, 82 | api_version=self.api_version, 83 | ) 84 | if res.choices[0].finish_reason == "content_filter": 85 | raise ValueError(f"Response filtred by content filter") 86 | else: 87 | 
dict_res = ast.literal_eval(res.choices[0].message.content) 88 | return schema(**dict_res) 89 | 90 | else: 91 | client = instructor.from_litellm(acompletion, mode=instructor.Mode.JSON) 92 | res, raw_completion = await client.chat.completions.create_with_completion( 93 | model=self.model_name, 94 | api_key=self.api_key.get_secret_value(), 95 | base_url=self.base_url, 96 | messages=messages, 97 | response_model=schema, 98 | api_version=self.api_version, 99 | ) 100 | return res 101 | else: 102 | res = await litellm.acompletion( 103 | model=self.model_name, 104 | api_key=self.api_key.get_secret_value(), 105 | base_url=self.base_url, 106 | messages=messages, 107 | api_version=self.api_version, 108 | ) 109 | return res.choices[0].message.content 110 | 111 | @observe(as_type="generation") 112 | def generate(self, prompt: str, schema: Type[BaseModel] = None, *args, **kwargs): 113 | messages = [{"role": "user", "content": prompt}] 114 | return self.generate_from_messages(messages=messages, schema=schema, *args, **kwargs) 115 | 116 | @observe(as_type="generation") 117 | @retry( 118 | wait=wait_fixed(60), 119 | stop=stop_after_attempt(6), 120 | retry=retry_if_exception_type( 121 | (litellm.exceptions.RateLimitError, instructor.exceptions.InstructorRetryException) 122 | ), 123 | ) 124 | def generate_from_messages( 125 | self, messages: list, schema: Type[BaseModel] = None, *args, **kwargs 126 | ): 127 | try: 128 | # check if model supports structured output 129 | if schema: 130 | if self.supports_response_schema: 131 | res = litellm.completion( 132 | model=self.model_name, 133 | api_key=self.api_key.get_secret_value(), 134 | base_url=self.base_url, 135 | messages=messages, 136 | response_format=schema, 137 | api_version=self.api_version, 138 | ) 139 | if res.choices[0].finish_reason == "content_filter": 140 | raise ValueError(f"Response filtred by content filter") 141 | else: 142 | dict_res = ast.literal_eval(res.choices[0].message.content) 143 | return schema(**dict_res) 144 | 145 | else: 146 | client = instructor.from_litellm(completion, mode=instructor.Mode.JSON) 147 | res, raw_completion = client.chat.completions.create_with_completion( 148 | model=self.model_name, 149 | api_key=self.api_key.get_secret_value(), 150 | base_url=self.base_url, 151 | messages=messages, 152 | response_model=schema, 153 | api_version=self.api_version, 154 | ) 155 | return res 156 | else: 157 | res = litellm.completion( 158 | model=self.model_name, 159 | api_key=self.api_key.get_secret_value(), 160 | base_url=self.base_url, 161 | messages=messages, 162 | api_version=self.api_version, 163 | ) 164 | 165 | return res.choices[0].message.content 166 | except Exception as e: 167 | # todo handle cost if exception 168 | logger.error(f"Error in generating response from LLM: {e}") 169 | return None 170 | 171 | def get_model_name(self, *args, **kwargs) -> str: 172 | return self.model_name 173 | 174 | 175 | class EmbeddingLLMConfig(InferenceLLMConfig): 176 | """Configuration for the embedding model.""" 177 | 178 | model_name: str 179 | base_url: str 180 | api_key: SecretStr 181 | api_version: str = "2024-12-01-preview" # used only if model is from azure openai 182 | model_config = ConfigDict(arbitrary_types_allowed=True) 183 | 184 | def load_model(self, prompt: str, schema: Type[BaseModel] = None, *args, **kwargs): 185 | pass 186 | 187 | def embed_text(self, text: str) -> list[float]: 188 | response = embedding( 189 | model=self.model_name, 190 | api_base=self.base_url, 191 | api_key=self.api_key.get_secret_value(), 192 | 
input=[text], 193 | ) 194 | return response.data[0]["embedding"] 195 | 196 | def embed_texts(self, texts: list[str]) -> list[list[float]]: 197 | response = embedding( 198 | model=self.model_name, 199 | api_base=self.base_url, 200 | api_key=self.api_key.get_secret_value(), 201 | input=texts, 202 | ) 203 | return [data.embedding for data in response.data] 204 | 205 | async def a_embed_text(self, text: str) -> list[float]: 206 | response = await aembedding( 207 | model=self.model_name, 208 | api_base=self.base_url, 209 | api_key=self.api_key.get_secret_value(), 210 | input=[text], 211 | ) 212 | return response.data[0]["embedding"] 213 | 214 | async def a_embed_texts(self, texts: list[str]) -> list[list[float]]: 215 | response = await aembedding( 216 | model=self.model_name, 217 | api_base=self.base_url, 218 | api_key=self.api_key.get_secret_value(), 219 | input=texts, 220 | ) 221 | return [data.embedding for data in response.data] 222 | 223 | def get_model_name(self): 224 | return self.model_name 225 | -------------------------------------------------------------------------------- /src/pages/0_chat.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | 3 | from ml.llm import InferenceLLMConfig, EmbeddingLLMConfig 4 | from utils import settings 5 | 6 | st.write("# Test your Client Chat") 7 | 8 | st.write(settings.get_inference_env_vars()) 9 | 10 | message_response = {"type": None, "message": None} 11 | 12 | llm = InferenceLLMConfig( 13 | model_name=settings.INFERENCE_DEPLOYMENT_NAME, 14 | base_url=settings.INFERENCE_BASE_URL, 15 | api_key=settings.INFERENCE_API_KEY, 16 | api_version=settings.INFERENCE_API_VERSION, 17 | ) 18 | 19 | embeddings_llm = EmbeddingLLMConfig( 20 | model_name=settings.EMBEDDINGS_DEPLOYMENT_NAME, 21 | base_url=settings.EMBEDDINGS_BASE_URL, 22 | api_key=settings.EMBEDDINGS_API_KEY, 23 | api_version=settings.EMBEDDINGS_API_VERSION, 24 | ) 25 | st.header("Ask your question", divider="rainbow") 26 | col1, col2 = st.columns([3, 1]) 27 | with col1: 28 | user_query = st.text_input(key="chat", label="Posez votre question") 29 | 30 | 31 | if user_query: 32 | try: 33 | # res = requests.get(f"{backend_url}/prefix_example/form/", params=params).json() 34 | 35 | res = llm.generate_from_messages( 36 | messages=[ 37 | { 38 | "role": "system", 39 | "content": "Tu est un chatbot qui répond aux questions.", 40 | }, 41 | {"role": "user", "content": user_query}, 42 | ], 43 | ) 44 | 45 | st.success(res) 46 | except Exception as e: 47 | res = f"Error: {e}" 48 | st.error(res) 49 | -------------------------------------------------------------------------------- /src/pages/1_embeddings.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | 3 | from ml.llm import EmbeddingLLMConfig 4 | from utils import settings 5 | 6 | st.write("# Test your Client Chat") 7 | 8 | st.write(settings.get_embeddings_env_vars()) 9 | 10 | message_response = {"type": None, "message": None} 11 | 12 | 13 | embeddings_llm = EmbeddingLLMConfig( 14 | model_name=settings.EMBEDDINGS_DEPLOYMENT_NAME, 15 | base_url=settings.EMBEDDINGS_BASE_URL, 16 | api_key=settings.EMBEDDINGS_API_KEY, 17 | api_version=settings.EMBEDDINGS_API_VERSION, 18 | ) 19 | 20 | 21 | st.title(" Test your Embeddings") 22 | 23 | col1, col2 = st.columns([3, 1]) 24 | with col1: 25 | text_to_emb = st.text_input(key="embedding", label="Write a text to embed") 26 | 27 | if text_to_emb: 28 | try: 29 | # res = 
requests.get(f"{backend_url}/prefix_example/form/", params=params).json() 30 | 31 | res = embeddings_llm.embed_text(text_to_emb) 32 | 33 | st.success(res) 34 | except Exception as e: 35 | res = f"Error: {e}" 36 | st.error(res) 37 | -------------------------------------------------------------------------------- /src/pages/2_azure_rag.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | 3 | from ml.ai import run_azure_ai_search_indexer 4 | from utils import settings, logger 5 | 6 | st.write("# Streamlit Azure RAG without fastapi") 7 | 8 | st.write(settings.get_azure_search_env_vars()) 9 | 10 | if not settings.ENABLE_AZURE_SEARCH: 11 | st.error("ENABLE_AZURE_SEARCH env var is not set to True") 12 | st.stop() 13 | 14 | from azure.storage.blob import BlobServiceClient 15 | 16 | message_response = {"type": None, "message": None} 17 | 18 | 19 | @st.fragment() 20 | def show_upload_documents(): 21 | global message_response 22 | 23 | st.write(f"### Documents disponibles dans le storage {settings.AZURE_CONTAINER_NAME}") 24 | blob_service_client = BlobServiceClient.from_connection_string( 25 | f"DefaultEndpointsProtocol=https;AccountName={settings.AZURE_STORAGE_ACCOUNT_NAME};AccountKey={settings.AZURE_STORAGE_ACCOUNT_KEY}" 26 | ) 27 | container_client = blob_service_client.get_container_client( 28 | container=settings.AZURE_CONTAINER_NAME 29 | ) 30 | 31 | blob_list = container_client.list_blobs() 32 | for i, blob in enumerate(blob_list): 33 | print(f"Name: {blob.name}") 34 | 35 | col1, col2 = st.columns([3, 1]) 36 | with col1: 37 | st.write(f"- {blob.name}") 38 | with col2: 39 | if st.button("Supprimer", key=f"button_{i}"): 40 | container_client.delete_blob(blob.name) 41 | run_azure_ai_search_indexer() 42 | message_response = {"type": "success", "message": "Document supprimé avec succès"} 43 | st.rerun(scope="fragment") 44 | 45 | uploaded_file = st.file_uploader("Transférer vos documents") 46 | if uploaded_file: 47 | blob_service_client = BlobServiceClient.from_connection_string( 48 | f"DefaultEndpointsProtocol=https;AccountName={settings.AZURE_STORAGE_ACCOUNT_NAME};AccountKey={settings.AZURE_STORAGE_ACCOUNT_KEY}" 49 | ) 50 | blob_client = blob_service_client.get_blob_client( 51 | container=settings.AZURE_CONTAINER_NAME, blob=uploaded_file.name 52 | ) 53 | 54 | try: 55 | res = blob_client.upload_blob(uploaded_file) 56 | except Exception as e: 57 | logger.error(f"Error uploading document: {e}") 58 | message_response = {"type": "error", "message": f"Error uploading document: {e}"} 59 | # st.rerun(scope="fragment") 60 | 61 | logger.trace(f"Document {uploaded_file.name} uploaded successfully") 62 | logger.debug(f"Document {uploaded_file.name} uploaded successfully") 63 | res = run_azure_ai_search_indexer() 64 | 65 | if res.status_code != 202: 66 | message_response = {"type": "error", "message": res.text} 67 | else: 68 | message_response = {"type": "success", "message": "Document téléchargé avec succès"} 69 | st.rerun(scope="fragment") 70 | 71 | 72 | show_upload_documents() 73 | 74 | if message_response["message"]: 75 | if message_response["type"] == "success": 76 | st.success(message_response["message"]) 77 | if message_response["type"] == "error": 78 | st.error(message_response["message"]) 79 | -------------------------------------------------------------------------------- /src/pages/3_fastapi_azure_rag.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import streamlit as st 3 | 4 | 
from utils import settings
5 | 
6 | 
7 | st.write("# RAG using FastAPI")
8 | 
9 | st.write(settings.get_active_env_vars())
10 | 
11 | 
12 | backend_url = f"http://{settings.FASTAPI_HOST}:{settings.FASTAPI_PORT}/"
13 | 
14 | 
15 | @st.fragment
16 | def show_app_health():
17 |     try:
18 |         res = requests.get(backend_url).json()
19 |         st.success(res)
20 |     except Exception as e:
21 |         st.exception(f"FastAPI server encountered a problem. \n\n Error: {e}")
22 |         st.stop()
23 | 
24 | 
25 | @st.fragment
26 | def create_form(questions: list, key: str, title: str = "Form"):
27 |     st.header(title, divider="rainbow")
28 | 
29 |     if f"{key}_responses" in st.session_state:
30 |         responses = st.session_state[f"{key}_responses"]
31 |         successes = st.session_state[f"{key}_success"]
32 |     else:
33 |         responses = {}
34 |         successes = {}
35 |         st.session_state[f"{key}_responses"] = responses
36 |         st.session_state[f"{key}_success"] = successes
37 | 
38 |     for i, question in enumerate(questions):
39 |         col1, col2 = st.columns([3, 1])
40 |         with col1:
41 |             st.write(f"- {question[0]}")
42 |         with col2:
43 |             if st.button("Envoyer", key=f"button_{i}_{question[0][:10]}"):
44 |                 try:
45 |                     params = {"question": question[1]}
46 |                     res = requests.get(f"{backend_url}/prefix_example/form/", params=params).json()
47 |                     successes[i] = True
48 |                 except Exception as e:
49 |                     res = f"Error: {e}"
50 |                     successes[i] = False
51 |                 responses[i] = f"{res}"
52 | 
53 |                 st.session_state[f"{key}_responses"][i] = responses[i]
54 |                 st.session_state[f"{key}_success"][i] = successes[i]
55 | 
56 |         if i in responses:
57 |             if successes[i]:
58 |                 st.success(f"Réponse automatique: {responses[i]}")
59 |             else:
60 |                 st.error(f"Erreur {responses[i]}")
61 | 
62 | 
63 | @st.fragment()
64 | def show_ask_question():
65 |     st.header("Ask your question", divider="rainbow")
66 |     col1, col2 = st.columns([3, 1])
67 |     with col1:
68 |         q = st.text_input(key="chat", label="Posez votre question")
69 | 
70 |     params = None
71 |     with col2:
72 |         if st.button("Envoyer", key="button_chat"):
73 |             params = {"question": q}
74 | 
75 |     if params:
76 |         try:
77 |             res = requests.get(f"{backend_url}/prefix_example/form/", params=params).json()
78 |             st.success(res)
79 |         except Exception as e:
80 |             res = f"Error: {e}"
81 |             st.error(res)
82 | 
83 | 
84 | # The first element of each tuple is the question displayed in the UI; the second is the detailed question sent to the LLM.
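# Add more (label, detailed question) tuples to extend the form; each form caches its responses in st.session_state under its key.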
85 | questions = [ 86 | ( 87 | "Quelle est la date de naissance de la personne ?", 88 | "Quelle est la date de naissance de la personne ?", 89 | ), 90 | ] 91 | 92 | show_app_health() 93 | 94 | create_form(questions, key="general") 95 | 96 | show_ask_question() 97 | -------------------------------------------------------------------------------- /src/pages/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmineDjeghri/generative-ai-project-template/627bd5460e6720d3b10598918e4b9896576e5e83/src/pages/__init__.py -------------------------------------------------------------------------------- /src/settings_env.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Self 2 | 3 | from loguru import logger as loguru_logger 4 | from pydantic import SecretStr, model_validator 5 | from pydantic_settings import BaseSettings, SettingsConfigDict 6 | from rich.pretty import pretty_repr 7 | 8 | 9 | class BaseEnvironmentVariables(BaseSettings): 10 | model_config = SettingsConfigDict(env_file="../.env", extra="ignore") 11 | 12 | 13 | class InferenceEnvironmentVariables(BaseEnvironmentVariables): 14 | INFERENCE_BASE_URL: Optional[str] = "http://localhost:11434" 15 | INFERENCE_API_KEY: Optional[SecretStr] = "tt" 16 | INFERENCE_DEPLOYMENT_NAME: Optional[str] = "ollama_chat/qwen2.5:0.5b" 17 | INFERENCE_API_VERSION: str = "2025-02-01-preview" 18 | 19 | def get_inference_env_vars(self): 20 | return { 21 | "INFERENCE_BASE_URL": self.INFERENCE_BASE_URL, 22 | "INFERENCE_API_KEY": self.INFERENCE_API_KEY, 23 | "INFERENCE_DEPLOYMENT_NAME": self.INFERENCE_DEPLOYMENT_NAME, 24 | "INFERENCE_API_VERSION": self.INFERENCE_API_VERSION, 25 | } 26 | 27 | 28 | class EmbeddingsEnvironmentVariables(BaseEnvironmentVariables): 29 | EMBEDDINGS_BASE_URL: Optional[str] = None 30 | EMBEDDINGS_API_KEY: Optional[SecretStr] = "tt" 31 | EMBEDDINGS_DEPLOYMENT_NAME: Optional[str] = None 32 | EMBEDDINGS_API_VERSION: str = "2025-02-01-preview" 33 | 34 | def get_embeddings_env_vars(self): 35 | return { 36 | "EMBEDDINGS_BASE_URL": self.EMBEDDINGS_BASE_URL, 37 | "EMBEDDINGS_API_KEY": self.EMBEDDINGS_API_KEY, 38 | "EMBEDDINGS_DEPLOYMENT_NAME": self.EMBEDDINGS_DEPLOYMENT_NAME, 39 | } 40 | 41 | 42 | class EvaluatorEnvironmentVariables(BaseEnvironmentVariables): 43 | EVALUATOR_BASE_URL: Optional[str] = "http://localhost:11434" 44 | EVALUATOR_API_KEY: Optional[SecretStr] = "tt" 45 | EVALUATOR_DEPLOYMENT_NAME: Optional[str] = "ollama_chat/qwen2.5:0.5b" 46 | EVALUATOR_API_VERSION: str = "2024-10-01-preview" 47 | 48 | ENABLE_EVALUATION: bool = False 49 | 50 | def get_evaluator_env_vars(self): 51 | return { 52 | "EVALUATOR_BASE_URL": self.EVALUATOR_BASE_URL, 53 | "EVALUATOR_API_KEY": self.EVALUATOR_API_KEY, 54 | "EVALUATOR_DEPLOYMENT_NAME": self.EVALUATOR_DEPLOYMENT_NAME, 55 | } 56 | 57 | @model_validator(mode="after") 58 | def check_eval_api_keys(self: Self) -> Self: 59 | """Validate API keys based on the selected provider after model initialization.""" 60 | if self.ENABLE_EVALUATION: 61 | eval_vars = self.get_evaluator_env_vars() 62 | if any(value is None for value in eval_vars.values()): 63 | # loguru_logger.opt(exception=True).error("Your error message") 64 | loguru_logger.error( 65 | "\nEVALUATION environment variables must be provided when ENABLE_EVALUATION is True." 
66 | f"\n{pretty_repr(eval_vars)}" 67 | ) 68 | raise ValueError( 69 | "\nEVALUATION environment variables must be provided when ENABLE_EVALUATION is True." 70 | f"\n{pretty_repr(eval_vars)}" 71 | ) 72 | 73 | return self 74 | 75 | 76 | class AzureAISearchEnvironmentVariables(BaseEnvironmentVariables): 77 | """Represents environment variables for configuring Azure AI Search and Azure Storage.""" 78 | 79 | ################ Azure Search settings ################ 80 | ENABLE_AZURE_SEARCH: bool = False 81 | AZURE_SEARCH_SERVICE_ENDPOINT: Optional[str] = None 82 | AZURE_SEARCH_INDEX_NAME: Optional[str] = None 83 | AZURE_SEARCH_INDEXER_NAME: Optional[str] = None 84 | AZURE_SEARCH_API_KEY: Optional[str] = None 85 | AZURE_SEARCH_TOP_K: Optional[str] = "2" 86 | SEMENTIC_CONFIGURATION_NAME: Optional[str] = None 87 | # Azure Storage settings 88 | AZURE_STORAGE_ACCOUNT_NAME: Optional[str] = None 89 | AZURE_STORAGE_ACCOUNT_KEY: Optional[str] = None 90 | AZURE_CONTAINER_NAME: Optional[str] = None 91 | 92 | def get_azure_search_env_vars(self): 93 | items_dict = { 94 | "ENABLE_AZURE_SEARCH": self.ENABLE_AZURE_SEARCH, 95 | "SEMENTIC_CONFIGURATION_NAME": self.SEMENTIC_CONFIGURATION_NAME, 96 | "AZURE_STORAGE_ACCOUNT_NAME": self.AZURE_STORAGE_ACCOUNT_NAME, 97 | "AZURE_STORAGE_ACCOUNT_KEY": self.AZURE_STORAGE_ACCOUNT_KEY, 98 | "AZURE_CONTAINER_NAME": self.AZURE_CONTAINER_NAME, 99 | } 100 | 101 | items_dict.update( 102 | {key: value for key, value in vars(self).items() if key.startswith("AZURE_SEARCH")} 103 | ) 104 | return items_dict 105 | 106 | @model_validator(mode="after") 107 | def check_ai_search_keys(self: Self) -> Self: 108 | """Validate API keys based on the selected provider after model initialization.""" 109 | if self.ENABLE_AZURE_SEARCH: 110 | azure_search_vars = self.get_azure_search_env_vars() 111 | if any(value is None for value in azure_search_vars.values()): 112 | loguru_logger.error( 113 | "\nAZURE_SEARCH environment variables must be provided when ENABLE_AZURE_SEARCH is True." 114 | f"\n{pretty_repr(azure_search_vars)}" 115 | ) 116 | raise ValueError( 117 | "\nAZURE_SEARCH environment variables must be provided when ENABLE_AZURE_SEARCH is True." 118 | f"\n{pretty_repr(azure_search_vars)}" 119 | ) 120 | return self 121 | 122 | 123 | class Settings( 124 | InferenceEnvironmentVariables, 125 | EmbeddingsEnvironmentVariables, 126 | EvaluatorEnvironmentVariables, 127 | AzureAISearchEnvironmentVariables, 128 | ): 129 | """Settings class for the application. 130 | 131 | This class is automatically initialized with environment variables from the .env file. 
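    A single instance is created once in utils.initialize() and shared across the app as utils.settings.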
132 |     It inherits from the following environment-variable classes and adds Streamlit and FastAPI settings:
133 |     - InferenceEnvironmentVariables
134 |     - EmbeddingsEnvironmentVariables
135 |     - EvaluatorEnvironmentVariables
136 |     - AzureAISearchEnvironmentVariables
137 |     """
138 | 
139 |     FASTAPI_HOST: str = "localhost"
140 |     FASTAPI_PORT: int = 8080
141 |     STREAMLIT_PORT: int = 8501
142 |     DEV_MODE: bool = True
143 | 
144 |     def get_active_env_vars(self):
145 |         env_vars = {
146 |             "DEV_MODE": self.DEV_MODE,
147 |             "FASTAPI_PORT": self.FASTAPI_PORT,
148 |             "STREAMLIT_PORT": self.STREAMLIT_PORT,
149 |         }
150 | 
151 |         env_vars.update(self.get_inference_env_vars())
152 |         env_vars.update(self.get_embeddings_env_vars())
153 |         if self.ENABLE_AZURE_SEARCH:
154 |             env_vars.update(self.get_azure_search_env_vars())
155 | 
156 |         if self.ENABLE_EVALUATION:
157 |             env_vars.update(self.get_evaluator_env_vars())
158 | 
159 |         return env_vars
160 | -------------------------------------------------------------------------------- /src/utils.py: --------------------------------------------------------------------------------
1 | import ast
2 | import os
3 | import sys
4 | import timeit
5 | from pathlib import Path
6 | 
7 | from azure.core.credentials import AzureKeyCredential
8 | from azure.search.documents import SearchClient
9 | from loguru import logger as loguru_logger
10 | from pydantic import ValidationError
11 | 
12 | from settings_env import Settings
13 | 
14 | # Check if we run the code from the src directory
15 | if Path("src").is_dir():
16 |     loguru_logger.warning("Changing working directory to src")
17 |     loguru_logger.warning(f" Current working dir is {Path.cwd()}")
18 |     os.chdir("src")
19 | elif Path("ml").is_dir():
20 |     # loguru_logger.warning(f" Current working dir is {Path.cwd()}")
21 |     pass
22 | else:
23 |     raise Exception(
24 |         f"The project should always run from the src directory, but the current working dir is {Path.cwd()}"
25 |     )
26 | 
27 | 
28 | def initialize():
29 |     """Initialize the settings, logger, and search client.
30 | 
31 |     Reads the environment variables from the .env file defined in the Settings class.
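    In DEV_MODE the log level is set to TRACE, otherwise to INFO; the Azure Search client is only created when ENABLE_AZURE_SEARCH is True.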
32 | 33 | Returns: 34 | settings 35 | loguru_logger 36 | search_client 37 | """ 38 | settings = Settings() 39 | loguru_logger.remove() 40 | 41 | if settings.DEV_MODE: 42 | loguru_logger.add(sys.stderr, level="TRACE") 43 | else: 44 | loguru_logger.add(sys.stderr, level="INFO") 45 | 46 | search_client = None 47 | if settings.ENABLE_AZURE_SEARCH: 48 | search_client = SearchClient( 49 | settings.AZURE_SEARCH_SERVICE_ENDPOINT, 50 | settings.AZURE_SEARCH_INDEX_NAME, 51 | AzureKeyCredential(settings.AZURE_SEARCH_API_KEY), 52 | ) 53 | 54 | return settings, loguru_logger, search_client 55 | 56 | 57 | def safe_eval(x): 58 | try: 59 | return ast.literal_eval(x) 60 | except: 61 | return [] 62 | 63 | 64 | def time_function(func): 65 | def wrapper(*args, **kwargs): 66 | start_time = timeit.default_timer() 67 | result = func(*args, **kwargs) 68 | 69 | end_time = timeit.default_timer() 70 | execution_time = round(end_time - start_time, 2) 71 | if "reason" in result: 72 | result["reason"] = f" Execution time: {execution_time}s | " + result["reason"] 73 | 74 | if "output" in result: 75 | result["output"] = f" Execution time: {execution_time}s | " + result["output"] 76 | logger.debug(f"Function {func.__name__} took {execution_time} seconds to execute.") 77 | 78 | return result 79 | 80 | return wrapper 81 | 82 | 83 | def validation_error_message(error: ValidationError) -> ValidationError: 84 | for err in error.errors(): 85 | del err["input"] 86 | del err["url"] 87 | 88 | return error 89 | 90 | 91 | settings, logger, search_client = initialize() 92 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AmineDjeghri/generative-ai-project-template/627bd5460e6720d3b10598918e4b9896576e5e83/tests/__init__.py -------------------------------------------------------------------------------- /tests/readme.txt: -------------------------------------------------------------------------------- 1 | tests needs to be run from the root of the repo. 
2 | You can use make pytest 3 | -------------------------------------------------------------------------------- /tests/test_llm_endpoint.py: -------------------------------------------------------------------------------- 1 | import ollama 2 | import requests 3 | from rich.pretty import pretty_repr 4 | 5 | from ml.llm import InferenceLLMConfig 6 | from utils import settings, logger 7 | 8 | OLLAMA_MODEL_NAME = "qwen2.5:0.5b" 9 | OLLAMA_BASE_URL = "http://localhost:11434" 10 | 11 | 12 | def test_ping_ollama(): 13 | response = requests.get(f"{OLLAMA_BASE_URL}") 14 | assert response.status_code == 200 15 | 16 | 17 | def test_download_model(): 18 | ollama.pull(OLLAMA_MODEL_NAME) 19 | models = [model for model in ollama.list()][0][1] 20 | models_names = [model.model for model in models] 21 | logger.debug(f" list models: {models_names}") 22 | assert OLLAMA_MODEL_NAME in models_names 23 | # ollama.delete(OLLAMA_MODEL_NAME) 24 | 25 | 26 | def test_ollama_run(): 27 | ollama.show(OLLAMA_MODEL_NAME) 28 | 29 | 30 | def test_ollama_chat(): 31 | res = ollama.chat(model=OLLAMA_MODEL_NAME, messages=[{"role": "user", "content": "Hi"}]) 32 | assert type(res.message.content) == str 33 | 34 | 35 | def test_inference_llm(): 36 | """Test the LLM client used to generate answers.""" 37 | llm = InferenceLLMConfig( 38 | model_name=settings.INFERENCE_DEPLOYMENT_NAME, 39 | api_key=settings.INFERENCE_API_KEY, 40 | base_url=settings.INFERENCE_BASE_URL, 41 | api_version=settings.INFERENCE_API_VERSION, 42 | ) 43 | logger.info(f" Inference LLM Config is: {llm}") 44 | res = llm.generate("Hi") 45 | logger.info( 46 | f"\nActive environment variables are: \n{pretty_repr(settings.get_active_env_vars())}\n" 47 | f"\nmodel response: {res}" 48 | ) 49 | assert type(res) == str 50 | 51 | 52 | # @pytest.mark.skipif(not settings.ENABLE_EVALUATION, reason="requires env ENABLE_EVALUATION=True") 53 | # def test_evaluator_llm(): 54 | # """Test the LLM as a judge client used in the evaluation.""" 55 | # check_evaluator_llm() 56 | -------------------------------------------------------------------------------- /tests/test_rag.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | 5 | from ml.ai import get_related_document_ai_search, get_rag_response, run_azure_ai_search_indexer 6 | from utils import logger, settings 7 | 8 | logger.info(f" working directory is {os.getcwd()}") 9 | 10 | 11 | @pytest.mark.skipif( 12 | not settings.ENABLE_AZURE_SEARCH, reason="requires env ENABLE_AZURE_SEARCH=True" 13 | ) 14 | def test_get_related_document_ai_search(): 15 | user_input = "What is the capital of France?" 
16 | question_context = get_related_document_ai_search(user_input) 17 | 18 | assert type(question_context) == str 19 | 20 | 21 | @pytest.mark.skipif( 22 | not settings.ENABLE_AZURE_SEARCH, reason="requires env ENABLE_AZURE_SEARCH=True" 23 | ) 24 | def test_get_rag_response(): 25 | res = get_rag_response("What is the capital of France?") 26 | assert type(res) == str 27 | 28 | 29 | @pytest.mark.skipif( 30 | not settings.ENABLE_AZURE_SEARCH, reason="requires env ENABLE_AZURE_SEARCH=True" 31 | ) 32 | def test_run_azure_ai_search_indexer(): 33 | assert run_azure_ai_search_indexer().status_code == 202 34 | -------------------------------------------------------------------------------- /tests/test_settings.py: -------------------------------------------------------------------------------- 1 | from settings_env import Settings 2 | 3 | 4 | def test_settings(): 5 | Settings() 6 | --------------------------------------------------------------------------------
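The test above only checks that Settings() can be constructed. As a hedged sketch (not part of the repository), an extra test like the one below would confirm that environment variables override the defaults declared in settings_env.py; it assumes pytest's built-in monkeypatch fixture:

    # Hypothetical additional test: runtime environment variables should take
    # precedence over the defaults and the .env file read by pydantic-settings.
    from settings_env import Settings

    def test_settings_env_override(monkeypatch):
        monkeypatch.setenv("FASTAPI_PORT", "9999")
        settings = Settings()
        assert settings.FASTAPI_PORT == 9999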