├── tests ├── utils │ ├── __init__.py │ ├── eBook-How-to-Build-a-Career-in-AI.pdf │ ├── eval_questions.txt │ ├── test_utilsrag_lc.py │ ├── test_utilsfile.py │ └── test_utilsrag_li.py ├── rag_bedrock │ ├── eval_document.pdf │ ├── eval_questions.txt │ ├── test_rag_llamaindex.py │ └── test_rag_langchain.py ├── view_tru_dashboard.py └── conftest.py ├── rag_assistant ├── shared │ ├── __init__.py │ ├── rag_prompts.py │ └── llm_facade.py ├── utils │ ├── __init__.py │ ├── config_loader.py │ ├── constants.py │ ├── auth.py │ ├── utilsfile.py │ ├── utilsvision.py │ ├── utilsllm.py │ └── utilsrag_lc.py ├── Hello.py ├── streamlit_app.py ├── pages │ ├── 2_Load_Document.py │ └── 3_RAG_Admin.py └── legacy_ux │ └── 1_RAG_agent_with_LC.py ├── terraform └── aws │ ├── providers.tf │ ├── s3.tf │ ├── kms.tf │ ├── data.tf │ ├── route53.tf │ ├── security_group.tf │ ├── efs.tf │ ├── opensearch.tf │ ├── outputs.tf │ ├── lb.tf │ ├── variables.tf │ ├── iam.tf │ └── ecs.tf ├── data └── sources │ ├── pdf │ ├── arxiv │ │ └── 2210.01241.pdf │ ├── GenAI │ │ └── aws-caf-for-ai.pdf │ ├── aws │ │ └── serverless │ │ │ └── serverless-core.pdf │ ├── Security │ │ ├── Cloud Security Guide for SMEs.pdf │ │ └── LLM_AI_Security_and_Governance_Checklist-v1_FR.pdf │ ├── Application │ │ └── beyond-the-twelve-factor-app.pdf │ ├── Cloud │ │ ├── aws-cloud-adoption-framework_fr-FR.pdf │ │ └── cloud-enablement-engine-practical-guide.pdf │ ├── Architecture │ │ └── AWS_Well-Architected_Framework.pdf │ └── Risk │ │ └── Questionnaire d'évaluation des risques applicatifs pour le Cloud Public.pdf │ └── md │ └── 12factor │ ├── en │ ├── who.md │ ├── intro.md │ ├── toc.md │ ├── background.md │ ├── codebase.md │ ├── port-binding.md │ ├── admin-processes.md │ ├── logs.md │ ├── processes.md │ ├── build-release-run.md │ ├── dependencies.md │ ├── concurrency.md │ ├── backing-services.md │ ├── disposability.md │ ├── config.md │ └── dev-prod-parity.md │ └── fr │ ├── who.md │ ├── intro.md │ ├── toc.md │ ├── background.md │ ├── port-binding.md │ ├── codebase.md │ ├── admin-processes.md │ ├── logs.md │ ├── processes.md │ ├── dependencies.md │ ├── build-release-run.md │ ├── backing-services.md │ ├── disposability.md │ ├── concurrency.md │ ├── config.md │ └── dev-prod-parity.md ├── docker-compose.yml ├── .github └── workflows │ ├── python_test.yml │ └── aws.yml ├── .gitignore ├── Dockerfile ├── pyproject.toml ├── conf └── config.ini └── README.md /tests/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rag_assistant/shared/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rag_assistant/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /terraform/aws/providers.tf: -------------------------------------------------------------------------------- 1 | provider "aws" { 2 | region = "eu-west-1" 3 | } 4 | -------------------------------------------------------------------------------- /terraform/aws/s3.tf: -------------------------------------------------------------------------------- 1 | resource "aws_s3_bucket" "ai_assistant_bucket" { 2 | bucket = "finaxys-ai-assistant-bucket" 3 | } 4 | -------------------------------------------------------------------------------- 
/data/sources/pdf/arxiv/2210.01241.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BittnerPierre/applied-ai-rag-assistant/HEAD/data/sources/pdf/arxiv/2210.01241.pdf -------------------------------------------------------------------------------- /tests/rag_bedrock/eval_document.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BittnerPierre/applied-ai-rag-assistant/HEAD/tests/rag_bedrock/eval_document.pdf -------------------------------------------------------------------------------- /tests/view_tru_dashboard.py: -------------------------------------------------------------------------------- 1 | from trulens_eval import Tru 2 | 3 | tru = Tru(database_redact_keys=True) 4 | # tru.reset_database() 5 | tru.run_dashboard() -------------------------------------------------------------------------------- /data/sources/pdf/GenAI/aws-caf-for-ai.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BittnerPierre/applied-ai-rag-assistant/HEAD/data/sources/pdf/GenAI/aws-caf-for-ai.pdf -------------------------------------------------------------------------------- /tests/utils/eBook-How-to-Build-a-Career-in-AI.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BittnerPierre/applied-ai-rag-assistant/HEAD/tests/utils/eBook-How-to-Build-a-Career-in-AI.pdf -------------------------------------------------------------------------------- /data/sources/pdf/aws/serverless/serverless-core.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BittnerPierre/applied-ai-rag-assistant/HEAD/data/sources/pdf/aws/serverless/serverless-core.pdf -------------------------------------------------------------------------------- /terraform/aws/kms.tf: -------------------------------------------------------------------------------- 1 | resource "aws_kms_key" "key" { 2 | description = "Ai Assistant ECS cluster CloudWatch log KMS key" 3 | deletion_window_in_days = 7 4 | } 5 | -------------------------------------------------------------------------------- /data/sources/pdf/Security/Cloud Security Guide for SMEs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BittnerPierre/applied-ai-rag-assistant/HEAD/data/sources/pdf/Security/Cloud Security Guide for SMEs.pdf -------------------------------------------------------------------------------- /data/sources/pdf/Application/beyond-the-twelve-factor-app.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BittnerPierre/applied-ai-rag-assistant/HEAD/data/sources/pdf/Application/beyond-the-twelve-factor-app.pdf -------------------------------------------------------------------------------- /data/sources/pdf/Cloud/aws-cloud-adoption-framework_fr-FR.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BittnerPierre/applied-ai-rag-assistant/HEAD/data/sources/pdf/Cloud/aws-cloud-adoption-framework_fr-FR.pdf -------------------------------------------------------------------------------- /data/sources/pdf/Architecture/AWS_Well-Architected_Framework.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/BittnerPierre/applied-ai-rag-assistant/HEAD/data/sources/pdf/Architecture/AWS_Well-Architected_Framework.pdf -------------------------------------------------------------------------------- /data/sources/pdf/Cloud/cloud-enablement-engine-practical-guide.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BittnerPierre/applied-ai-rag-assistant/HEAD/data/sources/pdf/Cloud/cloud-enablement-engine-practical-guide.pdf -------------------------------------------------------------------------------- /data/sources/md/12factor/en/who.md: -------------------------------------------------------------------------------- 1 | Who should read this document? 2 | ============================== 3 | 4 | Any developer building applications which run as a service. Ops engineers who deploy or manage such applications. 5 | -------------------------------------------------------------------------------- /data/sources/pdf/Security/LLM_AI_Security_and_Governance_Checklist-v1_FR.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BittnerPierre/applied-ai-rag-assistant/HEAD/data/sources/pdf/Security/LLM_AI_Security_and_Governance_Checklist-v1_FR.pdf -------------------------------------------------------------------------------- /data/sources/pdf/Risk/Questionnaire d'évaluation des risques applicatifs pour le Cloud Public.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BittnerPierre/applied-ai-rag-assistant/HEAD/data/sources/pdf/Risk/Questionnaire d'évaluation des risques applicatifs pour le Cloud Public.pdf -------------------------------------------------------------------------------- /data/sources/md/12factor/fr/who.md: -------------------------------------------------------------------------------- 1 | Qui devrait lire ce document ? 2 | ============================== 3 | 4 | Tout développeur qui construit des applications qui fonctionnent en tant que service, ainsi que les personnes qui déploient et gèrent de telles applications. 
5 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | langchain-streamlit-agent: 4 | image: applied-ai-rag-assistant:latest 5 | build: ./app 6 | command: streamlit run rag_assistant/Hello.py --server.port 8051 7 | volumes: 8 | - ./rag_assistant/:/app/rag_assistant 9 | ports: 10 | - 8051:8051 -------------------------------------------------------------------------------- /rag_assistant/utils/config_loader.py: -------------------------------------------------------------------------------- 1 | import configparser 2 | from pathlib import Path 3 | 4 | 5 | def load_config(): 6 | base_dir = Path(__file__).resolve().parent.parent # Chemin du dossier projet 7 | config_path = base_dir.parent / 'conf' / 'config.ini' 8 | 9 | config = configparser.ConfigParser() 10 | config.read(config_path) 11 | return config 12 | -------------------------------------------------------------------------------- /terraform/aws/data.tf: -------------------------------------------------------------------------------- 1 | data "aws_vpc" "ai_assistant_vpc" { 2 | id = var.vpc_id 3 | } 4 | 5 | data "aws_subnet" "ai_assistant_subnet_1" { 6 | id = var.subnet_id_1 7 | } 8 | 9 | data "aws_subnet" "ai_assistant_subnet_2" { 10 | id = var.subnet_id_2 11 | } 12 | 13 | data "aws_subnet" "ai_assistant_subnet_3" { 14 | id = var.subnet_id_3 15 | } 16 | 17 | data "aws_secretsmanager_secret" "secret" { 18 | name = var.secret_name 19 | } 20 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | # tests/conftest.py 2 | import sys 3 | import os 4 | 5 | import pytest 6 | 7 | from trulens_eval import ( 8 | Tru 9 | ) 10 | 11 | 12 | # Add the root directory of the project to the Python path 13 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) 14 | 15 | 16 | @pytest.fixture(scope="session") 17 | def trulens_prepare(): 18 | tru = Tru(database_redact_keys=True) 19 | tru.reset_database() 20 | return tru -------------------------------------------------------------------------------- /tests/utils/eval_questions.txt: -------------------------------------------------------------------------------- 1 | What are the keys to building a career in AI? 2 | How can teamwork contribute to success in AI? 3 | What is the importance of networking in AI? 4 | What are some good habits to develop for a successful career? 5 | How can altruism be beneficial in building a career? 6 | What is imposter syndrome and how does it relate to AI? 7 | Who are some accomplished individuals who have experienced imposter syndrome? 8 | What is the first step to becoming good at AI? 9 | What are some common challenges in AI? 10 | Is it normal to find parts of AI challenging? 
-------------------------------------------------------------------------------- /terraform/aws/route53.tf: -------------------------------------------------------------------------------- 1 | data "aws_acm_certificate" "ai_assistant_certificate" { 2 | domain = var.dns_url_app_subnet 3 | } 4 | 5 | data "aws_route53_zone" "ai_assistant_zone" { 6 | name = var.dns_url 7 | } 8 | 9 | resource "aws_route53_record" "record" { 10 | zone_id = data.aws_route53_zone.ai_assistant_zone.zone_id 11 | name = var.dns_url_app_subnet 12 | type = "A" 13 | alias { 14 | name = aws_lb.application_lb.dns_name 15 | zone_id = aws_lb.application_lb.zone_id 16 | evaluate_target_health = true 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /tests/rag_bedrock/eval_questions.txt: -------------------------------------------------------------------------------- 1 | How can participating in competitions like Kaggle contribute to your AI career growth? 2 | What are the keys to building a career in AI? 3 | How can teamwork contribute to success in AI? 4 | What is the importance of networking in AI? 5 | What are some good habits to develop for a successful career? 6 | What is imposter syndrome and how does it relate to AI? 7 | Who are some accomplished individuals who have experienced imposter syndrome? 8 | What is the first step to becoming good at AI? 9 | What are some common challenges in AI? 10 | Is it normal to find parts of AI challenging? -------------------------------------------------------------------------------- /rag_assistant/utils/constants.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | class DocumentType(Enum): 4 | STANDARD = "Norme" 5 | GUIDE = "Guide" 6 | TUTORIAL = "Tutoriel" 7 | FAQ = "FAQ" 8 | 9 | 10 | class ChunkType(Enum): 11 | TEXT = "Text" 12 | IMAGE = "Image" 13 | 14 | 15 | class Metadata(Enum): 16 | DOCUMENT_TYPE = "document_type" 17 | CHUNK_TYPE = "chunk_type" 18 | TOPIC = "topic" 19 | PAGE = "page" 20 | FILENAME = "filename" 21 | 22 | 23 | class SupportedFileType(Enum): 24 | PDF = "pdf" 25 | MARKDOWN = "md" 26 | TEXT = "txt" 27 | 28 | class StorageType(Enum): 29 | S3 = "S3" 30 | LOCAL = "LOCAL" 31 | NONE = "NONE" 32 | 33 | 34 | class CollectionType(Enum): 35 | DOCUMENTS = "documents" 36 | IMAGES = "images" 37 | -------------------------------------------------------------------------------- /.github/workflows/python_test.yml: -------------------------------------------------------------------------------- 1 | name: Test python application 2 | 3 | on: 4 | push: 5 | branches: [ main, stable ] 6 | pull_request: 7 | branches: [ main, stable ] 8 | 9 | env: 10 | OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} 11 | MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }} 12 | AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 13 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 14 | jobs: 15 | test: 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@v2 19 | - name: Set up Python 3.11 20 | uses: actions/setup-python@v2 21 | with: 22 | python-version: 3.11 23 | - name: Install dependencies 24 | run: | 25 | python -m pip install --upgrade pip 26 | pip install poetry==1.4.2 27 | poetry install 28 | - name: Test with pytest 29 | run: | 30 | poetry run pytest 31 | 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vs/ 2 | .vscode/ 3 | 
.idea/ 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | .streamlit/ 10 | 11 | # Installer logs 12 | pip-log.txt 13 | pip-delete-this-directory.txt 14 | 15 | # Jupyter Notebook 16 | .ipynb_checkpoints 17 | notebooks/ 18 | 19 | # Environments 20 | .env 21 | .envrc 22 | .venv 23 | .venvs 24 | env/ 25 | venv/ 26 | ENV/ 27 | env.bak/ 28 | venv.bak/ 29 | 30 | 31 | # C extensions 32 | *.so 33 | *.pkl 34 | *.bin 35 | 36 | # macOS display setting files 37 | .DS_Store 38 | 39 | # Terraform 40 | terraform/*/.terraform/** 41 | terraform/*/.terraform* 42 | terraform/*/terraform.* 43 | 44 | 45 | secrets.toml 46 | /data/chroma 47 | 48 | /data/chroma2 49 | /data/UBER 50 | default.sqlite 51 | /merging_index 52 | /sentence_index 53 | /storage/AI 54 | /storage 55 | /data/cache 56 | /data/faiss 57 | /logs 58 | *.log 59 | /tests/rechdocia 60 | /data/llama_index/ 61 | -------------------------------------------------------------------------------- /data/sources/md/12factor/en/intro.md: -------------------------------------------------------------------------------- 1 | Introduction 2 | ============ 3 | 4 | In the modern era, software is commonly delivered as a service: called *web apps*, or *software-as-a-service*. The twelve-factor app is a methodology for building software-as-a-service apps that: 5 | 6 | * Use **declarative** formats for setup automation, to minimize time and cost for new developers joining the project; 7 | * Have a **clean contract** with the underlying operating system, offering **maximum portability** between execution environments; 8 | * Are suitable for **deployment** on modern **cloud platforms**, obviating the need for servers and systems administration; 9 | * **Minimize divergence** between development and production, enabling **continuous deployment** for maximum agility; 10 | * And can **scale up** without significant changes to tooling, architecture, or development practices. 11 | 12 | The twelve-factor methodology can be applied to apps written in any programming language, and which use any combination of backing services (database, queue, memory cache, etc). 
13 | -------------------------------------------------------------------------------- /terraform/aws/security_group.tf: -------------------------------------------------------------------------------- 1 | resource "aws_security_group" "ai_assistant_security_group" { 2 | name = "ai_assistant-security-group-https" 3 | vpc_id = data.aws_vpc.ai_assistant_vpc.id 4 | 5 | ingress { 6 | from_port = 80 7 | to_port = 80 8 | protocol = "TCP" 9 | cidr_blocks = ["0.0.0.0/0"] 10 | } 11 | ingress { 12 | from_port = 443 13 | to_port = 443 14 | protocol = "TCP" 15 | cidr_blocks = ["0.0.0.0/0"] 16 | } 17 | ingress { 18 | from_port = 2049 19 | to_port = 2049 20 | protocol = "TCP" 21 | cidr_blocks = ["0.0.0.0/0"] 22 | } 23 | egress { 24 | from_port = 0 25 | to_port = 0 26 | protocol = "-1" 27 | cidr_blocks = ["0.0.0.0/0"] 28 | ipv6_cidr_blocks = ["::/0"] 29 | } 30 | } 31 | 32 | resource "aws_security_group" "ai_assistant_opensearch" { 33 | name = "ai-assistant-opensearch" 34 | vpc_id = data.aws_vpc.ai_assistant_vpc.id 35 | 36 | ingress { 37 | from_port = 443 38 | to_port = 443 39 | protocol = "tcp" 40 | cidr_blocks = ["0.0.0.0/0"] 41 | } 42 | } -------------------------------------------------------------------------------- /rag_assistant/Hello.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import sys 3 | 4 | from utils.auth import check_password 5 | from utils.config_loader import load_config 6 | from utils.utilsllm import get_model_provider, get_model_name, get_embeddings_model_name 7 | 8 | config = load_config() 9 | 10 | app_name = config['DEFAULT']['APP_NAME'] 11 | vectordb = config['VECTORDB']['vectordb'] 12 | 13 | st.set_page_config( 14 | page_title="Hello", 15 | page_icon="👋", 16 | ) 17 | 18 | model_provider = get_model_provider() 19 | model_name = get_model_name(provider=model_provider) 20 | embeddings_model = get_embeddings_model_name(provider=model_provider) 21 | 22 | def main(): 23 | st.title(f"""Bienvenue sur {app_name} ! 👋""") 24 | 25 | st.markdown( 26 | f""" 27 | **{app_name}** utilise '**{model_provider}**' avec comme _modèle de langage_ '**{model_name}**'. 28 | 29 | La _base de connaissance_ est sur '**{vectordb}**' avec comme _modèle d'embedding_ : '**{embeddings_model}**'. 30 | """ 31 | ) 32 | st.write(sys.version) 33 | 34 | 35 | if __name__ == "__main__": 36 | 37 | if not check_password(): 38 | # Do not continue if check_password is not True. 39 | st.stop() 40 | 41 | main() 42 | -------------------------------------------------------------------------------- /data/sources/md/12factor/en/toc.md: -------------------------------------------------------------------------------- 1 | The Twelve Factors 2 | ================== 3 | 4 | ## [I. Codebase](./codebase) 5 | ### One codebase tracked in revision control, many deploys 6 | 7 | ## [II. Dependencies](./dependencies) 8 | ### Explicitly declare and isolate dependencies 9 | 10 | ## [III. Config](./config) 11 | ### Store config in the environment 12 | 13 | ## [IV. Backing services](./backing-services) 14 | ### Treat backing services as attached resources 15 | 16 | ## [V. Build, release, run](./build-release-run) 17 | ### Strictly separate build and run stages 18 | 19 | ## [VI. Processes](./processes) 20 | ### Execute the app as one or more stateless processes 21 | 22 | ## [VII. Port binding](./port-binding) 23 | ### Export services via port binding 24 | 25 | ## [VIII. Concurrency](./concurrency) 26 | ### Scale out via the process model 27 | 28 | ## [IX. 
Disposability](./disposability) 29 | ### Maximize robustness with fast startup and graceful shutdown 30 | 31 | ## [X. Dev/prod parity](./dev-prod-parity) 32 | ### Keep development, staging, and production as similar as possible 33 | 34 | ## [XI. Logs](./logs) 35 | ### Treat logs as event streams 36 | 37 | ## [XII. Admin processes](./admin-processes) 38 | ### Run admin/management tasks as one-off processes 39 | -------------------------------------------------------------------------------- /terraform/aws/efs.tf: -------------------------------------------------------------------------------- 1 | 2 | resource "aws_kms_key" "volume_key" { 3 | description = "Ai Assistant EFS Volume key" 4 | deletion_window_in_days = 7 5 | } 6 | 7 | resource "aws_efs_file_system" "ai_assistant_efs_file_system" { 8 | encrypted = true 9 | kms_key_id = aws_kms_key.volume_key.arn 10 | 11 | tags = { 12 | Name = "ai-assistant" 13 | } 14 | } 15 | 16 | resource "aws_efs_mount_target" "ai_assistant_efs_mount_target_1" { 17 | file_system_id = aws_efs_file_system.ai_assistant_efs_file_system.id 18 | subnet_id = data.aws_subnet.ai_assistant_subnet_1.id 19 | security_groups = [aws_security_group.ai_assistant_security_group.id] 20 | } 21 | 22 | resource "aws_efs_mount_target" "ai_assistant_efs_mount_target_2" { 23 | file_system_id = aws_efs_file_system.ai_assistant_efs_file_system.id 24 | subnet_id = data.aws_subnet.ai_assistant_subnet_2.id 25 | security_groups = [aws_security_group.ai_assistant_security_group.id] 26 | } 27 | 28 | resource "aws_efs_mount_target" "ai_assistant_efs_mount_target_3" { 29 | file_system_id = aws_efs_file_system.ai_assistant_efs_file_system.id 30 | subnet_id = data.aws_subnet.ai_assistant_subnet_3.id 31 | security_groups = [aws_security_group.ai_assistant_security_group.id] 32 | } 33 | -------------------------------------------------------------------------------- /data/sources/md/12factor/fr/intro.md: -------------------------------------------------------------------------------- 1 | Introduction 2 | ============ 3 | 4 | À l'époque actuelle, les logiciels sont régulièrement délivrés en tant que services : on les appelle des *applications web* (web apps), ou *logiciels en tant que service* (*software-as-a-service*). L'application 12 facteurs est une méthodologie pour concevoir des logiciels en tant que service qui : 5 | 6 | * Utilisent des formats **déclaratifs** pour mettre en oeuvre l'automatisation, pour minimiser le temps et les coûts pour que de nouveaux développeurs rejoignent le projet; 7 | * Ont un **contrat propre** avec le système d'exploitation sous-jacent, offrant une **portabilité maximum** entre les environnements d'exécution; 8 | * Sont adaptés à des **déploiements** sur des **plateformes cloud** modernes, rendant inutile le besoin de serveurs et de l'administration de systèmes; 9 | * **Minimisent la divergence** entre le développement et la production, ce qui permet le **déploiement continu** pour une agilité maximum; 10 | * et peuvent **grossir verticalement** sans changement significatif dans les outils, l'architecture ou les pratiques de développement; 11 | 12 | La méthodologie 12 facteurs peut être appliquée à des applications écrites dans tout langage de programmation, et qui utilisent tout type de services externes (base de données, file, cache mémoire, etc.) 
13 | -------------------------------------------------------------------------------- /terraform/aws/opensearch.tf: -------------------------------------------------------------------------------- 1 | data "aws_iam_policy_document" "opensearch_domain_policy" { 2 | statement { 3 | effect = "Allow" 4 | 5 | principals { 6 | type = "AWS" 7 | identifiers = var.opensearch_allowed_users_and_policy_arn #list of arn 8 | } 9 | 10 | actions = ["es:*"] 11 | resources = ["arn:aws:es:eu-west-1:441525731509:domain/ai-assistant/*"] 12 | } 13 | } 14 | 15 | resource "aws_opensearch_domain" "ai_assistant_opensearch_domain" { 16 | domain_name = var.opensearch_domain_name 17 | engine_version = "OpenSearch_2.13" 18 | 19 | cluster_config { 20 | instance_type = "r5.large.search" 21 | zone_awareness_enabled = false 22 | instance_count = 1 23 | multi_az_with_standby_enabled = false 24 | } 25 | 26 | vpc_options { 27 | subnet_ids = [data.aws_subnet.ai_assistant_subnet_1.id] 28 | security_group_ids = [aws_security_group.ai_assistant_security_group.id] 29 | } 30 | 31 | domain_endpoint_options { 32 | enforce_https = true 33 | tls_security_policy = "Policy-Min-TLS-1-2-2019-07" 34 | } 35 | 36 | node_to_node_encryption { 37 | enabled = true 38 | } 39 | 40 | ebs_options { 41 | ebs_enabled = true 42 | volume_size = 10 43 | } 44 | access_policies = data.aws_iam_policy_document.opensearch_domain_policy.json 45 | } 46 | -------------------------------------------------------------------------------- /rag_assistant/utils/auth.py: -------------------------------------------------------------------------------- 1 | import hmac 2 | import streamlit as st 3 | 4 | def check_password(): 5 | """Returns `True` if the user had the correct password.""" 6 | 7 | if st.session_state.get("password_correct", False): 8 | return True 9 | 10 | try: 11 | 12 | if "password" not in st.secrets: 13 | # no password required 14 | st.session_state["password_correct"] = True 15 | return True 16 | 17 | except FileNotFoundError: 18 | # no secrets.toml so no password required 19 | # no password required 20 | st.session_state["password_correct"] = True 21 | return True 22 | 23 | def password_entered(): 24 | """Checks whether a password entered by the user is correct.""" 25 | if hmac.compare_digest(st.session_state["password"], st.secrets["password"]): 26 | st.session_state["password_correct"] = True 27 | del st.session_state["password"] # Don't store the password. 28 | else: 29 | st.session_state["password_correct"] = False 30 | 31 | # Return True if the password is validated. 32 | 33 | # Show input for password. 
34 | st.text_input( 35 | "Password", type="password", on_change=password_entered, key="password" 36 | ) 37 | if "password_correct" in st.session_state: 38 | st.error("😕 Password incorrect") 39 | return False 40 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # The builder image, used to build the virtual environment 2 | FROM python:3.11-slim as builder 3 | 4 | RUN apt-get update 5 | RUN apt-get install build-essential -y 6 | 7 | RUN pip install poetry==1.4.2 8 | 9 | ENV POETRY_NO_INTERACTION=1 \ 10 | POETRY_VIRTUALENVS_IN_PROJECT=1 \ 11 | POETRY_VIRTUALENVS_CREATE=1 \ 12 | POETRY_CACHE_DIR=/tmp/poetry_cache 13 | 14 | # A directory to have app data 15 | WORKDIR /app 16 | 17 | COPY pyproject.toml poetry.lock ./ 18 | 19 | RUN poetry install --without dev --no-root && rm -rf $POETRY_CACHE_DIR 20 | 21 | # The runtime image, used to just run the code provided its virtual environment 22 | FROM python:3.11-slim as runtime 23 | 24 | WORKDIR /app 25 | 26 | COPY tests tests 27 | COPY conf conf 28 | 29 | RUN mkdir -p .streamlit 30 | RUN touch .streamlit/secrets.toml 31 | 32 | ENV VIRTUAL_ENV=/app/.venv \ 33 | PATH="/app/.venv/bin:$PATH" 34 | 35 | COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV} 36 | 37 | COPY rag_assistant rag_assistant 38 | 39 | RUN apt-get update 40 | RUN apt-get install wget -y 41 | RUN mkdir /opt/tiktoken_cache 42 | ARG TIKTOKEN_URL="https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken" 43 | RUN wget -O /opt/tiktoken_cache/$(echo -n $TIKTOKEN_URL | sha1sum | head -c 40) $TIKTOKEN_URL 44 | ENV TIKTOKEN_CACHE_DIR=/opt/tiktoken_cache 45 | 46 | CMD ["streamlit", "run", "rag_assistant/Hello.py", "--server.port", "80"] 47 | -------------------------------------------------------------------------------- /terraform/aws/outputs.tf: -------------------------------------------------------------------------------- 1 | output "vpc_id" { 2 | description = "VPC ID" 3 | value = var.vpc_id 4 | } 5 | 6 | output "subnet_1_id" { 7 | description = "Subnet 1 ID" 8 | value = var.subnet_id_1 9 | } 10 | 11 | output "subnet_2_id" { 12 | description = "Subnet 2 ID" 13 | value = var.subnet_id_2 14 | } 15 | 16 | output "subnet_3_id" { 17 | description = "Subnet 3 ID" 18 | value = var.subnet_id_3 19 | } 20 | 21 | output "lb_arn" { 22 | description = "Load Balancer arn" 23 | value = aws_lb.application_lb.arn 24 | } 25 | 26 | output "lb_dns_name" { 27 | description = "Load Balancer DNS name" 28 | value = aws_lb.application_lb.dns_name 29 | } 30 | 31 | output "aws_ecs_cluster_arn" { 32 | description = "ECS Cluster arn" 33 | value = aws_ecs_cluster.ai_assistant_cluster.id 34 | } 35 | 36 | output "app_url" { 37 | description = "URL to access the deployed application" 38 | value = var.dns_url_app_subnet 39 | } 40 | 41 | output "ecr_image_url" { 42 | description = "URL to the image in ECR repository" 43 | value = var.ecr_image_url 44 | } 45 | 46 | output "cloudwatch_name" { 47 | description = "CloudWatch log group name" 48 | value = aws_cloudwatch_log_group.ai_assistant-cloudwatch-log.name 49 | } 50 | 51 | output "cloudwatch_arn" { 52 | description = "CloudWatch log group arn" 53 | value = aws_cloudwatch_log_group.ai_assistant-cloudwatch-log.arn 54 | } 55 | -------------------------------------------------------------------------------- /data/sources/md/12factor/fr/toc.md: -------------------------------------------------------------------------------- 1 | Les 12 
facteurs 2 | ================== 3 | 4 | ## [I. Base de code](./codebase) 5 | ### Une base de code suivie avec un système de contrôle de version, plusieurs déploiements 6 | 7 | ## [II. Dépendances](./dependencies) 8 | ### Déclarez explicitement et isolez les dépendances 9 | 10 | ## [III. Configuration](./config) 11 | ### Stockez la configuration dans l'environnement 12 | 13 | ## [IV. Services externes](./backing-services) 14 | ### Traitez les services externes comme des ressources attachées 15 | 16 | ## [V. Assemblez, publiez, exécutez](./build-release-run) 17 | ### Séparez strictement les étapes d'assemblage et d'exécution 18 | 19 | ## [VI. Processus](./processes) 20 | ### Exécutez l'application comme un ou plusieurs processus sans état 21 | 22 | ## [VII. Associations de ports](./port-binding) 23 | ### Exportez les services via des associations de ports 24 | 25 | ## [VIII. Concurrence](./concurrency) 26 | ### Grossissez à l'aide du modèle de processus 27 | 28 | ## [IX. Jetable](./disposability) 29 | ### Maximisez la robustesse avec des démarrages rapides et des arrêts gracieux 30 | 31 | ## [X. Parité dev/prod](./dev-prod-parity) 32 | ### Gardez le développement, la validation et la production aussi proches que possible 33 | 34 | ## [XI. Logs](./logs) 35 | ### Traitez les logs comme des flux d'évènements 36 | 37 | ## [XII. Processus d'administration](./admin-processes) 38 | ### Lancez les processus d'administration et de maintenance comme des one-off-processes 39 | -------------------------------------------------------------------------------- /data/sources/md/12factor/en/background.md: -------------------------------------------------------------------------------- 1 | Background 2 | ========== 3 | 4 | The contributors to this document have been directly involved in the development and deployment of hundreds of apps, and indirectly witnessed the development, operation, and scaling of hundreds of thousands of apps via our work on the Heroku platform. 5 | 6 | This document synthesizes all of our experience and observations on a wide variety of software-as-a-service apps in the wild. It is a triangulation on ideal practices for app development, paying particular attention to the dynamics of the organic growth of an app over time, the dynamics of collaboration between developers working on the app's codebase, and avoiding the cost of software erosion. 7 | 8 | Our motivation is to raise awareness of some systemic problems we've seen in modern application development, to provide a shared vocabulary for discussing those problems, and to offer a set of broad conceptual solutions to those problems with accompanying terminology. The format is inspired by Martin Fowler's books *Patterns of Enterprise Application Architecture* and *Refactoring*. 
9 | 10 | -------------------------------------------------------------------------------- /terraform/aws/lb.tf: -------------------------------------------------------------------------------- 1 | resource "aws_lb_target_group" "ai_assistant_target_group_https" { 2 | name = "ai-assistant-group-https" 3 | port = 80 4 | protocol = "HTTP" 5 | vpc_id = data.aws_vpc.ai_assistant_vpc.id 6 | target_type = "ip" 7 | } 8 | 9 | resource "aws_lb" "application_lb" { 10 | name = "ai-assistant-alb-tf-https" 11 | internal = false 12 | load_balancer_type = "application" 13 | security_groups = [aws_security_group.ai_assistant_security_group.id] 14 | subnets = [data.aws_subnet.ai_assistant_subnet_1.id, data.aws_subnet.ai_assistant_subnet_2.id, data.aws_subnet.ai_assistant_subnet_3.id] 15 | } 16 | 17 | resource "aws_lb_listener" "application_lb_listener" { 18 | load_balancer_arn = aws_lb.application_lb.arn 19 | port = 443 20 | protocol = "HTTPS" 21 | ssl_policy = "ELBSecurityPolicy-2016-08" 22 | 23 | certificate_arn = data.aws_acm_certificate.ai_assistant_certificate.arn 24 | 25 | default_action { 26 | type = "forward" 27 | target_group_arn = aws_lb_target_group.ai_assistant_target_group_https.arn 28 | } 29 | } 30 | 31 | resource "aws_lb_listener" "application_lb_listener_redirect" { 32 | load_balancer_arn = aws_lb.application_lb.arn 33 | port = 80 34 | protocol = "HTTP" 35 | 36 | default_action { 37 | type = "redirect" 38 | 39 | redirect { 40 | port = "443" 41 | protocol = "HTTPS" 42 | status_code = "HTTP_301" 43 | } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /data/sources/md/12factor/fr/background.md: -------------------------------------------------------------------------------- 1 | Contexte 2 | ========== 3 | 4 | Les contributeurs de ce document ont été directement impliqués dans le développement et le déploiement de centaines d'applications, et ont vu, indirectement, le développement, la gestion et le grossissement de centaines de milliers d'applications via le travail fait sur la plateforme [Heroku](http://www.heroku.com/). 5 | 6 | Ce document fait la synthèse de toutes nos expériences et observations sur une large variété d'applications software-as-a-service. C'est la triangulation de pratiques idéales pour le développement d'applications, en portant un soin tout particulier aux dynamiques de la croissance organique d'une application au cours du temps, les dynamiques de la collaboration entre les développeurs qui travaillent sur le code de l'application, en [évitant le coût de la lente détérioration du logiciel dans un environnement qui évolue (en)](http://blog.heroku.com/archives/2011/6/28/the_new_heroku_4_erosion_resistance_explicit_contracts/). 7 | 8 | Notre motivation est de faire prendre conscience de certains problèmes systémiques que nous avons rencontrés dans le développement d'applications modernes, afin de fournir un vocabulaire partagé pour discuter ces problèmes, et pour offrir un ensemble de solutions conceptuelles générales à ces problèmes, ainsi que la terminologie correspondante. Le format est inspiré par celui des livres de Martin Fowler *[Patterns of Enterprise Application Architecture (en)](http://books.google.com/books/about/Patterns_of_enterprise_application_archi.html?id=FyWZt5DdvFkC)* et *[Refactoring (en)](http://books.google.com/books/about/Refactoring.html?id=1MsETFPD3I0C)*. 
9 | 10 | -------------------------------------------------------------------------------- /data/sources/md/12factor/en/codebase.md: -------------------------------------------------------------------------------- 1 | ## I. Codebase 2 | ### One codebase tracked in revision control, many deploys 3 | 4 | A twelve-factor app is always tracked in a version control system, such as [Git](http://git-scm.com/), [Mercurial](https://www.mercurial-scm.org/), or [Subversion](http://subversion.apache.org/). A copy of the revision tracking database is known as a *code repository*, often shortened to *code repo* or just *repo*. 5 | 6 | A *codebase* is any single repo (in a centralized revision control system like Subversion), or any set of repos who share a root commit (in a decentralized revision control system like Git). 7 | 8 | ![One codebase maps to many deploys](/images/codebase-deploys.png) 9 | 10 | There is always a one-to-one correlation between the codebase and the app: 11 | 12 | * If there are multiple codebases, it's not an app -- it's a distributed system. Each component in a distributed system is an app, and each can individually comply with twelve-factor. 13 | * Multiple apps sharing the same code is a violation of twelve-factor. The solution here is to factor shared code into libraries which can be included through the [dependency manager](./dependencies). 14 | 15 | There is only one codebase per app, but there will be many deploys of the app. A *deploy* is a running instance of the app. This is typically a production site, and one or more staging sites. Additionally, every developer has a copy of the app running in their local development environment, each of which also qualifies as a deploy. 16 | 17 | The codebase is the same across all deploys, although different versions may be active in each deploy. For example, a developer has some commits not yet deployed to staging; staging has some commits not yet deployed to production. But they all share the same codebase, thus making them identifiable as different deploys of the same app. 18 | 19 | -------------------------------------------------------------------------------- /data/sources/md/12factor/en/port-binding.md: -------------------------------------------------------------------------------- 1 | ## VII. Port binding 2 | ### Export services via port binding 3 | 4 | Web apps are sometimes executed inside a webserver container. For example, PHP apps might run as a module inside [Apache HTTPD](http://httpd.apache.org/), or Java apps might run inside [Tomcat](http://tomcat.apache.org/). 5 | 6 | **The twelve-factor app is completely self-contained** and does not rely on runtime injection of a webserver into the execution environment to create a web-facing service. The web app **exports HTTP as a service by binding to a port**, and listening to requests coming in on that port. 7 | 8 | In a local development environment, the developer visits a service URL like `http://localhost:5000/` to access the service exported by their app. In deployment, a routing layer handles routing requests from a public-facing hostname to the port-bound web processes. 9 | 10 | This is typically implemented by using [dependency declaration](./dependencies) to add a webserver library to the app, such as [Tornado](http://www.tornadoweb.org/) for Python, [Thin](http://code.macournoyer.com/thin/) for Ruby, or [Jetty](http://www.eclipse.org/jetty/) for Java and other JVM-based languages. This happens entirely in *user space*, that is, within the app's code. 
The contract with the execution environment is binding to a port to serve requests. 11 | 12 | HTTP is not the only service that can be exported by port binding. Nearly any kind of server software can be run via a process binding to a port and awaiting incoming requests. Examples include [ejabberd](http://www.ejabberd.im/) (speaking [XMPP](http://xmpp.org/)), and [Redis](http://redis.io/) (speaking the [Redis protocol](http://redis.io/topics/protocol)). 13 | 14 | Note also that the port-binding approach means that one app can become the [backing service](./backing-services) for another app, by providing the URL to the backing app as a resource handle in the [config](./config) for the consuming app. 15 | -------------------------------------------------------------------------------- /data/sources/md/12factor/en/admin-processes.md: -------------------------------------------------------------------------------- 1 | ## XII. Admin processes 2 | ### Run admin/management tasks as one-off processes 3 | 4 | The [process formation](./concurrency) is the array of processes that are used to do the app's regular business (such as handling web requests) as it runs. Separately, developers will often wish to do one-off administrative or maintenance tasks for the app, such as: 5 | 6 | * Running database migrations (e.g. `manage.py migrate` in Django, `rake db:migrate` in Rails). 7 | * Running a console (also known as a [REPL](http://en.wikipedia.org/wiki/Read-eval-print_loop) shell) to run arbitrary code or inspect the app's models against the live database. Most languages provide a REPL by running the interpreter without any arguments (e.g. `python` or `perl`) or in some cases have a separate command (e.g. `irb` for Ruby, `rails console` for Rails). 8 | * Running one-time scripts committed into the app's repo (e.g. `php scripts/fix_bad_records.php`). 9 | 10 | One-off admin processes should be run in an identical environment as the regular [long-running processes](./processes) of the app. They run against a [release](./build-release-run), using the same [codebase](./codebase) and [config](./config) as any process run against that release. Admin code must ship with application code to avoid synchronization issues. 11 | 12 | The same [dependency isolation](./dependencies) techniques should be used on all process types. For example, if the Ruby web process uses the command `bundle exec thin start`, then a database migration should use `bundle exec rake db:migrate`. Likewise, a Python program using Virtualenv should use the vendored `bin/python` for running both the Tornado webserver and any `manage.py` admin processes. 13 | 14 | Twelve-factor strongly favors languages which provide a REPL shell out of the box, and which make it easy to run one-off scripts. In a local deploy, developers invoke one-off admin processes by a direct shell command inside the app's checkout directory. In a production deploy, developers can use ssh or other remote command execution mechanism provided by that deploy's execution environment to run such a process. 
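As a purely illustrative sketch (not a file that exists in this repository), a one-off admin process for the assistant above could be a small script committed next to the application code and run with the same dependency isolation as the web process, e.g. `poetry run python scripts/show_config.py`. The `scripts/show_config.py` path is an assumption; `load_config()` and the `APP_NAME`/`vectordb` keys come from `rag_assistant/utils/config_loader.py` and `rag_assistant/Hello.py` shown earlier.

```python
# scripts/show_config.py -- hypothetical one-off admin script (illustration only).
# It ships with the app's codebase, reads the same conf/config.ini as the running
# app, and is launched with the app's own environment (e.g. `poetry run python ...`).
import sys
from pathlib import Path

# Assumed layout: this script lives in <repo root>/scripts/, so the app package
# sits in <repo root>/rag_assistant/ and is added to the import path here.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "rag_assistant"))

from utils.config_loader import load_config  # same loader the Streamlit app uses


def main() -> None:
    config = load_config()
    # These keys are the ones Hello.py reads; any other key would be an assumption.
    print("APP_NAME:", config["DEFAULT"]["APP_NAME"])
    print("vectordb:", config["VECTORDB"]["vectordb"])


if __name__ == "__main__":
    main()
```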
15 | -------------------------------------------------------------------------------- /tests/rag_bedrock/test_rag_llamaindex.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from rag_bedrock.base import LlamaIndexTestRAGHelper 4 | 5 | 6 | @pytest.mark.usefixtures("trulens_prepare", 7 | "bedrock_prepare", 8 | "temp_dir", 9 | "documents_prepare", 10 | "llm_prepare", 11 | "embeddings_prepare", 12 | "trulens_context_prepare", 13 | "provider_prepare", 14 | "eval_questions_prepare", 15 | "rag_prepare", 16 | "feedbacks_prepare") 17 | class TestRAGLlamaIndexClaude3(LlamaIndexTestRAGHelper): 18 | 19 | @property 20 | def test_name(self): 21 | return "LlamaIndex_Claude_3_Sonnet_Titan_Embed_V1" 22 | 23 | @property 24 | def model_id(self): 25 | return "anthropic.claude-3-sonnet-20240229-v1:0" 26 | 27 | @property 28 | def topic(self): 29 | return "How to Build a Career in AI" 30 | 31 | @property 32 | def embedding_model_id(self): 33 | return "amazon.titan-embed-text-v1" 34 | 35 | 36 | @pytest.mark.usefixtures("trulens_prepare", 37 | "bedrock_prepare", 38 | "temp_dir", 39 | "documents_prepare", 40 | "llm_prepare", 41 | "embeddings_prepare", 42 | "trulens_context_prepare", 43 | "provider_prepare", 44 | "eval_questions_prepare", 45 | "rag_prepare", 46 | "feedbacks_prepare") 47 | class TestRAGLlamaIndexMistral(LlamaIndexTestRAGHelper): 48 | 49 | @property 50 | def test_name(self): 51 | return "LlamaIndex_Mistral_Large_Titan_Embed_V1" 52 | 53 | @property 54 | def model_id(self): 55 | return "mistral.mistral-large-2402-v1:0" 56 | 57 | @property 58 | def topic(self): 59 | return "How to Build a Career in AI" 60 | 61 | @property 62 | def embedding_model_id(self): 63 | return "amazon.titan-embed-text-v1" 64 | -------------------------------------------------------------------------------- /data/sources/md/12factor/en/logs.md: -------------------------------------------------------------------------------- 1 | ## XI. Logs 2 | ### Treat logs as event streams 3 | 4 | *Logs* provide visibility into the behavior of a running app. In server-based environments they are commonly written to a file on disk (a "logfile"); but this is only an output format. 5 | 6 | Logs are the [stream](https://adam.herokuapp.com/past/2011/4/1/logs_are_streams_not_files/) of aggregated, time-ordered events collected from the output streams of all running processes and backing services. Logs in their raw form are typically a text format with one event per line (though backtraces from exceptions may span multiple lines). Logs have no fixed beginning or end, but flow continuously as long as the app is operating. 7 | 8 | **A twelve-factor app never concerns itself with routing or storage of its output stream.** It should not attempt to write to or manage logfiles. Instead, each running process writes its event stream, unbuffered, to `stdout`. During local development, the developer will view this stream in the foreground of their terminal to observe the app's behavior. 9 | 10 | In staging or production deploys, each process' stream will be captured by the execution environment, collated together with all other streams from the app, and routed to one or more final destinations for viewing and long-term archival. These archival destinations are not visible to or configurable by the app, and instead are completely managed by the execution environment. 
Open-source log routers (such as [Logplex](https://github.com/heroku/logplex) and [Fluentd](https://github.com/fluent/fluentd)) are available for this purpose. 11 | 12 | The event stream for an app can be routed to a file, or watched via realtime tail in a terminal. Most significantly, the stream can be sent to a log indexing and analysis system such as [Splunk](http://www.splunk.com/), or a general-purpose data warehousing system such as [Hadoop/Hive](http://hive.apache.org/). These systems allow for great power and flexibility for introspecting an app's behavior over time, including: 13 | 14 | * Finding specific events in the past. 15 | * Large-scale graphing of trends (such as requests per minute). 16 | * Active alerting according to user-defined heuristics (such as an alert when the quantity of errors per minute exceeds a certain threshold). 17 | -------------------------------------------------------------------------------- /terraform/aws/variables.tf: -------------------------------------------------------------------------------- 1 | variable "vpc_id" { 2 | description = "The ID of the VPC" 3 | type = string 4 | } 5 | 6 | variable "subnet_id_1" { 7 | description = "The ID of the first subnet" 8 | type = string 9 | } 10 | 11 | variable "subnet_id_2" { 12 | description = "The ID of the second subnet" 13 | type = string 14 | } 15 | 16 | variable "subnet_id_3" { 17 | description = "The ID of the third subnet" 18 | type = string 19 | } 20 | 21 | variable "dns_url" { 22 | description = "The base DNS URL (without subnet)." 23 | type = string 24 | } 25 | 26 | variable "dns_url_app_subnet" { 27 | description = "The DNS URL of your application. (It needs to have a valid HTTPS certificate and a Route 53 hosted zone)" 28 | type = string 29 | } 30 | 31 | variable "ecr_image_url" { 32 | description = "The AI assistant demonstrator ECR URL" 33 | type = string 34 | } 35 | 36 | variable "secret_name" { 37 | description = "SecretManager secret name" 38 | type = string 39 | } 40 | 41 | variable "openai_key_name" { 42 | description = "OpenAI key name in SecretManager secret" 43 | type = string 44 | } 45 | 46 | variable "mistral_key_name" { 47 | description = "Mistral key name in SecretManager secret" 48 | type = string 49 | } 50 | 51 | 52 | variable "hf_token_name" { 53 | description = "HF token name in SecretManager secret" 54 | type = string 55 | } 56 | 57 | variable "langchain_key_name" { 58 | description = "Langchain key name in SecretManager secret" 59 | type = string 60 | } 61 | 62 | variable "langchain_tracing_v2_bool" { 63 | description = "Langchain tracing V2 boolean string ('true' or 'false')" 64 | type = string 65 | } 66 | 67 | variable "opensearch_domain_name" { 68 | description = "Name of the Opensearch domain" 69 | type = string 70 | } 71 | 72 | variable "opensearch_dashboard_user" { 73 | description = "Username for Opensearch Dashboard user" 74 | type = string 75 | } 76 | 77 | variable "opensearch_dashboard_password" { 78 | description = "Password for Opensearch Dashboard user" 79 | type = string 80 | } 81 | 82 | variable "opensearch_allowed_users_and_policy_arn" { 83 | description = "List of allowed users and roles" 84 | type = list(string) 85 | } 86 | 87 | -------------------------------------------------------------------------------- /data/sources/md/12factor/fr/port-binding.md: -------------------------------------------------------------------------------- 1 | ## VII.
Associations de ports 2 | ### Exportez les services via des associations de ports 3 | 4 | Les applications web sont parfois exécutées à l'intérieur d'un container de serveur web. Par exemple, les applications PHP peuvent fonctionner comme un module à l'intérieur de [HTTPD, d'Apache](http://httpd.apache.org/), ou bien les applications Java peuvent fonctionner à l'intérieur de [Tomcat](http://tomcat.apache.org/). 5 | 6 | **Les applications 12 facteurs sont complètement auto-contenues** et ne se basent pas sur l'injection au moment de l'exécution d'un serveur web dans l'environnement d'exécution pour créer les services exposés au web. L'application web **expose HTTP comme un service en l'associant à un port** et écoute les requêtes qui arrivent sur ce port. 7 | 8 | Dans un environnement de développement local, le développeur visite l'URL d'un service tel que `http://localhost:5000/` pour accéder au service exporté par leur application. Durant le déploiement, une couche de routage gère le routage des requêtes depuis un nom d'hôte qui s'expose au public, vers les processus sur lequel est associé le port. 9 | 10 | Ceci est typiquement implémenté en utilisant [la déclaration de dépendances](./dependencies) pour ajouter une bibliothèque de serveur web, tel que [Tornado](http://www.tornadoweb.org/) pour Python, [Thin](http://code.macournoyer.com/thin/) pour Ruby, ou [Jetty](http://www.eclipse.org/jetty/) pour Java et autres langages basés sur la JVM. Cela se déroule entièrement dans l'espace utilisateur, c'est-à-dire, dans le code de l'application. Le contrat avec l'environnement d'exécution, c'est l'association de port pour servir les requêtes. 11 | 12 | HTTP n'est pas le seul service qui peut être exporté à l'aide d'association de ports. Presque tout type de serveur peut fonctionner à travers l'association à un port et l'écoute des requêtes entrantes. Il y a par exemple [ejabberd](http://www.ejabberd.im/) (qui parle [XMPP](http://xmpp.org/)), et [Redis](http://redis.io/) (qui parle le [protocole Redis](http://redis.io/topics/protocol)). 13 | 14 | Notez également que l'approche par association de port signifie qu'une application peut devenir le [service externe](./backing-services) d'une autre application, en fournissant l'URL de l'application externe dans la configuration de l'application qui la consomme. 15 | -------------------------------------------------------------------------------- /data/sources/md/12factor/fr/codebase.md: -------------------------------------------------------------------------------- 1 | ## I. Base de code 2 | ### Une base de code suivie avec un système de contrôle de version, plusieurs déploiements 3 | 4 | Une application 12 facteurs est toujours suivie dans un système de contrôle de version, tel que [Git](http://git-scm.com/), [Mercurial](https://www.mercurial-scm.org/), ou [Subversion](http://subversion.apache.org/). Une copie de la base de données de suivi des révisions est appelée *dépôt de code*, souvent raccourci en *dépôt*. Le terme anglais *code repository*, raccourci en *repository* et *repo* est également utilisé. 5 | 6 | Une *base de code* correspond à chaque dépôt (dans un système de contrôle de version centralisé tel que Subversion), ou tout ensemble de dépôts qui partage un commit racine (dans un système de contrôle de version décentralisé comme Git). 
7 | 8 | ![Une base de code est associée à plusieurs déploiements](/images/codebase-deploys.png) 9 | 10 | Il y a toujours un rapport direct entre la base de code et l'application : 11 | 12 | * S'il y a plusieurs bases de code, ce n'est pas une application, c'est un système distribué. Chaque composant du système distribué est une application, et chacun peut individuellement respecter la méthodologie 12 facteurs. 13 | * Plusieurs applications partageant le même code est une violation des 12 facteurs. La solution dans ce cas est de factoriser le code partagé dans des bibliothèques qui peuvent être intégrées via un [gestionnaire de dépendances](./dependencies). 14 | 15 | Il y a seulement une base de code par application, mais il y aura plusieurs déploiements de l'application. Un *déploiement* est une instance en fonctionnement de l'application. C'est, par exemple, le site en production, ou bien un ou plusieurs sites de validation. En plus de cela, chaque développeur a une copie de l'application qui fonctionne dans son environnement local de développement, ce qui compte également comme un déploiement. 16 | 17 | La base de code est la même à travers tous les déploiements, bien que différentes versions puissent être actives dans chaque déploiement. Par exemple, un développeur a des commits qui ne sont pas encore déployés dans l'environnement de validation. L'environnement de validation a des commits qui ne sont pas encore déployés en production. Par contre, ils partagent tous la même base de code, ce qui les identifie comme étant des déploiements différents d'une même application. 18 | 19 | -------------------------------------------------------------------------------- /data/sources/md/12factor/en/processes.md: -------------------------------------------------------------------------------- 1 | ## VI. Processes 2 | ### Execute the app as one or more stateless processes 3 | 4 | The app is executed in the execution environment as one or more *processes*. 5 | 6 | In the simplest case, the code is a stand-alone script, the execution environment is a developer's local laptop with an installed language runtime, and the process is launched via the command line (for example, `python my_script.py`). On the other end of the spectrum, a production deploy of a sophisticated app may use many [process types, instantiated into zero or more running processes](./concurrency). 7 | 8 | **Twelve-factor processes are stateless and [share-nothing](http://en.wikipedia.org/wiki/Shared_nothing_architecture).** Any data that needs to persist must be stored in a stateful [backing service](./backing-services), typically a database. 9 | 10 | The memory space or filesystem of the process can be used as a brief, single-transaction cache. For example, downloading a large file, operating on it, and storing the results of the operation in the database. The twelve-factor app never assumes that anything cached in memory or on disk will be available on a future request or job -- with many processes of each type running, chances are high that a future request will be served by a different process. Even when running only one process, a restart (triggered by code deploy, config change, or the execution environment relocating the process to a different physical location) will usually wipe out all local (e.g., memory and filesystem) state. 11 | 12 | Asset packagers like [django-assetpackager](http://code.google.com/p/django-assetpackager/) use the filesystem as a cache for compiled assets. 
A twelve-factor app prefers to do this compiling during the [build stage](/build-release-run). Asset packagers such as [Jammit](http://documentcloud.github.io/jammit/) and the [Rails asset pipeline](http://ryanbigg.com/guides/asset_pipeline.html) can be configured to package assets during the build stage. 13 | 14 | Some web systems rely on ["sticky sessions"](http://en.wikipedia.org/wiki/Load_balancing_%28computing%29#Persistence) -- that is, caching user session data in memory of the app's process and expecting future requests from the same visitor to be routed to the same process. Sticky sessions are a violation of twelve-factor and should never be used or relied upon. Session state data is a good candidate for a datastore that offers time-expiration, such as [Memcached](http://memcached.org/) or [Redis](http://redis.io/). 15 | -------------------------------------------------------------------------------- /data/sources/md/12factor/en/build-release-run.md: -------------------------------------------------------------------------------- 1 | ## V. Build, release, run 2 | ### Strictly separate build and run stages 3 | 4 | A [codebase](./codebase) is transformed into a (non-development) deploy through three stages: 5 | 6 | * The *build stage* is a transform which converts a code repo into an executable bundle known as a *build*. Using a version of the code at a commit specified by the deployment process, the build stage fetches vendors [dependencies](./dependencies) and compiles binaries and assets. 7 | * The *release stage* takes the build produced by the build stage and combines it with the deploy's current [config](./config). The resulting *release* contains both the build and the config and is ready for immediate execution in the execution environment. 8 | * The *run stage* (also known as "runtime") runs the app in the execution environment, by launching some set of the app's [processes](./processes) against a selected release. 9 | 10 | ![Code becomes a build, which is combined with config to create a release.](/images/release.png) 11 | 12 | **The twelve-factor app uses strict separation between the build, release, and run stages.** For example, it is impossible to make changes to the code at runtime, since there is no way to propagate those changes back to the build stage. 13 | 14 | Deployment tools typically offer release management tools, most notably the ability to roll back to a previous release. For example, the [Capistrano](https://github.com/capistrano/capistrano/wiki) deployment tool stores releases in a subdirectory named `releases`, where the current release is a symlink to the current release directory. Its `rollback` command makes it easy to quickly roll back to a previous release. 15 | 16 | Every release should always have a unique release ID, such as a timestamp of the release (such as `2011-04-06-20:32:17`) or an incrementing number (such as `v100`). Releases are an append-only ledger and a release cannot be mutated once it is created. Any change must create a new release. 17 | 18 | Builds are initiated by the app's developers whenever new code is deployed. Runtime execution, by contrast, can happen automatically in cases such as a server reboot, or a crashed process being restarted by the process manager. Therefore, the run stage should be kept to as few moving parts as possible, since problems that prevent an app from running can cause it to break in the middle of the night when no developers are on hand. 
The build stage can be more complex, since errors are always in the foreground for a developer who is driving the deploy. 19 | 20 | -------------------------------------------------------------------------------- /data/sources/md/12factor/en/dependencies.md: -------------------------------------------------------------------------------- 1 | ## II. Dependencies 2 | ### Explicitly declare and isolate dependencies 3 | 4 | Most programming languages offer a packaging system for distributing support libraries, such as [CPAN](http://www.cpan.org/) for Perl or [Rubygems](http://rubygems.org/) for Ruby. Libraries installed through a packaging system can be installed system-wide (known as "site packages") or scoped into the directory containing the app (known as "vendoring" or "bundling"). 5 | 6 | **A twelve-factor app never relies on implicit existence of system-wide packages.** It declares all dependencies, completely and exactly, via a *dependency declaration* manifest. Furthermore, it uses a *dependency isolation* tool during execution to ensure that no implicit dependencies "leak in" from the surrounding system. The full and explicit dependency specification is applied uniformly to both production and development. 7 | 8 | For example, [Bundler](https://bundler.io/) for Ruby offers the `Gemfile` manifest format for dependency declaration and `bundle exec` for dependency isolation. In Python there are two separate tools for these steps -- [Pip](http://www.pip-installer.org/en/latest/) is used for declaration and [Virtualenv](http://www.virtualenv.org/en/latest/) for isolation. Even C has [Autoconf](http://www.gnu.org/s/autoconf/) for dependency declaration, and static linking can provide dependency isolation. No matter what the toolchain, dependency declaration and isolation must always be used together -- only one or the other is not sufficient to satisfy twelve-factor. 9 | 10 | One benefit of explicit dependency declaration is that it simplifies setup for developers new to the app. The new developer can check out the app's codebase onto their development machine, requiring only the language runtime and dependency manager installed as prerequisites. They will be able to set up everything needed to run the app's code with a deterministic *build command*. For example, the build command for Ruby/Bundler is `bundle install`, while for Clojure/[Leiningen](https://github.com/technomancy/leiningen#readme) it is `lein deps`. 11 | 12 | Twelve-factor apps also do not rely on the implicit existence of any system tools. Examples include shelling out to ImageMagick or `curl`. While these tools may exist on many or even most systems, there is no guarantee that they will exist on all systems where the app may run in the future, or whether the version found on a future system will be compatible with the app. If the app needs to shell out to a system tool, that tool should be vendored into the app. 13 | -------------------------------------------------------------------------------- /data/sources/md/12factor/en/concurrency.md: -------------------------------------------------------------------------------- 1 | ## VIII. Concurrency 2 | ### Scale out via the process model 3 | 4 | Any computer program, once run, is represented by one or more processes. Web apps have taken a variety of process-execution forms. For example, PHP processes run as child processes of Apache, started on demand as needed by request volume. 
Java processes take the opposite approach, with the JVM providing one massive uberprocess that reserves a large block of system resources (CPU and memory) on startup, with concurrency managed internally via threads. In both cases, the running process(es) are only minimally visible to the developers of the app. 5 | 6 | ![Scale is expressed as running processes, workload diversity is expressed as process types.](/images/process-types.png) 7 | 8 | **In the twelve-factor app, processes are a first class citizen.** Processes in the twelve-factor app take strong cues from [the unix process model for running service daemons](https://adam.herokuapp.com/past/2011/5/9/applying_the_unix_process_model_to_web_apps/). Using this model, the developer can architect their app to handle diverse workloads by assigning each type of work to a *process type*. For example, HTTP requests may be handled by a web process, and long-running background tasks handled by a worker process. 9 | 10 | This does not exclude individual processes from handling their own internal multiplexing, via threads inside the runtime VM, or the async/evented model found in tools such as [EventMachine](https://github.com/eventmachine/eventmachine), [Twisted](http://twistedmatrix.com/trac/), or [Node.js](http://nodejs.org/). But an individual VM can only grow so large (vertical scale), so the application must also be able to span multiple processes running on multiple physical machines. 11 | 12 | The process model truly shines when it comes time to scale out. The [share-nothing, horizontally partitionable nature of twelve-factor app processes](./processes) means that adding more concurrency is a simple and reliable operation. The array of process types and number of processes of each type is known as the *process formation*. 13 | 14 | Twelve-factor app processes [should never daemonize](http://dustin.github.com/2010/02/28/running-processes.html) or write PID files. Instead, rely on the operating system's process manager (such as [systemd](https://www.freedesktop.org/wiki/Software/systemd/), a distributed process manager on a cloud platform, or a tool like [Foreman](http://blog.daviddollar.org/2011/05/06/introducing-foreman.html) in development) to manage [output streams](./logs), respond to crashed processes, and handle user-initiated restarts and shutdowns. 15 | -------------------------------------------------------------------------------- /data/sources/md/12factor/fr/admin-processes.md: -------------------------------------------------------------------------------- 1 | ## XII. Processus d'administration 2 | ### Lancez les processus d'administration et de maintenance comme des one-off-processes 3 | 4 | La [formation de processus](./concurrency) est la liste des processus qui sont utilisés pour le fonctionnement normal de l'application (comme gérer les requêtes web) lorsqu'elle tourne. Les développeurs vont souvent vouloir effectuer des tâches occasionnelles d'administration ou de maintenance, comme : 5 | 6 | * Lancer les migrations de base de données (par ex. `manage.py migrate` avec Django, `rake db:migrate` avec Rails). 7 | * Lancer une console (également appelée terminal [REPL](http://en.wikipedia.org/wiki/Read-eval-print_loop)) pour exécuter du code arbitraire ou inspecter les modèles de l'application dans la base de données. La plupart des langages fournissent un terminal REPL en lançant l'interpréteur sans arguments (par exemple `python` ou `perl`), ou dans certains cas à l'aide d'une commande dédiée (par ex. 
`irb` pour Ruby, `rails console` pour Rails). 8 | * Exécuter des scripts ponctuels inclus dans le dépôt de code (par ex. `php scripts/fix_bad_records.php`). 9 | 10 | Les processus ponctuels d'administration devraient être lancés dans un environnement identique à ceux des [processus standards](./processes) de l'application. Ils s'exécutent sur une [release](./build-release-run), en utilisant la même [base de code](./codebase) et [configuration](./config) que tout processus qui tourne pour cette release. Le code d'administration doit être livré avec le code de l'application afin d'éviter les problèmes de synchronisation. 11 | 12 | La même technique d'[isolation de dépendances](./dependencies) doit être utilisée sur tous les types de processus. Par exemple, si le processus web de Ruby utilise la commande `bundle exec thin start`, alors une migration de base de données devrait être faite via `bundle exec rake db:migrate`. De la même manière, un programme Python qui utilise Virtualenv devrait utiliser la commande incluse `bin/python` pour lancer à la fois le serveur web Tornado et tout processus administrateur `manage.py`. 13 | 14 | Les applications 12 facteurs préfèrent les langages qui fournissent un terminal REPL prêt à l'emploi, et qui facilitent l'exécution de scripts ponctuels. Dans un déploiement local, les développeurs invoquent les processus ponctuels d'administration depuis le terminal, par une commande directement dans le répertoire où se trouve l'application. Dans un déploiement de production, les développeurs peuvent utiliser ssh ou d'autres mécanismes d'exécution de commandes fournis par l'environnement d'exécution de ce déploiement pour exécuter un tel processus. 15 | -------------------------------------------------------------------------------- /data/sources/md/12factor/fr/logs.md: -------------------------------------------------------------------------------- 1 | ## XI. Logs 2 | ### Traitez les logs comme des flux d'évènements 3 | 4 | Les *logs* fournissent de la visibilité au comportement de l'application qui s'exécute. Dans des environnements de type serveur, ils sont généralement écrits dans un fichier, sur le disque (dans un fichier de log). Mais c'est simplement un format de sortie. 5 | 6 | Les logs sont des [flux (en)](https://adam.herokuapp.com/past/2011/4/1/logs_are_streams_not_files/) d'agrégats d'évènements, ordonnés dans le temps, collectés à travers les flux de sortie de tous les processus et services externes qui tournent. Les logs, dans leur forme brute, sont au format texte avec un événement par ligne (bien que les traces d'exécutions puissent s'étaler sur plusieurs lignes). Les logs n'ont pas de début ou de fin fixe, mais se remplissent en continu tant que l'application est en marche. 7 | 8 | **Une application 12 facteurs ne s'inquiète jamais du routage ou du stockage de ses flux de sortie.** Elle ne devrait pas tenter d'écrire ou de gérer les fichiers de logs. À la place, chaque processus qui tourne écrit ses flux d'événements, sans tampon, vers `stdout`, la sortie standard ; en phase de développement local, les développeurs pourront voir ce flux dans leur terminal et observer le comportement de l'application. 9 | 10 | Dans les déploiements de validation ou de production, les flux de chaque processus seront capturés par leur environnement d'exécution, assemblés avec les autres flux de l'application, et routés vers une ou plusieurs destinations pour un visionnage et un archivage de longue durée. 
Le lieu d'archivage n'est pas visible et ne peut être configuré par l'application : il est complètement géré par l'environnement d'exécution. Des routeurs open source de logs (tels que [Logplex](https://github.com/heroku/logplex) et [Fluentd](https://github.com/fluent/fluentd)) existent pour cela. 11 | 12 | Le flux d'événements d'une application peut être routé dans un fichier, ou surveillé en temps réel (avec tail) dans un terminal. Plus pertinent, les flux peuvent être envoyés vers un outil d'indexation et d'archivage des logs tel que [Splunk](http://www.splunk.com/), ou bien dans un entrepôt de données générique comme [Hadoop/Hive](http://hive.apache.org/). Ces systèmes sont très puissants et flexibles pour inspecter le comportement de l'application au cours du temps, ce qui inclut : 13 | 14 | * Trouver un événement spécifique dans le passé 15 | * Faire des graphiques à grande échelle des tendances (comme le nombre de requêtes par minute) 16 | * Lever des alertes, à partir d'heuristiques définies par l'utilisateur (comme alerter dès que la quantité d'erreurs par minute dépasse un certain seuil) 17 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "applied-ai-rag-assistant" 3 | version = "0.0.1" 4 | description = "Advanced RAG assistant" 5 | authors = ["Pierre Bittner "] 6 | license = "Apache 2.0" 7 | readme = "README.md" 8 | packages = [{include = "rag_assistant"}] 9 | 10 | [tool.poetry.dependencies] 11 | python = ">=3.10,<3.12" 12 | langchain = {version = ">=0.2.0"} 13 | openai = ">=1.6.1" 14 | duckduckgo-search = ">4.0" 15 | pypdf = ">=4.0.1,<5.0.0" # "^3.12.2" 16 | sentence-transformers = "^2.2.2" 17 | torch = ">=2.0.0, !=2.0.1" 18 | tabulate = "^0.9.0" 19 | streamlit-feedback = "^0.1.3" 20 | langchain-experimental = "^0.0.59" 21 | streamlit = ">=1.26" 22 | docarray = "^0.40.0" 23 | langchain-community = ">=0.2.0" # must have "^0.0.29" 24 | langchain-openai = ">=0.1.1" 25 | streamlit_pdf_viewer = ">=0.0.12" 26 | # There is an incompatibility issue with chromadb and llama-index 27 | # was working with 0.4.3 but it is no longer supported by llama-index and there is an incompatibility issue 28 | # newer versions of chromadb raised "ModuleNotFoundError: No module named 'hnswlib'" in vector/local_hnsw.py, line 26 29 | # llama-index-vector-stores-chroma (0.1.6) depends on chromadb (>=0.4.22,<0.5.0) 30 | # at runtime 31 | # using FAISS for now as vectorstore 32 | # chromadb = "0.4.3" # Must have "0.4.3" 33 | # solved by doing pip uninstall hnswlib 34 | # and pip install chroma-hnswlib 35 | chromadb = ">=0.4.24" 36 | boto3= ">=1.28.59" 37 | chroma-hnswlib = ">=0.7.3" 38 | numexpr = "^2.8.8" 39 | langchainhub = "^0.1.14" 40 | llama-index = ">=0.10.20" 41 | llama-index-llms-openai = ">=0.1.12" 42 | llama-index-readers-wikipedia = ">=0.1.3" 43 | llama-index-vector-stores-chroma = ">=0.1.6" 44 | llama-index-embeddings-huggingface = ">=0.1.6" 45 | nltk = ">=3.8.1" 46 | python-dotenv= ">=1.0.0" 47 | PyPDF2 = ">=3.0.1" 48 | faiss-cpu = ">=1.8.0" 49 | wikipedia = ">=1.4.0" 50 | pathlib = ">=1.0.1" 51 | pytest = ">=8.1.1" 52 | trulens_eval = ">=0.27.0" 53 | mistralai = ">=0.4.0" 54 | llama-index-llms-mistralai = ">=0.1.10" 55 | llama-index-llms-bedrock = ">=0.1.8" 56 | llama-index-llms-anthropic = ">=0.1.11" 57 | llama-index-embeddings-mistralai = ">=0.1.4" 58 | llama-index-embeddings-langchain = ">=0.1.2" 59 | llama-index-embeddings-bedrock =
">=0.1.5" 60 | protobuf = "=3.20.3" 61 | 62 | # pysqlite3-binary = "^0.5.2.post3" 63 | langchain_mistralai = ">=0.1.0" 64 | langchain-aws = ">=0.1.4" 65 | 66 | opensearch-py = ">=2.6.0" 67 | requests-aws4auth = ">=1.2.3" 68 | 69 | lark = ">=1.1.9" 70 | 71 | [tool.poetry.group.dev.dependencies] 72 | black = "^23.3.0" 73 | mypy = "^1.4.1" 74 | pre-commit = "^3.3.3" 75 | 76 | [build-system] 77 | requires = ["poetry-core"] 78 | build-backend = "poetry.core.masonry.api" 79 | -------------------------------------------------------------------------------- /data/sources/md/12factor/en/backing-services.md: -------------------------------------------------------------------------------- 1 | ## IV. Backing services 2 | ### Treat backing services as attached resources 3 | 4 | A *backing service* is any service the app consumes over the network as part of its normal operation. Examples include datastores (such as [MySQL](http://dev.mysql.com/) or [CouchDB](http://couchdb.apache.org/)), messaging/queueing systems (such as [RabbitMQ](http://www.rabbitmq.com/) or [Beanstalkd](https://beanstalkd.github.io)), SMTP services for outbound email (such as [Postfix](http://www.postfix.org/)), and caching systems (such as [Memcached](http://memcached.org/)). 5 | 6 | Backing services like the database are traditionally managed by the same systems administrators who deploy the app's runtime. In addition to these locally-managed services, the app may also have services provided and managed by third parties. Examples include SMTP services (such as [Postmark](http://postmarkapp.com/)), metrics-gathering services (such as [New Relic](http://newrelic.com/) or [Loggly](http://www.loggly.com/)), binary asset services (such as [Amazon S3](http://aws.amazon.com/s3/)), and even API-accessible consumer services (such as [Twitter](http://dev.twitter.com/), [Google Maps](https://developers.google.com/maps/), or [Last.fm](http://www.last.fm/api)). 7 | 8 | **The code for a twelve-factor app makes no distinction between local and third party services.** To the app, both are attached resources, accessed via a URL or other locator/credentials stored in the [config](./config). A [deploy](./codebase) of the twelve-factor app should be able to swap out a local MySQL database with one managed by a third party (such as [Amazon RDS](http://aws.amazon.com/rds/)) without any changes to the app's code. Likewise, a local SMTP server could be swapped with a third-party SMTP service (such as Postmark) without code changes. In both cases, only the resource handle in the config needs to change. 9 | 10 | Each distinct backing service is a *resource*. For example, a MySQL database is a resource; two MySQL databases (used for sharding at the application layer) qualify as two distinct resources. The twelve-factor app treats these databases as *attached resources*, which indicates their loose coupling to the deploy they are attached to. 11 | 12 | A production deploy attached to four backing services. 13 | 14 | Resources can be attached to and detached from deploys at will. For example, if the app's database is misbehaving due to a hardware issue, the app's administrator might spin up a new database server restored from a recent backup. The current production database could be detached, and the new database attached -- all without any code changes. 
15 | -------------------------------------------------------------------------------- /data/sources/md/12factor/en/disposability.md: -------------------------------------------------------------------------------- 1 | ## IX. Disposability 2 | ### Maximize robustness with fast startup and graceful shutdown 3 | 4 | **The twelve-factor app's [processes](./processes) are *disposable*, meaning they can be started or stopped at a moment's notice.** This facilitates fast elastic scaling, rapid deployment of [code](./codebase) or [config](./config) changes, and robustness of production deploys. 5 | 6 | Processes should strive to **minimize startup time**. Ideally, a process takes a few seconds from the time the launch command is executed until the process is up and ready to receive requests or jobs. Short startup time provides more agility for the [release](./build-release-run) process and scaling up; and it aids robustness, because the process manager can more easily move processes to new physical machines when warranted. 7 | 8 | Processes **shut down gracefully when they receive a [SIGTERM](http://en.wikipedia.org/wiki/SIGTERM)** signal from the process manager. For a web process, graceful shutdown is achieved by ceasing to listen on the service port (thereby refusing any new requests), allowing any current requests to finish, and then exiting. Implicit in this model is that HTTP requests are short (no more than a few seconds), or in the case of long polling, the client should seamlessly attempt to reconnect when the connection is lost. 9 | 10 | For a worker process, graceful shutdown is achieved by returning the current job to the work queue. For example, on [RabbitMQ](http://www.rabbitmq.com/) the worker can send a [`NACK`](http://www.rabbitmq.com/amqp-0-9-1-quickref.html#basic.nack); on [Beanstalkd](https://beanstalkd.github.io), the job is returned to the queue automatically whenever a worker disconnects. Lock-based systems such as [Delayed Job](https://github.com/collectiveidea/delayed_job#readme) need to be sure to release their lock on the job record. Implicit in this model is that all jobs are [reentrant](http://en.wikipedia.org/wiki/Reentrant_%28subroutine%29), which typically is achieved by wrapping the results in a transaction, or making the operation [idempotent](http://en.wikipedia.org/wiki/Idempotence). 11 | 12 | Processes should also be **robust against sudden death**, in the case of a failure in the underlying hardware. While this is a much less common occurrence than a graceful shutdown with `SIGTERM`, it can still happen. A recommended approach is use of a robust queueing backend, such as Beanstalkd, that returns jobs to the queue when clients disconnect or time out. Either way, a twelve-factor app is architected to handle unexpected, non-graceful terminations. [Crash-only design](http://lwn.net/Articles/191059/) takes this concept to its [logical conclusion](http://docs.couchdb.org/en/latest/intro/overview.html). 
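
As a hedged illustration of the graceful-shutdown behaviour described above (a sketch, not code from this repository), a worker process can trap `SIGTERM`, stop taking new work, finish the job in hand, and exit. The `next_job` helper and its contents are hypothetical stand-ins for a real queue client.

```python
import signal
import time

shutdown_requested = False

def handle_sigterm(signum, frame):
    # Stop accepting new work; let the current job finish, then exit.
    global shutdown_requested
    shutdown_requested = True

signal.signal(signal.SIGTERM, handle_sigterm)

def next_job():
    # Placeholder for pulling a job from a queue (RabbitMQ, Beanstalkd, ...).
    return {"sleep": 1}

while not shutdown_requested:
    job = next_job()
    time.sleep(job["sleep"])  # simulate processing the job

# Reaching this point means the process shut down gracefully.
print("worker exiting cleanly")
```

Sudden death remains possible under a process manager, which is why the queueing backend, rather than the process itself, must own job re-delivery.
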
13 | 14 | 15 | -------------------------------------------------------------------------------- /terraform/aws/iam.tf: -------------------------------------------------------------------------------- 1 | resource "aws_iam_role" "ai_assistant_ecs_execution_role" { 2 | name = "ai_assistant_ecs_execution_role_https" 3 | 4 | assume_role_policy = jsonencode({ 5 | Version = "2012-10-17", 6 | Statement = [{ 7 | Action = "sts:AssumeRole", 8 | Effect = "Allow", 9 | Principal = { 10 | Service = "ecs-tasks.amazonaws.com" 11 | } 12 | }] 13 | }) 14 | } 15 | 16 | resource "aws_iam_role_policy_attachment" "ai_assistant_secret_read_role_attachment" { 17 | policy_arn = "arn:aws:iam::aws:policy/SecretsManagerReadWrite" 18 | role = aws_iam_role.ai_assistant_ecs_execution_role.name 19 | } 20 | 21 | resource "aws_iam_role_policy_attachment" "ai_assistant_cloud_watch_access_role_attachment" { 22 | policy_arn = "arn:aws:iam::aws:policy/CloudWatchLogsFullAccess" 23 | role = aws_iam_role.ai_assistant_ecs_execution_role.name 24 | } 25 | 26 | resource "aws_iam_role_policy_attachment" "ai_assistant_efs_access_role_attachment" { 27 | policy_arn = "arn:aws:iam::aws:policy/AmazonElasticFileSystemFullAccess" 28 | role = aws_iam_role.ai_assistant_ecs_execution_role.name 29 | } 30 | 31 | resource "aws_iam_role_policy_attachment" "ai_assistant_ecs_execution_role_attachment" { 32 | policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy" 33 | role = aws_iam_role.ai_assistant_ecs_execution_role.name 34 | } 35 | 36 | resource "aws_iam_role_policy_attachment" "ai_assistant_ec2_container_registry_role_attachment" { 37 | policy_arn = "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly" 38 | role = aws_iam_role.ai_assistant_ecs_execution_role.name 39 | } 40 | 41 | resource "aws_iam_role_policy_attachment" "ai_assistant_bedrock_access_role_attachment" { 42 | policy_arn = "arn:aws:iam::aws:policy/AmazonBedrockFullAccess" 43 | role = aws_iam_role.ai_assistant_ecs_execution_role.name 44 | } 45 | 46 | resource "aws_iam_role_policy_attachment" "ai_assistant_opensearch_access_role_attachment" { 47 | policy_arn = "arn:aws:iam::aws:policy/AmazonOpenSearchServiceFullAccess" 48 | role = aws_iam_role.ai_assistant_ecs_execution_role.name 49 | } 50 | 51 | 52 | resource "aws_iam_policy" "ai_assistant_s3_access_policy" { 53 | name = "ai_assistant_s3_access_policy" 54 | 55 | policy = jsonencode({ 56 | Version = "2012-10-17" 57 | Statement = [ 58 | { 59 | Effect = "Allow" 60 | Action = [ 61 | "s3:GetObject", 62 | "s3:PutObject", 63 | "s3:DeleteObject" 64 | ] 65 | Resource = [ 66 | "${aws_s3_bucket.ai_assistant_bucket.arn}/*" 67 | ] 68 | } 69 | ] 70 | }) 71 | } 72 | 73 | resource "aws_iam_role_policy_attachment" "ai_assistant_s3_access_role_attachment" { 74 | policy_arn = aws_iam_policy.ai_assistant_s3_access_policy.arn 75 | role = aws_iam_role.ai_assistant_ecs_execution_role.name 76 | } -------------------------------------------------------------------------------- /data/sources/md/12factor/fr/processes.md: -------------------------------------------------------------------------------- 1 | ## VI. Processus 2 | ### Exécutez l'application comme un ou plusieurs processus sans état 3 | 4 | L'application est exécutée dans l'environnement d'exécution comme un ou plusieurs *processus*. 
5 | 6 | Dans la situation la plus simple, le code est un script indépendant, l'environnement d'exécution est l'ordinateur portable du développeur sur lequel est installé de quoi exécuter le langage, et le processus est lancé depuis la ligne de commande. (par exemple, `python mon_script.py`). De l'autre côté du spectre, un déploiement de production d'une application sophistiquée peut utiliser plusieurs [types de processus, instanciés dans zéro ou plus processus en fonctionnement](./concurrency). 7 | 8 | **Les processus 12 facteurs sont sans état et ne partagent [rien (en)](http://en.wikipedia.org/wiki/Shared_nothing_architecture).** Toute donnée qui doit être persistée doit être stockée dans un [service externe](./backing-services) stateful, typiquement une base de données. 9 | 10 | L'espace mémoire ou le système de fichier du processus peut être utilisé comme cache momentané pour des transactions uniques. Par exemple, télécharger un gros fichier, effectuer une opération dessus, puis stocker le résultat de l'opération dans la base de données. Les applications 12 facteurs ne supposent jamais que quelque chose ayant été mis en cache en mémoire ou sur le disque sera disponible dans une future requête ou job — avec plusieurs processus de chaque type qui s'exécutent, il y a de grandes chances qu'une future requête soit effectuée par un processus différent. Même lorsque l'on fait tourner seulement un processus, un redémarrage (déclenché par le déploiement du code, un changement de configuration, ou l'environnement d'exécution qui déplace le processus vers un lieu physique différent) va généralement balayer toutes les modifications locales (c'est-à-dire en mémoire et sur le disque). 11 | 12 | Des outils de création de paquets de ressources (ou "asset packagers") (tel que [Jammit](http://documentcloud.github.io/jammit/) ou [django-compressor](http://django-compressor.readthedocs.org/)) utilisent le système de fichier comme cache pour les ressources compilées. Une application 12 facteurs préfère faire cette compilation durant l'[étape d'assemblage](./build-release-run), comme avec le [pipeline des ressources de Rails](http://guides.rubyonrails.org/asset_pipeline.html), plutôt que durant l'exécution. 13 | 14 | Certains systèmes web s'appuient sur des ["sessions persistantes" (en)](http://en.wikipedia.org/wiki/Load_balancing_%28computing%29#Persistence) -- c'est-à-dire, mettre en cache les données de session utilisateur dans le processus de l'application et attendre que les requêtes futures du même visiteur seront routées dans le même processus. Les sessions persistantes sont une violation des 12 facteurs, qu'il ne faudrait jamais utiliser. 15 | Les états de session sont de bons candidats pour un datastore qui offre des dates d'expiration, comme [Memcached](http://memcached.org/) ou [Redis](http://redis.io/). 16 | 17 | -------------------------------------------------------------------------------- /data/sources/md/12factor/fr/dependencies.md: -------------------------------------------------------------------------------- 1 | ## II. Dépendances 2 | ### Déclarez explicitement et isolez les dépendances 3 | 4 | La plupart des langages de programmation offrent des systèmes pour créer des paquets à partir de bibliothèques afin de les distribuer, tel que [CPAN](http://www.cpan.org/) pour Perl ou [Rubygems](http://rubygems.org/) pour Ruby. 
Les bibliothèques installées à travers un système de packaging peuvent être installées à travers tout le système, ou bien limitées au répertoire contenant l'application (que l'on appelle les "vendor" ou "bundles"). 5 | 6 | **Une application 12 facteurs ne dépend jamais de l'existence implicite de packages au niveau du système**. Elle déclare toutes ses dépendances, complètement et exactement, à travers un manifeste de *déclaration de dépendances*. De plus, elle utilise un outil d'isolation des dépendances durant l'exécution afin d'assurer qu'aucune dépendance implicite ne s'introduise depuis le système environnant. Les spécifications complètes et explicites sont appliquées uniformément en développement comme en production. 7 | 8 | Par exemple, [Bundler](https://bundler.io/) pour Ruby fournit le format de manifeste `Gemfile` pour la déclaration des dépendances, ainsi que la commande `bundle exec` pour l'isolation des dépendances. En python, il y a deux outils séparés pour ces étapes -- [Pip](http://www.pip-installer.org/en/latest/) est utilisé pour la déclaration et [Virtualenv](http://www.virtualenv.org/en/latest/) pour l'isolation. Même le C dispose d'[Autoconf](http://www.gnu.org/s/autoconf/) pour les déclarations de dépendances, et la liaison statique peut fournir l'isolation des dépendances. Peu importe la chaîne d'outils, la déclaration et l'isolation des dépendances doivent toujours être utilisées ensemble -- seulement l'un ou l'autre ne suffit pas à satisfaire les 12 facteurs. 9 | 10 | Un des bénéfices de la déclaration explicite des dépendances est que cela simplifie la mise en place pour les développeurs qui découvrent l'application. Les nouveaux développeurs peuvent jeter un œil à la base de code de l'application sur leur machine de développement, en ayant besoin uniquement d'avoir de quoi exécuter le langage ainsi que le gestionnaire de dépendances installé en pré-requis. Ils pourront mettre en place tout ce qui est nécessaire pour faire fonctionner le code de l'application de manière déterministe grâce à une *commande d'assemblage* (commande de build). Par exemple, la commande d'assemblage pour Ruby/Bundler est `bundle install`, alors que pour Clojure/[Leiningen](https://github.com/technomancy/leiningen#readme) c'est `lein deps`. 11 | 12 | Les applications 12 facteurs ne s'appuient pas sur l'existence implicite d'outils système, par exemple ImageMagick ou `curl`. Bien que ces outils puissent exister sur beaucoup voire la plupart des systèmes d'exploitation, il n'y a pas de garantie qu'ils existeront sur tous les systèmes où l'application sera exécutée à l'avenir, ou si la version disponible sur un système futur sera compatible avec l'application. Si l'application dépend d'un outil système, cet outil doit être distribué avec l'application. 13 | -------------------------------------------------------------------------------- /data/sources/md/12factor/en/config.md: -------------------------------------------------------------------------------- 1 | ## III. Config 2 | ### Store config in the environment 3 | 4 | An app's *config* is everything that is likely to vary between [deploys](./codebase) (staging, production, developer environments, etc). This includes: 5 | 6 | * Resource handles to the database, Memcached, and other [backing services](./backing-services) 7 | * Credentials to external services such as Amazon S3 or Twitter 8 | * Per-deploy values such as the canonical hostname for the deploy 9 | 10 | Apps sometimes store config as constants in the code. 
This is a violation of twelve-factor, which requires **strict separation of config from code**. Config varies substantially across deploys, code does not. 11 | 12 | A litmus test for whether an app has all config correctly factored out of the code is whether the codebase could be made open source at any moment, without compromising any credentials. 13 | 14 | Note that this definition of "config" does **not** include internal application config, such as `config/routes.rb` in Rails, or how [code modules are connected](http://docs.spring.io/spring/docs/current/spring-framework-reference/html/beans.html) in [Spring](http://spring.io/). This type of config does not vary between deploys, and so is best done in the code. 15 | 16 | Another approach to config is the use of config files which are not checked into revision control, such as `config/database.yml` in Rails. This is a huge improvement over using constants which are checked into the code repo, but still has weaknesses: it's easy to mistakenly check in a config file to the repo; there is a tendency for config files to be scattered about in different places and different formats, making it hard to see and manage all the config in one place. Further, these formats tend to be language- or framework-specific. 17 | 18 | **The twelve-factor app stores config in *environment variables*** (often shortened to *env vars* or *env*). Env vars are easy to change between deploys without changing any code; unlike config files, there is little chance of them being checked into the code repo accidentally; and unlike custom config files, or other config mechanisms such as Java System Properties, they are a language- and OS-agnostic standard. 19 | 20 | Another aspect of config management is grouping. Sometimes apps batch config into named groups (often called "environments") named after specific deploys, such as the `development`, `test`, and `production` environments in Rails. This method does not scale cleanly: as more deploys of the app are created, new environment names are necessary, such as `staging` or `qa`. As the project grows further, developers may add their own special environments like `joes-staging`, resulting in a combinatorial explosion of config which makes managing deploys of the app very brittle. 21 | 22 | In a twelve-factor app, env vars are granular controls, each fully orthogonal to other env vars. They are never grouped together as "environments", but instead are independently managed for each deploy. This is a model that scales up smoothly as the app naturally expands into more deploys over its lifetime. 23 | -------------------------------------------------------------------------------- /data/sources/md/12factor/fr/build-release-run.md: -------------------------------------------------------------------------------- 1 | ## V. Assemblez, publiez, exécutez 2 | ### Séparez strictement les étapes d'assemblage et d'exécution 3 | 4 | Une [base de code](./codebase) est transformée en un déploiement (non-développement) à travers les étapes suivantes : 5 | 6 | * L'*étape d'assemblage* (ou "build") est une transformation qui convertit un dépôt de code en un paquet autonome exécutable appelé l'assemblage (ou "build"). En utilisant une version du code référencée par un commit spécifié lors du processus de déploiement, l'étape d'assemblage va chercher toutes les [dépendances externes](./dependencies) et compile les fichiers binaires et les ressources. 
7 | * L'*étape de publication * (ou "release") prend l'assemblage produit à l'étape précédente et le combine avec la [configuration](./config) de déploiement courante. La release résultante contient à la fois l'assemblage et la configuration, et elle est prête pour une exécution immédiate dans l'environnement d'exécution. 8 | * L'*étape d'exécution* (ou "runtime") fait fonctionner l'application dans l'environnement d'exécution, en lançant un ensemble de [processus](./processes) de l'application associée à la release considérée. 9 | 10 | ![Le code devient un assemblage, qui est combiné à la configuration pour créer une release](/images/release.png) 11 | 12 | **Les applications 12 facteurs ont une séparation stricte entre les étapes d'assemblage, de publication et d'exécution.** Par exemple, il est impossible de faire des changements dans le code au moment de son exécution, car il n'y a pas moyen de propager ces changements vers l'étape de build. 13 | 14 | Les outils de déploiement offrent généralement des outils de gestion de release, permettant notamment de revenir à une release antérieure. Par exemple, l'outil de déploiement [Capistrano](https://github.com/capistrano/capistrano/wiki) stocke les releases dans un sous-répertoire appelé `releases`, où la release courante est un lien symbolique vers le répertoire de release courante. Sa commande `rollback` permet de facilement revenir à une release précédente. 15 | 16 | Chaque release devrait toujours avoir un identifiant unique, comme un horodatage (timestamp) de la release (tel que `2011-04-06-20:32:17`) ou un nombre incrémental (tel que `v100`). La liste des releases est accessible en écriture incrémentale uniquement, et il n'est pas possible de modifier une release une fois qu'elle a été réalisée. Tout changement doit créer une nouvelle release. 17 | 18 | Les assemblages sont initiés par le développeur de l'application dès que du nouveau code est déployé. Son exécution, au contraire, peut avoir lieu automatiquement en cas d'un reboot du serveur, ou du crash d'un processus qui est relancé par le gestionnaire de processus. De ce fait, l'étape d'exécution doit se limiter à un nombre minimal de parties mobiles, car les problèmes qui empêchent une application de fonctionner peuvent entraîner des dysfonctionnements au milieu de la nuit alors qu'aucun développeur ne sera là pour les corriger. L'étape d'assemblage peut être plus complexe, car les erreurs pourront toujours être résolues par le développeur qui réalise le déploiement. 19 | 20 | -------------------------------------------------------------------------------- /data/sources/md/12factor/fr/backing-services.md: -------------------------------------------------------------------------------- 1 | ## IV. Services externes 2 | ### Traitez les services externes comme des ressources attachées 3 | 4 | Un *service externe* (backing service) correspond à tout service que l'application utilise à travers le réseau pour son fonctionnement nominal. Cela concerne par exemple les bases de données (tel que [MySQL](http://dev.mysql.com/) ou [CouchDB](http://couchdb.apache.org/)), les systèmes de messages/files (tel que [RabbitMQ](http://www.rabbitmq.com/) ou [Beanstalkd](https://beanstalkd.github.io)), les services SMTP pour l'envoi d'email (comme [Postfix](http://www.postfix.org/)), ainsi que les systèmes de cache (comme [Memcached](http://memcached.org/)). 
5 | 6 | Les *services externes* comme la base de données sont le plus souvent gérés par les mêmes administrateurs réseau que ceux qui gèrent l'application de production. En plus de ces services gérés localement, l'application peut également avoir besoin de services gérés par des tiers. Cela concerne par exemple les services SMTP (comme [Postmark](http://postmarkapp.com/)), les services de gestion de métriques (comme [New Relic](http://newrelic.com/) ou [Loggly](http://www.loggly.com/)), les services de ressources binaires (comme [Amazon S3](http://aws.amazon.com/s3/)), et même les services que l'on peut consommer à travers une API (comme [Twitter](http://dev.twitter.com/), [Google Maps](https://developers.google.com/maps/), ou [Last.fm](http://www.last.fm/api)). 7 | 8 | **Le code d'une application 12 facteurs ne fait pas de distinction entre les services locaux et les services tiers**. Pour l'application, ce sont tous les deux des ressources attachées, accessibles via une URL ou un autre système de localisation et d'authentification stockée dans la [configuration](./config). Un [déploiement](./codebase) d'une application 12 facteurs doit pouvoir remplacer une base de données MySQL locale par une autre gérée par des tiers ([Amazon RDS](http://aws.amazon.com/rds/), par exemple) sans le moindre changement dans le code de l'application. De la même manière, un serveur SMTP local doit pouvoir être remplacé par un service tiers (Postmark, par exemple) sans changements dans le code. Dans les deux cas, seules les informations de configurations doivent changer. 9 | 10 | Chaque service externe est une *ressource*. Par exemple, une base de données MySQL est une ressource. Deux bases de données MySQL (utilisées pour faire du sharding dans la couche applicative) correspondent à deux ressources distinctes. L'application 12 facteurs traite ces bases de données comme des ressources attachées, ce qui indique leur couplage faible au déploiement auquel elles sont rattachées. 11 | 12 | Un déploiement de production lié à quatre services externes. 13 | 14 | Les ressources peuvent être attachées et détachées à volonté à des déploiements. Par exemple, si la base de données de l'application pose problème pour des raisons matérielles, l'administrateur de l'application peut vouloir lancer un nouveau serveur de base de données restauré à partir d'une sauvegarde récente. L'application courante pourrait être détachée de l'ancienne, puis rattachée à la nouvelle — le tout sans changement dans le code. 
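
A small illustrative sketch of the same point for an SMTP backing service (the `SMTP_HOST`/`SMTP_PORT` variable names and the addresses are hypothetical, and this snippet is not part of the repository): pointing the app at a local Postfix or at a hosted provider such as Postmark is purely a configuration change.

```python
import os
import smtplib
from email.message import EmailMessage

# The locator for the SMTP backing service comes from config, not code.
host = os.environ.get("SMTP_HOST", "localhost")  # local Postfix or a hosted SMTP service
port = int(os.environ.get("SMTP_PORT", "25"))

msg = EmailMessage()
msg["From"] = "app@example.com"
msg["To"] = "user@example.com"
msg["Subject"] = "Hello"
msg.set_content("Sent via whichever SMTP resource is attached to this deploy.")

with smtplib.SMTP(host, port) as smtp:
    smtp.send_message(msg)
```
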
15 | -------------------------------------------------------------------------------- /rag_assistant/streamlit_app.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import openai 3 | from llama_index.llms.openai import OpenAI 4 | 5 | try: 6 | from llama_index import VectorStoreIndex, ServiceContext, Document, SimpleDirectoryReader 7 | except ImportError: 8 | from llama_index.core import VectorStoreIndex, ServiceContext, Document, SimpleDirectoryReader 9 | 10 | st.set_page_config(page_title="Chat with the Streamlit docs, powered by LlamaIndex", page_icon="🦙", layout="centered", 11 | initial_sidebar_state="auto", menu_items=None) 12 | openai.api_key = st.secrets.OPENAI_API_KEY 13 | st.title("Chat with docs, powered by Streamlit & LlamaIndex 💬🦙") 14 | st.info( 15 | "Check out the full tutorial to build this app in our [blog post](https://blog.streamlit.io/build-a-chatbot-with-custom-data-sources-powered-by-llamaindex/)", 16 | icon="📃") 17 | 18 | if "messages" not in st.session_state.keys(): # Initialize the chat messages history 19 | st.session_state.messages = [ 20 | {"role": "assistant", "content": "Ask me a question about cloud application development best practice!"} 21 | ] 22 | 23 | 24 | @st.cache_resource(show_spinner=False) 25 | def load_data(): 26 | with st.spinner(text="Loading and indexing the Streamlit docs – hang tight! This should take 1-2 minutes."): 27 | reader = SimpleDirectoryReader(input_dir="./data", recursive=True) 28 | docs = reader.load_data() 29 | # llm = OpenAI(model="gpt-3.5-turbo", temperature=0.5, system_prompt="You are an expert o$ 30 | # index = VectorStoreIndex.from_documents(docs) 31 | service_context = ServiceContext.from_defaults(llm=OpenAI(model="gpt-3.5-turbo", temperature=0.5, 32 | system_prompt="You are an expert on the Streamlit Python library and your job is to answer technical questions. Assume that all questions are related to the Streamlit Python library. Keep your answers technical and based on facts – do not hallucinate features.")) 33 | index = VectorStoreIndex.from_documents(docs, service_context=service_context) 34 | return index 35 | 36 | 37 | index = load_data() 38 | 39 | if "chat_engine" not in st.session_state.keys(): # Initialize the chat engine 40 | st.session_state.chat_engine = index.as_chat_engine(chat_mode="condense_question", verbose=True) 41 | 42 | if prompt := st.chat_input("Your question"): # Prompt for user input and save to chat history 43 | st.session_state.messages.append({"role": "user", "content": prompt}) 44 | 45 | for message in st.session_state.messages: # Display the prior chat messages 46 | with st.chat_message(message["role"]): 47 | st.write(message["content"]) 48 | 49 | # If last message is not from assistant, generate a new response 50 | if st.session_state.messages[-1]["role"] != "assistant": 51 | with st.chat_message("assistant"): 52 | with st.spinner("Thinking..."): 53 | response = st.session_state.chat_engine.chat(prompt) 54 | st.write(response.response) 55 | message = {"role": "assistant", "content": response.response} 56 | st.session_state.messages.append(message) # Add response to message history -------------------------------------------------------------------------------- /data/sources/md/12factor/fr/disposability.md: -------------------------------------------------------------------------------- 1 | ## IX. 
Jetable 2 | ### Maximisez la robustesse avec des démarrages rapides et des arrêts gracieux 3 | 4 | **Les [processus](./processes) des applications 12 facteurs sont *jetables*, c'est-à-dire qu'ils peuvent être démarrés ou stoppés en un instant.** Cela simplifie un rapide grossissement vertical, le déploiement rapide du [code](./codebase) ou de changements dans la [configuration](./config), ainsi que la robustesse des déploiements de production. 5 | 6 | Les processus doivent viser à **minimiser le temps de démarrage**. Idéalement, un processus prend quelques secondes entre le moment où une commande le lance et celui où il est en marche et prêt à recevoir des requêtes ou du travail. Un court temps de démarrage rend les processus de [release](./build-release-run) et de scalabilité verticale plus agiles; il aide également à la robustesse, car les gestionnaires de processus peuvent plus facilement déplacer des processus vers de nouvelles machines physiques lorsque c'est nécessaire. 7 | 8 | Les processus **s'éteignent gracieusement lorsqu'ils reçoivent un signal [SIGTERM (fr)](https://fr.wikipedia.org/wiki/SIGTERM)** du gestionnaire de processus. Pour un processus web, s'éteindre en douceur se fait en arrêtant d'écouter sur le port de service (refusant, par la même occasion, toute nouvelle requête), en permettant à la requête courante de se terminer, et en quittant ensuite. Ce qui est implicite dans ce modèle, c'est que les requêtes sont courtes (pas plus de quelques secondes), ou dans le cas de longues requêtes, les clients doivent pouvoir tenter de se reconnecter sans problème lorsque la connection est perdue. 9 | 10 | Pour un processus de worker, s'éteindre gracieusement est réalisé en renvoyant le travail en cours dans la file de travaux. Par exemple, avec [RabbitMQ](http://www.rabbitmq.com/) le worker peut envoyer un message [`NACK`](http://www.rabbitmq.com/amqp-0-9-1-quickref.html#basic.nack); avec [Beanstalkd](https://beanstalkd.github.io), le travail est renvoyé dans la file automatiquement dès qu'un worker se déconnecte. Les systèmes basés sur des verrous, comme [Delayed Job](https://github.com/collectiveidea/delayed_job#readme) doivent s'assurer de supprimer le verrou de leur travail en cours. Il est implicite dans ce modèle que toutes les tâches sont [réentrantes (fr)](http://fr.wikipedia.org/wiki/R%C3%A9entrance), ce qui est réalisé en englobant les résultats dans une transaction, ou en rendant l'opération [idempotente (fr)](http://fr.wikipedia.org/wiki/Idempotence). 11 | 12 | Les processus doivent également être **robustes face aux morts subites**, dans le cas d'une panne du hardware sous-jacent. Bien que ce soit bien moins courant qu'un arrêt gracieux avec `SIGTERM`, cela peut arriver malgré tout. L'approche recommandée est l'utilisation d'un backend robuste de files de messages, tel que Beanstalkd, capable de renvoyer les tâches dans la file lorsqu'un client se déconnecte ou ne répond plus. Dans les deux cas, une application 12 facteurs est structurée pour gérer des fins inattendues et non-gracieuses. Le [design crash-only (en)](http://lwn.net/Articles/191059/) amène ce concept à sa [conclusion logique (en)](http://docs.couchdb.org/en/latest/intro/overview.html). 13 | 14 | 15 | -------------------------------------------------------------------------------- /data/sources/md/12factor/fr/concurrency.md: -------------------------------------------------------------------------------- 1 | ## VIII. 
Concurrence 2 | ### Grossissez à l'aide du modèle de processus 3 | 4 | Tout programme informatique, lorsqu'il s'exécute, est représenté par un ou plusieurs processus. Les applications web ont adopté différentes approches d'exécution de processus. Par exemple, les processus PHP s'exécutent comme des processus fils d'Apache, démarrés à la demande lorsque c'est requis par le volume de requêtes. Les processus Java ont adopté l'approche inverse, avec une machine virtuelle qui fournit un super-processus massif qui réserve un gros bloc de ressources système (processeur et mémoire) au démarrage, et la concurrence est gérée en interne à l'aide de threads. Dans les deux cas, les processus qui tournent sont à peine visibles aux développeurs de l'application. 5 | 6 | ![La scalabilité est exprimée par des processus qui s'exécutent, la diversité de la charge de travail est exprimée par les types de processus](/images/process-types.png) 7 | 8 | **Dans une application 12 facteurs, les processus sont des élèves modèles**. Les processus dans une application 12 facteurs s'inspirent fortement du [modèle de processus unix pour faire fonctionner les daemon (en)](https://adam.herokuapp.com/past/2011/5/9/applying_the_unix_process_model_to_web_apps/). En utilisant ce modèle, les développeurs peuvent structurer l'application pour gérer différents types de charge en assignant chaque type de travail à un *type de processus*. Par exemple, les requêtes HTTP peuvent être gérées par un processus web, et les tâches d'arrière-plan ayant une longue durée d'exécution peuvent être des processus dits "worker". 9 | 10 | Chaque processus peut malgré tout et individuellement, gérer son propre multiplexage interne, avec des threads à l'intérieur de la machine virtuelle d'exécution, ou à l'aide du modèle d'évènements asynchrones que l'on retrouve dans des outils comme [EventMachine](https://github.com/eventmachine/eventmachine), [Twisted](http://twistedmatrix.com/trac/), ou [Node.js](http://nodejs.org/). Mais une machine virtuelle a individuellement une taille limitée (grandissement vertical), donc l'application doit également pouvoir déclencher plusieurs processus qui tournent sur plusieurs machines physiques. 11 | 12 | Le modèle de processus prend de l'envergure dès qu'il est question de grossir. La [nature sans partage, avec une partition horizontale des processus des applications 12 facteurs](./processes) signifie qu'ajouter plus de concurrence est une opération simple et fiable. La liste des types de processus et du nombre de processus de chaque type est appelée *formation de processus*. 13 | 14 | Les processus des applications 12 facteurs ne devraient [jamais être des daemons (en)](http://dustin.github.com/2010/02/28/running-processes.html) ou écrire des fichiers PID. À la place, utilisez le gestionnaire de processus du système d'exploitation (tel que [systemd](https://www.freedesktop.org/wiki/Software/systemd/), un gestionnaire de processus distribué sur une plateforme cloud, ou un outil comme [Foreman (en)](http://blog.daviddollar.org/2011/05/06/introducing-foreman.html) durant le développement) pour gérer les [flux de sortie](./logs), répondre à un processus qui plante, et gérer les redémarrages et les arrêts initiés par les utilisateurs. 
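
To tie the process-formation idea to something concrete, here is a minimal sketch with assumed names (not a file from this repository): one codebase exposes several process types, each started in the foreground by a process manager such as Foreman or systemd, with logs written to stdout rather than the process daemonizing itself.

```python
import sys
import time

def web():
    # A real web process would bind a port; here we only log to stdout.
    while True:
        print("web: handling requests", flush=True)
        time.sleep(5)

def worker():
    while True:
        print("worker: processing background jobs", flush=True)
        time.sleep(5)

if __name__ == "__main__":
    # The process manager decides how many of each type to run,
    # e.g. two "web" processes and four "worker" processes.
    process_type = sys.argv[1] if len(sys.argv) > 1 else "web"
    {"web": web, "worker": worker}[process_type]()
```

Scaling out then means asking the process manager for more instances of a given type, with no change to the code.
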
15 | -------------------------------------------------------------------------------- /conf/config.ini: -------------------------------------------------------------------------------- 1 | [DEFAULT] 2 | APP_NAME=LLM+RAG Assistant 3 | 4 | [MODEL_PROVIDER] 5 | # AZURE OPENAI MISTRAL BEDROCK 6 | MODEL_PROVIDER = BEDROCK 7 | 8 | [AZURE] 9 | # omega-gpt4-32k-deployment omega-gpt4-8k-deployment omega-gpt35turbo text-embedding-ada-002 10 | AZURE_OPENAI_DEPLOYMENT=omega-gpt4-32k-deployment 11 | # ada-2 text-embedding-ada-002 12 | AZURE_OPENAI_EMBEDDING_DEPLOYMENT=ada-2 13 | AZURE_OPENAI_ENDPOINT=https://azopenai-01-fr.openai.azure.com/ 14 | AZURE_OPENAI_API_VERSION=2023-07-01-preview 15 | CHAT_MODEL=omega-gpt4-32k-deployment 16 | 17 | [OPENAI] 18 | # gpt-4-turbo gpt-3.5-turbo gpt-4o 19 | OPENAI_MODEL_NAME=gpt-4o 20 | EMBEDDINGS_MODEL=text-embedding-ada-002 21 | 22 | [MISTRAL] 23 | # mistral-tiny, mistral-small, mistral-medium, mistral-large 24 | CHAT_MODEL=mistral-large-latest 25 | EMBEDDINGS_MODEL=mistral-embed 26 | 27 | [BEDROCK] 28 | #anthropic-claude 29 | CHAT_MODEL=anthropic.claude-3-sonnet-20240229-v1:0 30 | CLIENT=anthropic.claude-3-sonnet-20240229-v1:0 31 | # amazon.titan-embed-image-v1 amazon.titan-embed-text-v1 32 | EMBEDDINGS_MODEL=amazon.titan-embed-image-v1 33 | AWS_PROFILE_NAME= 34 | AWS_REGION_NAME=eu-west-3 35 | BEDROCK_ENDPOINT_URL=https://bedrock-runtime.eu-west-3.amazonaws.com 36 | 37 | ;CHAT_MODEL=mistral.mistral-large-2402-v1:0 38 | ;CLIENT=mistral.mistral-large-2402-v1:0 39 | ;EMBEDDINGS_MODEL=amazon.titan-embed-image-v1 40 | ;AWS_PROFILE_NAME= 41 | ;AWS_REGION_NAME=eu-west-3 42 | ;BEDROCK_ENDPOINT_URL=https://bedrock-runtime.eu-west-3.amazonaws.com 43 | 44 | [VECTORDB] 45 | # faiss or chroma or opensearch 46 | # moving to faiss due to "ModuleNotFoundError: No module named 'hnswlib'" issue with ChromaDB 47 | # ChromaDB 0.4.3 solve the issue but is not supported by llama_index that is compatible with mistralai 48 | # using latest version of chroma and chroma-hnswlib lib does not make it 49 | vectordb = chroma 50 | chroma_persist_directory = data/chroma/ 51 | faiss_persist_directory = data/faiss 52 | collection_name = Default 53 | opensearch_url = https://vpc-ai-assistant-ysxpkzzgfwwgfbg35qz3tal4ee.eu-west-1.es.amazonaws.com 54 | opensearch_aws_region = eu-west-1 55 | opensearch_bulk_size = 500 56 | 57 | [KNOWLEDGE] 58 | CHILD_CHUNK_SIZE=400 59 | CHILD_CHUNK_OVERLAP=20 60 | 61 | PARENT_CHUNK_SIZE=2000 62 | PARENT_CHUNK_OVERLAP=0 63 | 64 | 65 | [LANGCHAIN] 66 | verbose = True 67 | # "stuff", "map_reduce", "refine", "map_rerank" 68 | chain_type = stuff 69 | # "similarity", "mmr", "similarity_score_threshold" 70 | SEARCH_TYPE = mmr 71 | SEARCH_TOP_K = 6 72 | 73 | [CACHE] 74 | CACHE_FOLDER=data/cache 75 | 76 | [FILE_MANAGEMENT] 77 | UPLOAD_DIRECTORY=data/cache/upload_directory 78 | 79 | [VISION] 80 | ; BEDROCK ANTHROPIC CLAUDE 3 81 | VISION_PROVIDER=BEDROCK 82 | VISION_MODEL=anthropic.claude-3-sonnet-20240229-v1:0 83 | VISION_EMBEDDINGS=amazon.titan-embed-image-v1 84 | IMAGE_OUTPUT_DIR=data/cache/vision 85 | 86 | [LLAMA_INDEX] 87 | LLAMA_INDEX_ROOT_DIR=data/llama_index 88 | SENTENCE_INDEX_DIR=sentence_index 89 | MERGING_INDEX_DIR=merging_index 90 | SUBQUERY_INDEX_DIR=subquery_index 91 | SUMMARY_INDEX_DIR=summary_index 92 | 93 | [DATACATALOG] 94 | DATA_CATALOG=data/data-catalog/data-catalog.xlsx 95 | 96 | [DATABASE] 97 | # ATHENA or DUCKDB 98 | DATABASE_SOURCE=DUCKDB 99 | 100 | [ATHENA] 101 | QUALIFIED_TABLE_NAME= 102 | SCHEMA_PATH= 103 | 104 | [DUCKDB] 105 | 
QUALIFIED_TABLE_NAME=data/duckdb... 106 | SCHEMA_PATH=data/duckdb/ 107 | 108 | [DOCUMENTS_STORAGE] 109 | # S3 or LOCAL or NONE 110 | INTERFACE=LOCAL 111 | DOCUMENTS_PATH=data 112 | S3_BUCKET_NAME= 113 | -------------------------------------------------------------------------------- /tests/utils/test_utilsrag_lc.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import os 3 | from dotenv import load_dotenv, find_dotenv 4 | from langchain_community.document_loaders.pdf import PyPDFLoader 5 | from langchain_community.vectorstores.chroma import Chroma 6 | from langchain_mistralai import ChatMistralAI, MistralAIEmbeddings 7 | 8 | import numpy as np 9 | 10 | import nest_asyncio 11 | 12 | from rag_assistant.utils.utilsrag_lc import agent_lc_factory 13 | 14 | load_dotenv(find_dotenv()) 15 | 16 | # Set OpenAI API key from Streamlit secrets 17 | openai_api_key = os.getenv('OPENAI_API_KEY') 18 | 19 | nest_asyncio.apply() 20 | 21 | 22 | def get_openai_api_key(): 23 | _ = load_dotenv(find_dotenv()) 24 | 25 | return os.getenv("OPENAI_API_KEY") 26 | 27 | 28 | def get_hf_api_key(): 29 | _ = load_dotenv(find_dotenv()) 30 | 31 | return os.getenv("HUGGINGFACE_API_KEY") 32 | 33 | 34 | from trulens_eval import ( 35 | Feedback, 36 | TruLlama, 37 | OpenAI, 38 | Tru, TruChain, Select 39 | ) 40 | 41 | openai = OpenAI() 42 | 43 | qa_relevance = ( 44 | Feedback(openai.relevance_with_cot_reasons, name="Answer Relevance") 45 | .on_input_output() 46 | ) 47 | 48 | qs_relevance = ( 49 | Feedback(openai.relevance_with_cot_reasons, name="Context Relevance") 50 | .on_input() 51 | .on(TruLlama.select_source_nodes().node.text) 52 | .aggregate(np.mean) 53 | ) 54 | 55 | groundedness = ( 56 | Feedback(openai.groundedness_measure_with_cot_reasons, name = "Groundedness") 57 | .on(Select.RecordCalls.retrieve.rets.collect()) 58 | .on_output() 59 | ) 60 | 61 | feedbacks = [qa_relevance, qs_relevance, groundedness] 62 | 63 | 64 | def get_prebuilt_trulens_recorder(query_engine, app_id): 65 | tru_recorder = TruChain( 66 | query_engine, 67 | app_id=app_id, 68 | feedbacks=feedbacks 69 | ) 70 | return tru_recorder 71 | 72 | 73 | # @pytest.fixture(scope="module") 74 | def temp_dir(request): 75 | # Setup: Create a temporary directory for the test module 76 | # TODO should do something with the vectordb 77 | pass 78 | 79 | 80 | @pytest.fixture 81 | def llm_prepare(): 82 | # llm = OpenAI(model="gpt-3.5-turbo", temperature=0.1) 83 | llm = ChatMistralAI() 84 | 85 | return llm 86 | 87 | 88 | @pytest.fixture 89 | def embeddings_prepare(): 90 | # embed_model = OpenAIEmbeddings() 91 | embed_model = MistralAIEmbeddings() 92 | 93 | return embed_model 94 | 95 | 96 | @pytest.fixture 97 | def docs_prepare(): 98 | loader = PyPDFLoader("tests/utils/eBook-How-to-Build-a-Career-in-AI.pdf") 99 | documents = loader.load() 100 | return documents 101 | 102 | 103 | @pytest.fixture 104 | def eval_questions_prepare(): 105 | eval_questions = [] 106 | with open('tests/utils/eval_questions.txt', 'r') as file: 107 | for line in file: 108 | # Remove newline character and convert to integer 109 | item = line.strip() 110 | print(item) 111 | eval_questions.append(item) 112 | return eval_questions 113 | 114 | 115 | def test_lc_agent_stuff_4_similarity(llm_prepare, embeddings_prepare, docs_prepare, eval_questions_prepare, trulens_prepare): 116 | 117 | db = Chroma.from_documents( 118 | documents=docs_prepare, 119 | embedding=embeddings_prepare, 120 | collection_name="Test_RAG_LC", 121 | ) 122 | 123 | retrieval_qa_chain 
= agent_lc_factory(chain_type="stuff", 124 | llm=llm_prepare, 125 | search_kwargs={"k": 4}, 126 | search_type="similarity", vectorstore=db) 127 | 128 | response = retrieval_qa_chain("How do I get started on a personal project in AI?") 129 | print(f"response: {str(response)}") 130 | assert response is not None, "L'interprétation n'a pas retourné de résultat." -------------------------------------------------------------------------------- /.github/workflows/aws.yml: -------------------------------------------------------------------------------- 1 | # This workflow will build and push a new container image to Amazon ECR, 2 | # and then will deploy a new task definition to Amazon ECS, when there is a push to the "main" branch. 3 | # 4 | # To use this workflow, you will need to complete the following set-up steps: 5 | # 6 | # 1. Create an ECR repository to store your images. 7 | # For example: `aws ecr create-repository --repository-name my-ecr-repo --region us-east-2`. 8 | # Replace the value of the `ECR_REPOSITORY` environment variable in the workflow below with your repository's name. 9 | # Replace the value of the `AWS_REGION` environment variable in the workflow below with your repository's region. 10 | # 11 | # 2. Create an ECS task definition, an ECS cluster, and an ECS service. 12 | # For example, follow the Getting Started guide on the ECS console: 13 | # https://us-east-2.console.aws.amazon.com/ecs/home?region=us-east-2#/firstRun 14 | # Replace the value of the `ECS_SERVICE` environment variable in the workflow below with the name you set for the Amazon ECS service. 15 | # Replace the value of the `ECS_CLUSTER` environment variable in the workflow below with the name you set for the cluster. 16 | # 17 | # 3. Store your ECS task definition as a JSON file in your repository. 18 | # The format should follow the output of `aws ecs register-task-definition --generate-cli-skeleton`. 19 | # Replace the value of the `ECS_TASK_DEFINITION` environment variable in the workflow below with the path to the JSON file. 20 | # Replace the value of the `CONTAINER_NAME` environment variable in the workflow below with the name of the container 21 | # in the `containerDefinitions` section of the task definition. 22 | # 23 | # 4. Store an IAM user access key in GitHub Actions secrets named `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`. 24 | # See the documentation for each action used below for the recommended IAM policies for this IAM user, 25 | # and best practices on handling the access key credentials. 26 | 27 | name: Deploy to Amazon ECS 28 | 29 | on: 30 | push: 31 | branches: [ "main" ] 32 | 33 | env: 34 | AWS_REGION: eu-west-1 # set this to your preferred AWS region, e.g. 
us-west-1 35 | ECR_REPOSITORY: ai_assistant # set this to your Amazon ECR repository name 36 | ECS_SERVICE: ai_assistant-service-iac-https # set this to your Amazon ECS service name 37 | ECS_CLUSTER: ai_assistant # set this to your Amazon ECS cluster name 38 | CONTAINER_NAME: ai_assistant # set this to the name of the container in the 39 | # containerDefinitions section of your task definition 40 | 41 | permissions: 42 | contents: read 43 | 44 | jobs: 45 | deploy: 46 | name: Deploy 47 | runs-on: ubuntu-latest 48 | environment: production 49 | 50 | steps: 51 | - name: Checkout 52 | uses: actions/checkout@v3 53 | 54 | - name: Configure AWS credentials 55 | uses: aws-actions/configure-aws-credentials@v1 56 | with: 57 | aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} 58 | aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 59 | aws-region: ${{ env.AWS_REGION }} 60 | 61 | - name: Login to Amazon ECR 62 | id: login-ecr 63 | uses: aws-actions/amazon-ecr-login@v1 64 | 65 | - name: Build, tag, and push image to Amazon ECR 66 | id: build-image 67 | env: 68 | ECR_REGISTRY: 441525731509.dkr.ecr.eu-west-1.amazonaws.com 69 | IMAGE_TAG: latest 70 | run: | 71 | # Build a docker container and 72 | # push it to ECR so that it can 73 | # be deployed to ECS. 74 | docker build -t $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG . 75 | docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG 76 | echo "image=$ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG" >> $GITHUB_OUTPUT 77 | 78 | - name: Deploy to ECS 79 | run: | 80 | aws ecs update-service --cluster $ECS_CLUSTER --service $ECS_SERVICE --force-new-deployment 81 | -------------------------------------------------------------------------------- /data/sources/md/12factor/fr/config.md: -------------------------------------------------------------------------------- 1 | ## III. Configuration 2 | ### Stockez la configuration dans l'environnement 3 | 4 | La *configuration* d'une application est tout ce qui est susceptible de varier entre des [déploiements](./codebase) (validation, production, environnement de développement, etc.). Cela inclut : 5 | 6 | * Les ressources gérées par la base de données, Memcached, ou tout autre [service de stockage](./backing-services) 7 | * Les identifiants pour des services externes, tel qu'Amazon S3 ou Twitter 8 | * Les valeurs spécifiques au déploiement, tel que son nom d'hôte canonique 9 | 10 | Les applications stockent parfois la configuration avec des constantes dans le code. C'est une violation des 12 facteurs, qui requiert une **stricte séparation de la configuration et du code**. La configuration peut varier substantiellement à travers les déploiements, alors que ce n'est pas le cas du code. 11 | 12 | Un bon moyen de tester si une application a correctement séparé son code, c'est de se demander si l'application pourrait être rendue open-source à tout instant, sans compromettre d'identifiants. 13 | 14 | Notez que cette définition de "configuration" n'inclut **pas** la configuration interne de l'application, tel que `config/routes.rb` avec Rails, ou comment [les modules du noyau sont connectés (en)](http://docs.spring.io/spring/docs/current/spring-framework-reference/html/beans.html) dans [Spring](http://spring.io/). Ce type de configuration ne varie pas à travers les déploiements, et est ainsi mieux réalisé dans le code. 15 | 16 | Une autre approche de la configuration, c'est d'utiliser des fichiers de configuration qui ne sont pas inclus dans le système de contrôle de version, par exemple `config/database.yml` de Rails. 
C'est une amélioration considérable par rapport à l'utilisation de constantes qui sont versionnées dans le dépôt de code, mais a toujours des faiblesses : il est facile d'ajouter par inadvertance un fichier de configuration dans le dépôt. Il y a une tendance à ce que les fichiers de configuration soient dispersés à différents endroits et dans différents formats, rendant ainsi difficile de voir et gérer la configuration à un unique endroit. De plus, ces formats ont tendance à être spécifiques à un langage ou un framework. 17 | 18 | **Les applications 12 facteurs stockent la configuration dans des *variables d'environnement*** (souvent raccourcies en *variables d'env*, ou *env*). Les variables d'environnement sont faciles à changer entre des déploiements sans changer le moindre code ; contrairement aux fichiers de configuration, il y a peu de chance pour qu'elles soient ajoutées au dépôt de code accidentellement ; et contrairement aux fichiers de configuration personnalisés, ou tout autre mécanisme de configuration comme les propriétés système Java, ce sont des standards agnostiques du langage ou du système d'exploitation. 19 | 20 | Un autre aspect de la gestion de configuration est le groupage. Parfois, les applications regroupent la configuration dans des groupes nommés (souvent appelés les "environnements"), nommés ainsi d'après des déploiements spécifiques, comme les environnements `development`, `test`, et `production` de Rails. Cette méthode ne permet pas de grossir proprement : lorsque l'on ajoute de nouveaux déploiement à l'application, de nouveaux noms d'environnement sont nécessaires, comme `validation` ou `qa`. Quand le projet grossit encore plus, les développeurs vont avoir tendance à ajouter leurs propres environnements particuliers, comme `joes-validation`, ce qui entraîne une explosion combinatoire de la configuration qui rend la gestion des déploiements de l'application très fragile. 21 | 22 | Dans une application 12 facteurs, les variables d'environnement permettent un contrôle granulaire, chacune complètement orthogonale aux autres variables d'environnement. Elles ne sont jamais groupées ensemble en "environnements", mais sont plutôt gérées indépendamment pour chaque déploiement. C'est un modèle qui permet de grossir verticalement en souplesse, lorsque l'application grossit naturellement en un plus grand nombre de déploiements au cours de sa vie. 
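To make the "config in the environment" idea concrete, here is a minimal Python sketch of reading deploy-specific values from environment variables instead of versioned constants. The variable names (`OPENAI_API_KEY`, `S3_BUCKET_NAME`, `DATABASE_URL`) are illustrative assumptions; this is not how this repository's `config_loader` or `.env` handling actually works.

```python
# settings.py -- minimal sketch: deploy-specific values come from the
# environment, never from constants checked into the repository.
# Variable names below are illustrative, not this project's real settings.
import os


class Settings:
    def __init__(self) -> None:
        # Required secret: fail fast at startup if it is missing.
        self.openai_api_key = os.environ["OPENAI_API_KEY"]
        # Optional values with safe local defaults.
        self.s3_bucket = os.environ.get("S3_BUCKET_NAME", "")
        self.database_url = os.environ.get("DATABASE_URL", "sqlite:///local.db")


settings = Settings()
print(f"bucket={settings.s3_bucket!r}")
```

Each deployment then supplies its own values, for example from a local `.env` file during development or from the orchestrator's secret mechanism in production, without any code change.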
23 | -------------------------------------------------------------------------------- /rag_assistant/pages/2_Load_Document.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import streamlit as st 4 | import os 5 | import io 6 | 7 | from llama_index.core import SimpleDirectoryReader 8 | from llama_index.core.schema import Document as LIDocument 9 | 10 | from utils.auth import check_password 11 | from utils.constants import DocumentType, SupportedFileType, Metadata, CollectionType 12 | from utils.config_loader import load_config 13 | from utils.utilsdoc import load_doc, load_store 14 | from utils.utilsrag_li import build_summary_index 15 | 16 | from utils.utilsvision import load_image 17 | from utils.utilsfile import put_file 18 | 19 | config = load_config() 20 | 21 | app_name = config['DEFAULT']['APP_NAME'] 22 | collection_name = config['VECTORDB']['collection_name'] 23 | upload_directory = config['FILE_MANAGEMENT']['UPLOAD_DIRECTORY'] 24 | 25 | st.set_page_config(page_title=f"""📄 {app_name} 🤗""", page_icon="📄") 26 | 27 | 28 | def main(): 29 | st.title(f"""Chargement des Connaissances 📄""") 30 | 31 | # with st.form("Upload File"): 32 | topic_name = st.text_input("Thème du document (ex: API, Cloud, Data, Architecture, Sécurité, ...)") 33 | 34 | file_type = st.radio("Type de document", [e.value for e in DocumentType], index=None) 35 | 36 | pdfs = st.file_uploader("Document(s) à transmettre", type=[e.value for e in SupportedFileType], 37 | accept_multiple_files=True) 38 | 39 | disabled = True 40 | if (file_type is not None) and (topic_name is not None) and (pdfs is not None) and (len(pdfs)): 41 | disabled = False 42 | 43 | 44 | with st.container(): 45 | st.subheader("Traitement des images") 46 | analyse_images = st.checkbox("Analyser les images") 47 | image_only = st.checkbox("Traiter uniquement les images (test mode)", disabled=(not analyse_images)) 48 | restart_image_analysis = st.checkbox("Relancer l'analyse d'image (test mode)", disabled=(not analyse_images)) 49 | 50 | with st.container(): 51 | st.subheader("Autres options") 52 | generate_summary = st.checkbox("Générer le sommaire", disabled=True) 53 | upload_only = st.checkbox("Enregistrement des documents uniquement") 54 | 55 | if st.button("Transmettre", disabled=disabled): 56 | 57 | 58 | upload_files(analyse_images, file_type, generate_summary, image_only, pdfs, restart_image_analysis, topic_name, upload_only) 59 | 60 | 61 | def upload_files(analyse_images, file_type, generate_summary, image_only, pdfs, restart_image_analysis, topic_name, upload_only): 62 | file_paths = [] 63 | if not os.path.exists(upload_directory): 64 | os.makedirs(upload_directory) 65 | for pdf in pdfs: 66 | file_path = os.path.join(upload_directory, pdf.name) 67 | with open(file_path, 'wb') as f: 68 | f.write(pdf.read()) 69 | file_paths.append(file_path) 70 | put_file(io.BytesIO(pdf.getvalue()), pdf.name, CollectionType.DOCUMENTS.value) 71 | metadata = {Metadata.DOCUMENT_TYPE.value: file_type, Metadata.TOPIC.value: topic_name} 72 | docs = [] 73 | if not image_only: 74 | docs += load_doc(pdfs, metadata) 75 | if analyse_images: 76 | image_docs = load_image(pdfs, metadata, restart_image_analysis) 77 | docs += image_docs 78 | if not upload_only: 79 | load_store(docs, collection_name=collection_name) 80 | if generate_summary: 81 | docs_li = docs_prepare( 82 | #input_files=file_paths, 83 | input_dir=upload_directory 84 | ) 85 | summary_index = build_summary_index(docs_li) 86 | 87 | 88 | def 
docs_prepare(input_files: Optional[list[str]] = None, input_dir: Optional[str] = None) -> list[LIDocument]: 89 | documents = SimpleDirectoryReader( 90 | input_files=input_files, 91 | input_dir=input_dir, 92 | required_exts=["."+e.value for e in SupportedFileType] 93 | ).load_data() 94 | return documents 95 | 96 | 97 | if __name__ == "__main__": 98 | if not check_password(): 99 | # Do not continue if check_password is not True. 100 | st.stop() 101 | main() 102 | -------------------------------------------------------------------------------- /data/sources/md/12factor/en/dev-prod-parity.md: -------------------------------------------------------------------------------- 1 | ## X. Dev/prod parity 2 | ### Keep development, staging, and production as similar as possible 3 | 4 | Historically, there have been substantial gaps between development (a developer making live edits to a local [deploy](./codebase) of the app) and production (a running deploy of the app accessed by end users). These gaps manifest in three areas: 5 | 6 | * **The time gap**: A developer may work on code that takes days, weeks, or even months to go into production. 7 | * **The personnel gap**: Developers write code, ops engineers deploy it. 8 | * **The tools gap**: Developers may be using a stack like Nginx, SQLite, and OS X, while the production deploy uses Apache, MySQL, and Linux. 9 | 10 | **The twelve-factor app is designed for [continuous deployment](http://avc.com/2011/02/continuous-deployment/) by keeping the gap between development and production small.** Looking at the three gaps described above: 11 | 12 | * Make the time gap small: a developer may write code and have it deployed hours or even just minutes later. 13 | * Make the personnel gap small: developers who wrote code are closely involved in deploying it and watching its behavior in production. 14 | * Make the tools gap small: keep development and production as similar as possible. 15 | 16 | Summarizing the above into a table: 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 |
Traditional appTwelve-factor app
Time between deploysWeeksHours
Code authors vs code deployersDifferent peopleSame people
Dev vs production environmentsDivergentAs similar as possible
40 | 41 | [Backing services](./backing-services), such as the app's database, queueing system, or cache, is one area where dev/prod parity is important. Many languages offer libraries which simplify access to the backing service, including *adapters* to different types of services. Some examples are in the table below.

| Type | Language | Library | Adapters |
| --- | --- | --- | --- |
| Database | Ruby/Rails | ActiveRecord | MySQL, PostgreSQL, SQLite |
| Queue | Python/Django | Celery | RabbitMQ, Beanstalkd, Redis |
| Cache | Ruby/Rails | ActiveSupport::Cache | Memory, filesystem, Memcached |
69 | 70 | Developers sometimes find great appeal in using a lightweight backing service in their local environments, while a more serious and robust backing service will be used in production. For example, using SQLite locally and PostgreSQL in production; or local process memory for caching in development and Memcached in production. 71 | 72 | **The twelve-factor developer resists the urge to use different backing services between development and production**, even when adapters theoretically abstract away any differences in backing services. Differences between backing services mean that tiny incompatibilities crop up, causing code that worked and passed tests in development or staging to fail in production. These types of errors create friction that disincentivizes continuous deployment. The cost of this friction and the subsequent dampening of continuous deployment is extremely high when considered in aggregate over the lifetime of an application. 73 | 74 | Lightweight local services are less compelling than they once were. Modern backing services such as Memcached, PostgreSQL, and RabbitMQ are not difficult to install and run thanks to modern packaging systems, such as [Homebrew](http://mxcl.github.com/homebrew/) and [apt-get](https://help.ubuntu.com/community/AptGet/Howto). Alternatively, declarative provisioning tools such as [Chef](http://www.opscode.com/chef/) and [Puppet](http://docs.puppetlabs.com/) combined with light-weight virtual environments such as [Docker](https://www.docker.com/) and [Vagrant](http://vagrantup.com/) allow developers to run local environments which closely approximate production environments. The cost of installing and using these systems is low compared to the benefit of dev/prod parity and continuous deployment. 75 | 76 | Adapters to different backing services are still useful, because they make porting to new backing services relatively painless. But all deploys of the app (developer environments, staging, production) should be using the same type and version of each of the backing services. 
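The parity point can be shown with a short, hedged sketch: when the backing service is resolved from configuration, every deploy runs the same service type and version and only the connection string differs. `DATABASE_URL` is an assumed variable name and SQLAlchemy an assumed dependency; this is not code from this repository.

```python
# db.py -- sketch: one code path for every deploy; only the URL differs.
# Dev and prod both target PostgreSQL (e.g. a local Docker container vs a
# managed instance), rather than falling back to SQLite locally, which is
# exactly the divergence dev/prod parity warns against.
import os

from sqlalchemy import create_engine, text  # assumed dependency

engine = create_engine(
    os.environ.get("DATABASE_URL", "postgresql+psycopg2://app:app@localhost:5432/app")
)

with engine.connect() as conn:
    print(conn.execute(text("select version()")).scalar_one())
```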
77 | -------------------------------------------------------------------------------- /terraform/aws/ecs.tf: -------------------------------------------------------------------------------- 1 | resource "aws_ecs_cluster" "ai_assistant_cluster" { 2 | name = "ai_assistant" 3 | configuration { 4 | execute_command_configuration { 5 | kms_key_id = aws_kms_key.key.arn 6 | logging = "OVERRIDE" 7 | 8 | log_configuration { 9 | cloud_watch_encryption_enabled = true 10 | cloud_watch_log_group_name = aws_cloudwatch_log_group.ai_assistant-cloudwatch-log.name 11 | } 12 | } 13 | } 14 | } 15 | 16 | resource "aws_ecs_cluster_capacity_providers" "cluster" { 17 | cluster_name = aws_ecs_cluster.ai_assistant_cluster.name 18 | 19 | capacity_providers = ["FARGATE", "FARGATE_SPOT"] 20 | 21 | default_capacity_provider_strategy { 22 | capacity_provider = "FARGATE_SPOT" 23 | } 24 | } 25 | 26 | resource "aws_cloudwatch_log_group" "ai_assistant-cloudwatch-log" { 27 | name = "/ecs/ai_assistant-taskdef-iac-https" 28 | } 29 | 30 | resource "aws_ecs_service" "ai_assistant_service" { 31 | name = "ai_assistant-service-iac-https" 32 | cluster = aws_ecs_cluster.ai_assistant_cluster.id 33 | task_definition = aws_ecs_task_definition.ai_assistant_task_definition.arn 34 | force_new_deployment = true 35 | capacity_provider_strategy { 36 | capacity_provider = "FARGATE_SPOT" 37 | base = 1 38 | weight = 1 39 | } 40 | network_configuration { 41 | subnets = [data.aws_subnet.ai_assistant_subnet_1.id, data.aws_subnet.ai_assistant_subnet_2.id, data.aws_subnet.ai_assistant_subnet_3.id] 42 | security_groups = [aws_security_group.ai_assistant_security_group.id] 43 | assign_public_ip = true 44 | } 45 | deployment_circuit_breaker { 46 | enable = true 47 | rollback = true 48 | } 49 | desired_count = 1 50 | 51 | load_balancer { 52 | target_group_arn = aws_lb_target_group.ai_assistant_target_group_https.arn 53 | container_name = "ai_assistant_https" 54 | container_port = 80 55 | } 56 | 57 | depends_on = [aws_lb_listener.application_lb_listener] 58 | } 59 | 60 | resource "aws_ecs_task_definition" "ai_assistant_task_definition" { 61 | family = "ai_assistant-taskdef-iac-https" 62 | network_mode = "awsvpc" 63 | requires_compatibilities = ["FARGATE"] 64 | 65 | cpu = "512" 66 | memory = "1024" 67 | volume { 68 | name = "efs-volume" 69 | efs_volume_configuration { 70 | file_system_id = aws_efs_file_system.ai_assistant_efs_file_system.id 71 | root_directory = "/" 72 | transit_encryption = "ENABLED" 73 | } 74 | } 75 | volume { 76 | name = "configuration-secret" 77 | efs_volume_configuration { 78 | file_system_id = aws_efs_file_system.ai_assistant_efs_file_system.id 79 | root_directory = "/" 80 | transit_encryption = "ENABLED" 81 | } 82 | } 83 | execution_role_arn = aws_iam_role.ai_assistant_ecs_execution_role.arn 84 | task_role_arn = aws_iam_role.ai_assistant_ecs_execution_role.arn 85 | 86 | container_definitions = jsonencode([{ 87 | name = "ai_assistant_https" 88 | image = var.ecr_image_url 89 | cpu = 512 90 | memory = 1024 91 | runtime_platform = { 92 | "cpuArchitecture" : "X86_64", 93 | "operatingSystemFamily" : "LINUX" 94 | } 95 | mountPoints = [{ 96 | sourceVolume = "efs-volume" 97 | containerPath = "/app/data/chroma" 98 | readOnly = false 99 | }, 100 | { 101 | sourceVolume = "configuration-secret" 102 | containerPath = "/app/.streamlit" 103 | readOnly = false 104 | }] 105 | memoryReservation = 1024 106 | portMappings = [{ 107 | name = "ai_assistant-80-tcp" 108 | containerPort = 80 109 | hostPort = 80 110 | appProtocol = "http" 111 | }] 112 | 
logConfiguration = { 113 | logDriver = "awslogs" 114 | options = { 115 | awslogs-create-group = "true" 116 | awslogs-group = "/ecs/ai_assistant-taskdef-iac" 117 | awslogs-region = "eu-west-1" 118 | awslogs-stream-prefix = "ecs" 119 | }, 120 | } 121 | essential = true 122 | environment = [ 123 | { 124 | "name" : "LANGCHAIN_TRACING_V2", 125 | "value": "${var.langchain_tracing_v2_bool}" 126 | } 127 | ] 128 | secrets = [ 129 | { 130 | "name" : "OPENAI_API_KEY", 131 | "valueFrom" : "${data.aws_secretsmanager_secret.secret.arn}:${var.openai_key_name}::" 132 | }, 133 | { 134 | "name" : "MISTRAL_API_KEY", 135 | 136 | "valueFrom" : "${data.aws_secretsmanager_secret.secret.arn}:${var.mistral_key_name}::" 137 | }, 138 | { 139 | "name" : "HF_TOKEN", 140 | 141 | "valueFrom" : "${data.aws_secretsmanager_secret.secret.arn}:${var.hf_token_name}::" 142 | }, 143 | { 144 | "name" : "LANGCHAIN_API_KEY", 145 | 146 | "valueFrom" : "${data.aws_secretsmanager_secret.secret.arn}:${var.langchain_key_name}::" 147 | } 148 | ] 149 | }]) 150 | } 151 | -------------------------------------------------------------------------------- /data/sources/md/12factor/fr/dev-prod-parity.md: -------------------------------------------------------------------------------- 1 | ## X. Parité dev/prod 2 | ### Gardez le développement, la validation et la production aussi proches que possible 3 | 4 | Historiquement, il y a eu un fossé conséquent entre le développement (un développeur qui fait des modifications sur un [déploiement](./codebase) local de l'application) et la production (un déploiement de l'application accessible aux utilisateurs finaux). Ce fossé se manifeste de trois manières : 5 | 6 | * **Le fossé temporel** : un développeur peut travailler sur du code qui peut prendre des jours, des semaines ou des mois avant d'aller en production 7 | * **Le fossé des personnes** : les développeurs écrivent le code, et d'autres personnes le déploient. 8 | * **Le fossé des outils** : les développeurs peuvent utiliser une pile comme Nginx, SQLite, et OS X, alors que le déploiement de production utilise Apache, MySQL, et Linux. 9 | 10 | **Les applications 12 facteurs sont conçues pour le [déploiement continu (en)](http://avc.com/2011/02/continuous-deployment/) en gardant un fossé étroit entre le développement et la production.** Si l'on regarde les trois fossés décrits plus haut : 11 | 12 | * Réduire le fossé temporel : un développeur peut écrire du code et le déployer quelques heures ou même juste quelques minutes plus tard. 13 | * Réduire le fossé des personnes : les personnes qui écrivent le code sont impliquées dans son déploiement et pour surveiller son comportement en production. 14 | * Réduire le fossé des outils : réduire, autant que possible, les différences entre le développement et la production. 15 | 16 | Si l'on résume cela en un tableau : 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 |
| | Application traditionnelle | Application 12 facteurs |
| --- | --- | --- |
| Temps entre les déploiements | Semaines | Heures |
| Auteurs du code et ceux qui le déploient | Des personnes différentes | Les mêmes personnes |
| L'environnement de développement et celui de production | Divergents | Aussi similaires que possible |
40 | 41 | [Les services externes](./backing-services), tels que la base de données, la file de messages, ou le cache sont des éléments importants de la parité développement/production. La plupart des langages fournissent des bibliothèques qui simplifient l'accès à ces services externes, en fournissant des adaptateurs pour différents types de services. Voici quelques exemples dans le tableau ci-dessous.

| Type | Langage | Librairie | Adaptateurs |
| --- | --- | --- | --- |
| Base de données | Ruby/Rails | ActiveRecord | MySQL, PostgreSQL, SQLite |
| File de messages | Python/Django | Celery | RabbitMQ, Beanstalkd, Redis |
| Cache | Ruby/Rails | ActiveSupport::Cache | Mémoire, système de fichiers, Memcached |
69 | 70 | Les développeurs trouvent parfois agréable d'utiliser des services externes légers dans leur environnement local, alors qu'un service externe plus sérieux et robuste est utilisé en production. Par exemple, utiliser SQLite en local, et PostgreSQL en production; ou bien, durant le développement, mettre les données en cache dans la mémoire des processus locaux, et utiliser Memcached en production. 71 | 72 | **Les développeurs des applications 12 facteurs résistent au besoin d'utiliser des services externes différents entre le développement local et la production**, même lorsque les adaptateurs permettent d'abstraire en théorie beaucoup de différences entre les services externes. Les différences entre les services externes signifient que de petites incompatibilités surviennent, ce qui va faire que du code qui fonctionnait et qui passait les tests durant le développement ou la validation ne fonctionnera pas en production. Ce type d'erreurs crée de la friction en défaveur du déploiement continu. Le coût de cette friction et son impact négatif sur le déploiement continu est extrêmement élevé lorsqu'il est cumulé sur toute la vie de l'application. 73 | 74 | Les services locaux légers sont moins attirants aujourd'hui qu'ils ne l'étaient autrefois. Les services externes modernes tels que Memcached, PostgreSQL, et RabbitMQ ne sont pas difficiles à installer et à faire fonctionner grâce aux systèmes de paquets modernes comme [Homebrew](http://mxcl.github.com/homebrew/) et [apt-get](https://help.ubuntu.com/community/AptGet/Howto). Autre possibilité, des outils de provisionnement comme [Chef](http://www.opscode.com/chef/) et [Puppet](http://docs.puppetlabs.com/), combinés à des environnements virtuels légers comme [Docker](https://www.docker.com/) et [Vagrant](http://vagrantup.com/) permettent aux développeurs de faire fonctionner des environnements locaux qui reproduisent de très près les environnements de production. Le coût d'installation et d'utilisation de ces systèmes est faible comparé aux bénéfices d'une bonne parité développement/production et du déploiement continu. 75 | 76 | Les adaptateurs à ces différents systèmes externes sont malgré tout utiles, car ils rendent le portage vers de nouveaux services externes relativement indolores. Mais tous les déploiements de l'application (environnement de développement, validation, production) devraient utiliser le même type et la même version de chacun de ces services externes. 
77 | -------------------------------------------------------------------------------- /tests/rag_bedrock/test_rag_langchain.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import pytest 4 | 5 | 6 | from rag_bedrock.base import LangchainTestRAGHelper 7 | 8 | 9 | @pytest.mark.usefixtures("trulens_prepare", 10 | "bedrock_prepare", 11 | "documents_prepare", 12 | "llm_prepare", 13 | "embeddings_prepare", 14 | "eval_questions_prepare", 15 | "trulens_context_prepare", 16 | "provider_prepare", 17 | "rag_prepare", 18 | "feedbacks_prepare") 19 | class TestRAGLangChainClaude3SonnetTitanEmbedV1(LangchainTestRAGHelper): 20 | 21 | @property 22 | def test_name(self): 23 | return "Langchain_Claude_3_Sonnet_Titan_Embed_V1" 24 | 25 | @property 26 | def model_id(self): 27 | return "anthropic.claude-3-sonnet-20240229-v1:0" 28 | 29 | @property 30 | def embedding_model_id(self): 31 | return "amazon.titan-embed-text-v1" 32 | 33 | 34 | @pytest.mark.usefixtures("trulens_prepare", 35 | "bedrock_prepare", 36 | "documents_prepare", 37 | "llm_prepare", 38 | "embeddings_prepare", 39 | "eval_questions_prepare", 40 | "trulens_context_prepare", 41 | "provider_prepare", 42 | "rag_prepare", 43 | "feedbacks_prepare") 44 | class TestRAGLangChainClaude3SonnetTitanEmbedV2(LangchainTestRAGHelper): 45 | 46 | @property 47 | def test_name(self): 48 | return "Langchain_Claude_3_Sonnet_Titan_Embed_V2" 49 | 50 | @property 51 | def model_id(self): 52 | return "anthropic.claude-3-sonnet-20240229-v1:0" 53 | 54 | @property 55 | def embedding_model_id(self): 56 | return "amazon.titan-embed-text-v2:0" 57 | 58 | 59 | @pytest.mark.usefixtures("trulens_prepare", 60 | "bedrock_prepare", 61 | "documents_prepare", 62 | "llm_prepare", 63 | "embeddings_prepare", 64 | "trulens_context_prepare", 65 | "provider_prepare", 66 | "eval_questions_prepare", 67 | "rag_prepare", 68 | "feedbacks_prepare") 69 | class TestRAGLangChainMistralLargeTitanEmbedV1(LangchainTestRAGHelper): 70 | 71 | @property 72 | def test_name(self): 73 | return "Langchain_Mistral_Large_Titan_Embed_V1" 74 | 75 | @property 76 | def model_id(self): 77 | return "mistral.mistral-large-2402-v1:0" 78 | 79 | @property 80 | def embedding_model_id(self): 81 | return "amazon.titan-embed-text-v1" 82 | 83 | 84 | @pytest.mark.usefixtures("trulens_prepare", 85 | "bedrock_prepare", 86 | "documents_prepare", 87 | "llm_prepare", 88 | "embeddings_prepare", 89 | "trulens_context_prepare", 90 | "provider_prepare", 91 | "eval_questions_prepare", 92 | "rag_prepare", 93 | "feedbacks_prepare") 94 | class TestRAGLangChainMistralLargeTitanEmbedV2(LangchainTestRAGHelper): 95 | 96 | @property 97 | def test_name(self): 98 | return "Langchain_Mistral_Large_Titan_Embed_V2" 99 | 100 | @property 101 | def model_id(self): 102 | return "mistral.mistral-large-2402-v1:0" 103 | 104 | @property 105 | def embedding_model_id(self): 106 | return "amazon.titan-embed-text-v2:0" 107 | 108 | 109 | @pytest.mark.usefixtures("trulens_prepare", 110 | "bedrock_prepare", 111 | "documents_prepare", 112 | "llm_prepare", 113 | "embeddings_prepare", 114 | "trulens_context_prepare", 115 | "provider_prepare", 116 | "eval_questions_prepare", 117 | "rag_prepare", 118 | "feedbacks_prepare") 119 | class TestRAGLangChainMistralLargeTitanEmbedMultiModal(LangchainTestRAGHelper): 120 | 121 | @property 122 | def test_name(self): 123 | return "Langchain_Mistral_Large_Titan_Multimodal" 124 | 125 | @property 126 | def model_id(self): 127 | return "mistral.mistral-large-2402-v1:0" 128 | 
129 | @property 130 | def embedding_model_id(self): 131 | return "amazon.titan-embed-image-v1" 132 | 133 | 134 | @pytest.mark.usefixtures("trulens_prepare", 135 | "bedrock_prepare", 136 | "documents_prepare", 137 | "llm_prepare", 138 | "embeddings_prepare", 139 | "trulens_context_prepare", 140 | "provider_prepare", 141 | "eval_questions_prepare", 142 | "rag_prepare", 143 | "feedbacks_prepare") 144 | class TestRAGLangChainMistralLargeCohereEmbedMultiLingual(LangchainTestRAGHelper): 145 | 146 | @property 147 | def test_name(self): 148 | return "Langchain_Mistral_Large_Cohere_Embed" 149 | 150 | @property 151 | def model_id(self): 152 | return "mistral.mistral-large-2402-v1:0" 153 | 154 | @property 155 | def embedding_model_id(self): 156 | return "cohere.embed-multilingual-v3" 157 | -------------------------------------------------------------------------------- /rag_assistant/shared/rag_prompts.py: -------------------------------------------------------------------------------- 1 | __template__ = """Answer the following questions as best you can. You have access to the following tools: 2 | 3 | {tools} 4 | 5 | Use the following format: 6 | 7 | Question: the input question you must answer 8 | Thought: you should always think about what to do 9 | Action: the action to take, should be one of [{tool_names}] 10 | Action Input: the input to the action 11 | Observation: the result of the action 12 | ... (this Thought/Action/Action Input/Observation can repeat N times) 13 | Thought: I now know the final answer 14 | Final Answer: the final answer to the original input question 15 | 16 | Only use information provided in the context. 17 | Check your output and make sure it conforms! 18 | DO NOT output an action and a final answer at the same time. 19 | NEVER output a final answer if you are still expecting to receive the response of a tool. 20 | 21 | Begin!""" 22 | 23 | __structured_chat_agent__ = '''Respond to the human as helpfully and accurately as possible. 24 | You have access to the following tools: 25 | 26 | {tools} 27 | 28 | Use a json blob to specify a tool by providing an action key (tool name) and an action_input key (tool input). 29 | 30 | Valid "action" values: "Final Answer" or {tool_names} 31 | 32 | Provide only ONE action per $JSON_BLOB, as shown: 33 | 34 | ``` 35 | {{ 36 | "action": $TOOL_NAME, 37 | "action_input": $INPUT 38 | }} 39 | ``` 40 | 41 | Follow this format: 42 | 43 | Question: input question to answer 44 | Thought: consider previous and subsequent steps 45 | Action: 46 | ``` 47 | $JSON_BLOB 48 | ``` 49 | Observation: action result 50 | ... (repeat Thought/Action/Observation N times) 51 | Thought: I know what to respond 52 | Action: 53 | ``` 54 | {{ 55 | "action": "Final Answer", 56 | "action_input": "Final response to human" 57 | }} 58 | 59 | Reminder to ALWAYS respond with a valid json blob of a single action. 60 | Do not respond directly to question. Only use information provided in the context. 61 | Use tools to retrieve relevant information. 62 | DO NOT output an action and a final answer at the same time. 63 | Format is Action:```$JSON_BLOB``` then Observation 64 | 65 | Begin! ''' 66 | 67 | 68 | __template2__ = """You are an assistant designed to guide users through a structured risk assessment questionnaire for cloud deployment. 
69 | The questionnaire is designed to cover various pillars essential for cloud architecture, 70 | including security, compliance, availability, access methods, data storage, processing, performance efficiency, 71 | cost optimization, and operational excellence. 72 | 73 | For each question, you are to follow the "Chain of Thought" process. This means that for each user's response, you will: 74 | 75 | - Acknowledge the response, 76 | - Reflect on the implications of the choice, 77 | - Identify any risks associated with the selected option, 78 | - Suggest best practices and architecture patterns that align with the user’s selection, 79 | - Guide them to the next relevant question based on their previous answers. 80 | 81 | Your objective is to ensure that by the end of the questionnaire, the user has a clear understanding of the appropriate architecture and services needed for a secure, efficient, and compliant cloud deployment. Remember to provide answers in a simple, interactive, and concise manner. 82 | 83 | Process: 84 | 85 | 1. Begin by introducing the purpose of the assessment and ask the first question regarding data security and compliance. 86 | 2. Based on the response, discuss the chosen level of data security, note any specific risks or requirements, 87 | and recommend corresponding cloud services or architectural patterns. 88 | 3. Proceed to the next question on application availability. Once the user responds, 89 | reflect on the suitability of their choice for their application's criticality and suggest availability configurations. 90 | 4. For questions on access methods and data storage, 91 | provide insights on securing application access points or optimizing data storage solutions. 92 | 5. When discussing performance efficiency, 93 | highlight the trade-offs between performance and cost, and advise on scaling strategies. 94 | 6. In the cost optimization section, 95 | engage in a brief discussion on budgetary constraints and recommend cost-effective cloud resource management. 96 | 7. Conclude with operational excellence, 97 | focusing on automation and monitoring, 98 | and propose solutions for continuous integration and deployment. 99 | 8. After the final question, 100 | summarize the user's choices and their implications for cloud architecture. 101 | 9. Offer a brief closing statement that reassures the user of the assistance provided 102 | and the readiness of their cloud deployment strategy. 103 | 104 | Keep the interactions focused on architectural decisions without diverting to other unrelated topics. 105 | You are not to perform tasks outside the scope of the questionnaire, 106 | such as executing code or accessing external databases. 107 | Your guidance should be solely based on the information provided by the user in the context of the questionnaire. 108 | Always answer in French. 
109 | {context} 110 | Question: {question} 111 | Helpful Answer:""" 112 | 113 | 114 | human = '''{input} 115 | 116 | {agent_scratchpad}''' 117 | -------------------------------------------------------------------------------- /rag_assistant/pages/3_RAG_Admin.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | 3 | import json 4 | 5 | from utils.auth import check_password 6 | from langchain_community.vectorstores import OpenSearchVectorSearch 7 | 8 | from utils.constants import DocumentType, ChunkType, Metadata, CollectionType 9 | from utils.utilsdoc import get_store, empty_store, extract_unique_name, get_collection_count, get_metadatas, delete_documents_by_type_and_name 10 | from utils.utilsfile import list_files, delete_file 11 | from utils.config_loader import load_config 12 | 13 | 14 | config = load_config() 15 | app_name = config['DEFAULT']['APP_NAME'] 16 | collection_name = config['VECTORDB']['collection_name'] 17 | 18 | st.set_page_config(page_title=f"""📄 {app_name} 🤗""", page_icon="📄") 19 | 20 | 21 | def main(): 22 | st.title(f"""Gestion des connaissances 📄""") 23 | 24 | # collection_name = st.selectbox("Collection", ["Default", "RAG"]) 25 | 26 | count = get_collection_count(collection_name) 27 | if count > 0: 28 | st.write(f"Il y a **{count}** morceaux (chunks) dans la collection '**{collection_name}**'.") 29 | else: 30 | st.write("La collection est vide.") 31 | st.page_link("pages/2_Load_Document.py", label="Charger les connaissances") 32 | 33 | st.subheader("Fichier(s) chargé(s)") 34 | 35 | unique_filenames = extract_unique_name(collection_name, Metadata.FILENAME.value) 36 | 37 | for name in unique_filenames: 38 | st.markdown(f"""- {name}""") 39 | 40 | st.subheader("Sujet(s) disponible(s):") 41 | unique_topic_names = extract_unique_name(collection_name, Metadata.TOPIC.value) 42 | for name in unique_topic_names: 43 | st.markdown(f"""- {name}""") 44 | 45 | # st.subheader("Document Type") 46 | # unique_document_types = extract_unique_name(collection_name, 'document_type') 47 | # for name in unique_document_types: 48 | # st.markdown(f"""- {name}""") 49 | 50 | with st.form("search"): 51 | st.subheader("Chercher dans la Base de Connaissance:") 52 | search = st.text_input("Texte (*)") 53 | 54 | topic_name = st.selectbox("Sujet", unique_topic_names, index=None) 55 | filename = st.selectbox("Nom du Fichier", unique_filenames, index=None) 56 | document_type = st.selectbox("Type de Document", [e.value for e in DocumentType], index=None) 57 | chunk_type = st.selectbox("Type de Morceau", [e.value for e in ChunkType], index=None) 58 | #document_type = st.selectbox("Document Type", unique_document_types, index=None) 59 | 60 | filters = [] 61 | if filename: 62 | filters.append({Metadata.FILENAME.value: filename}) 63 | if document_type: 64 | filters.append({Metadata.DOCUMENT_TYPE.value: document_type}) 65 | if topic_name: 66 | filters.append({Metadata.TOPIC.value: topic_name}) 67 | if document_type: 68 | filters.append({Metadata.DOCUMENT_TYPE.value: document_type}) 69 | if chunk_type: 70 | filters.append({Metadata.CHUNK_TYPE.value: chunk_type}) 71 | if st.form_submit_button("Recherche"): 72 | # add check for empty string as it is not supported by bedrock (or anthropic?) 
73 | if search != "": 74 | if len(filters) > 1: 75 | where = {"$and": filters} 76 | elif len(filters) == 1: 77 | where = filters[0] 78 | else: 79 | where = {} 80 | store = get_store() 81 | if isinstance(store, OpenSearchVectorSearch): 82 | result_filters = [] 83 | for os_filter in filters: 84 | for key in os_filter.keys(): 85 | result_filters.append({"match": {f"metadata.{key}": os_filter[key]}}) 86 | result = store.similarity_search(search, k=5, boolean_filter=result_filters) 87 | else: 88 | result = store.similarity_search(search, k=5, filter=where) 89 | st.write(result) 90 | else: 91 | st.write("Veuillez entrer un texte.") 92 | 93 | st.subheader("Administration des Données") 94 | 95 | col1, col2 = st.columns(2) 96 | with col1: 97 | file_name_to_delete = st.selectbox("Choisir un fichier", unique_filenames, index=None) 98 | if st.button("Supprimer les données du fichier"): 99 | delete_documents_by_type_and_name(collection_name=collection_name, type=Metadata.FILENAME.value, name=file_name_to_delete) 100 | delete_file(file_name_to_delete, CollectionType.DOCUMENTS.value) 101 | 102 | chunk_type_to_delete = st.selectbox("Choisir un type de morceau (chunk)", [e.value for e in ChunkType], index=None) 103 | if st.button("Supprimer les données de ce type"): 104 | delete_documents_by_type_and_name(collection_name=collection_name, type=Metadata.CHUNK_TYPE.value, 105 | name=chunk_type_to_delete) 106 | 107 | with col2: 108 | topic_name_to_delete = st.selectbox("Choisir un sujet", unique_topic_names, index=None) 109 | if st.button("Supprimer les données de ce sujet"): 110 | delete_documents_by_type_and_name(collection_name=collection_name, type=Metadata.TOPIC.value, name=topic_name_to_delete) 111 | 112 | if st.button("Supprimer la collection"): 113 | empty_store(collection_name=collection_name) 114 | 115 | with st.expander("Voir toutes les meta-données", expanded=False): 116 | st.subheader("Méta-données") 117 | metadatas = get_metadatas(collection_name=collection_name) 118 | st.code(json.dumps(metadatas, indent=4, sort_keys=True), language="json") 119 | 120 | 121 | if __name__ == "__main__": 122 | if not check_password(): 123 | # Do not continue if check_password is not True. 
124 | st.stop() 125 | main() 126 | -------------------------------------------------------------------------------- /tests/utils/test_utilsfile.py: -------------------------------------------------------------------------------- 1 | """Test the utilsfile file.""" 2 | 3 | import unittest 4 | import os 5 | #from pytest import fixture 6 | from unittest.mock import patch 7 | from rag_assistant.utils.utilsfile import list_files, _list_files_locally, _list_files_from_s3 8 | 9 | 10 | class TestListFiles(unittest.TestCase): 11 | """Test the list_files function.""" 12 | 13 | @patch('rag_assistant.utils.utilsfile.config.get') 14 | @patch('rag_assistant.utils.utilsfile._list_files_locally') 15 | def test_list_files_locally(self, mock_list_files_locally, mock_config_get): 16 | """Test list_files with LOCAL configuration.""" 17 | mock_config_get.return_value = 'LOCAL' 18 | mock_list_files_locally.return_value = ['file1.txt', 'file2.txt'] 19 | result = list_files('my_collection') 20 | self.assertEqual(result, ['file1.txt', 'file2.txt']) 21 | mock_list_files_locally.assert_called_once_with(file_collection='my_collection') 22 | 23 | @patch('rag_assistant.utils.utilsfile.config.get') 24 | @patch('rag_assistant.utils.utilsfile._list_files_from_s3') 25 | def test_list_files_s3(self, mock_list_files_from_s3, mock_config_get): 26 | """Test list_files with S3 configuration.""" 27 | mock_config_get.return_value = 'S3' 28 | mock_list_files_from_s3.return_value = ['file1.txt', 'file2.txt'] 29 | result = list_files('my_collection') 30 | self.assertEqual(result, ['file1.txt', 'file2.txt']) 31 | mock_list_files_from_s3.assert_called_once_with(file_collection='my_collection') 32 | 33 | @patch('rag_assistant.utils.utilsfile.config.get') 34 | def test_list_files_none(self, mock_config_get): 35 | """Test list_files with NONE configuration.""" 36 | mock_config_get.return_value = 'NONE' 37 | result = list_files('my_collection') 38 | self.assertIsNone(result) 39 | 40 | @patch('rag_assistant.utils.utilsfile.config.get') 41 | def test_list_files_not_implemented(self, mock_config_get): 42 | """Test list_files with an unknown configuration.""" 43 | mock_config_get.return_value = 'UNKNOWN' 44 | with self.assertRaises(NotImplementedError): 45 | list_files('my_collection') 46 | 47 | @patch('rag_assistant.utils.utilsfile.config.get') 48 | @patch('os.path.exists') 49 | @patch('os.listdir') 50 | def test__list_files_locally(self, mock_listdir, mock_path_exists, mock_config_get): 51 | """Test _list_files_locally function.""" 52 | mock_listdir.return_value = ['file1.txt', 'file2.txt', 'file3.jpg'] 53 | mock_path_exists.return_value = True 54 | mock_config_get.return_value = 'data' 55 | result = _list_files_locally('my_local_collection') 56 | self.assertEqual(result, ['file1.txt', 'file2.txt', 'file3.jpg']) 57 | mock_listdir.assert_called_once_with(os.path.join('data', 'my_local_collection')) 58 | mock_path_exists.assert_called_once_with(os.path.join('data', 'my_local_collection')) 59 | 60 | @patch('rag_assistant.utils.utilsfile.config.get') 61 | @patch('os.path.exists') 62 | @patch('os.listdir') 63 | def test__list_files_locally_empty(self, mock_listdir, mock_path_exists, mock_config_get): 64 | """Test _list_files_locally function with an empty directory.""" 65 | mock_listdir.return_value = [] 66 | mock_path_exists.return_value = True 67 | mock_config_get.return_value = 'data' 68 | result = _list_files_locally('empty_collection') 69 | self.assertEqual(result, []) 70 | mock_listdir.assert_called_once_with(os.path.join('data', 
'empty_collection')) 71 | 72 | @patch('rag_assistant.utils.utilsfile.boto3.client') 73 | @patch('rag_assistant.utils.utilsfile.config.get') 74 | def test__list_files_from_s3(self, mock_config_get, mock_boto3_client): 75 | """Test _list_files_from_s3 function.""" 76 | mock_config_get.return_value = 'my_test_bucket' 77 | mock_s3_client = mock_boto3_client.return_value 78 | mock_s3_client.list_objects_v2.return_value = { 79 | 'Contents': [ 80 | {'Key': 'file_collection/file1.txt'}, 81 | {'Key': 'file_collection/file2.txt'} 82 | ] 83 | } 84 | result = _list_files_from_s3('file_collection') 85 | self.assertEqual(result, ['file1.txt', 'file2.txt']) 86 | mock_boto3_client.assert_called_once_with('s3') 87 | mock_s3_client.list_objects_v2.assert_called_once_with(Bucket='my_test_bucket', 88 | Prefix='file_collection') 89 | 90 | @patch('rag_assistant.utils.utilsfile.boto3.client') 91 | @patch('rag_assistant.utils.utilsfile.config.get') 92 | def test__list_files_from_s3_empty(self, mock_config_get, mock_boto3_client): 93 | """Test _list_files_from_s3 function with an empty collection.""" 94 | mock_config_get.return_value = 'my_test_bucket' 95 | mock_s3_client = mock_boto3_client.return_value 96 | mock_s3_client.list_objects_v2.return_value = {} 97 | result = _list_files_from_s3('empty_collection') 98 | self.assertEqual(result, []) 99 | mock_boto3_client.assert_called_once_with('s3') 100 | mock_s3_client.list_objects_v2.assert_called_once_with(Bucket='my_test_bucket', 101 | Prefix='empty_collection') 102 | 103 | @patch('rag_assistant.utils.utilsfile.boto3.client') 104 | @patch('rag_assistant.utils.utilsfile.config.get') 105 | def test__list_files_from_s3_exception(self, mock_config_get, mock_boto3_client): 106 | """Test _list_files_from_s3 function with an exception.""" 107 | mock_config_get.return_value = 'my_test_bucket' 108 | mock_s3_client = mock_boto3_client.return_value 109 | mock_s3_client.list_objects_v2.side_effect = Exception('S3 access error') 110 | with self.assertRaises(Exception) as context: 111 | _list_files_from_s3('invalid_collection') 112 | self.assertTrue('S3 access error' in str(context.exception)) 113 | -------------------------------------------------------------------------------- /rag_assistant/utils/utilsfile.py: -------------------------------------------------------------------------------- 1 | """Utils for storing files""" 2 | import io 3 | import os 4 | import logging 5 | import boto3 6 | 7 | from streamlit.runtime.uploaded_file_manager import UploadedFile 8 | 9 | from .config_loader import load_config 10 | from .constants import StorageType 11 | 12 | config = load_config() 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | def put_file(file: io.BytesIO, filename:str, file_collection: str = '') -> None: 17 | """Persist file to selected storage interface""" 18 | storage_interface = config.get('DOCUMENTS_STORAGE', 'INTERFACE') 19 | 20 | if storage_interface == StorageType.LOCAL.value: 21 | _persist_file_locally(file, filename=filename, file_collection=file_collection) 22 | elif storage_interface == StorageType.S3.value: 23 | _persist_file_to_s3(file, filename=filename, file_collection=file_collection) 24 | elif storage_interface == StorageType.NONE.value: 25 | pass 26 | else: 27 | raise NotImplementedError(f"{storage_interface} not implemented yet for storage.") 28 | 29 | 30 | def get_file(filename: str, file_collection: str=''): 31 | """Get file from selected storage interface""" 32 | storage_interface = config.get('DOCUMENTS_STORAGE', 'INTERFACE') 33 | 34 | if 
storage_interface == StorageType.LOCAL.value: 35 | return _get_file_locally(filename=filename, file_collection=file_collection) 36 | 37 | if storage_interface == StorageType.S3.value: 38 | return _get_file_from_s3(filename=filename, file_collection=file_collection) 39 | 40 | if storage_interface == StorageType.NONE.value: 41 | return None 42 | 43 | raise NotImplementedError(f"{storage_interface} not implemented yet for storage.") 44 | 45 | 46 | def delete_file(filename: str, file_collection: str=''): 47 | """Delete file from selected storage interface""" 48 | storage_interface = config.get('DOCUMENTS_STORAGE', 'INTERFACE') 49 | 50 | if storage_interface == StorageType.LOCAL.value: 51 | return _delete_file_locally(filename=filename, file_collection=file_collection) 52 | 53 | if storage_interface == StorageType.S3.value: 54 | return _delete_file_from_s3(filename=filename, file_collection=file_collection) 55 | 56 | if storage_interface == StorageType.NONE.value: 57 | return None 58 | 59 | raise NotImplementedError(f"{storage_interface} not implemented yet for storage.") 60 | 61 | 62 | def list_files(file_collection: str=''): 63 | """List files from selected storage interface""" 64 | storage_interface = config.get('DOCUMENTS_STORAGE', 'INTERFACE') 65 | 66 | if storage_interface == StorageType.LOCAL.value: 67 | return _list_files_locally(file_collection=file_collection) 68 | 69 | if storage_interface == StorageType.S3.value: 70 | return _list_files_from_s3(file_collection=file_collection) 71 | 72 | if storage_interface == StorageType.NONE.value: 73 | return None 74 | 75 | raise NotImplementedError(f"{storage_interface} not implemented yet for storage.") 76 | 77 | 78 | def _persist_file_to_s3(file: io.BytesIO, filename: str, file_collection: str) -> None: 79 | """Persist file to S3 storage""" 80 | logger.info("On persiste un document : %s sur S3", filename) 81 | s3_bucket = config.get('DOCUMENTS_STORAGE', 'S3_BUCKET_NAME') 82 | 83 | file_key = f"{file_collection}/{filename}" 84 | 85 | s3_client = boto3.client('s3') 86 | 87 | s3_client.upload_fileobj(file, s3_bucket, file_key) 88 | 89 | 90 | def _persist_file_locally(file: io.BytesIO, filename:str, file_collection: str) -> None: 91 | """Persist file to local storage""" 92 | logger.info("On persiste un document : %s localement", filename) 93 | documents_path = config.get('DOCUMENTS_STORAGE', 'DOCUMENTS_PATH') 94 | 95 | file_path = os.path.join(documents_path, file_collection) 96 | 97 | if not os.path.exists(file_path): 98 | os.makedirs(file_path) 99 | 100 | file_path = os.path.join(file_path, filename) 101 | 102 | with open(file_path, 'wb') as f: 103 | f.write(file.getbuffer()) 104 | 105 | 106 | def _get_file_locally(filename: str, file_collection: str): 107 | """Get file from local storage""" 108 | documents_path = config.get('DOCUMENTS_STORAGE', 'DOCUMENTS_PATH') 109 | 110 | file_path = os.path.join(documents_path, file_collection, filename) 111 | 112 | if os.path.exists(file_path): 113 | return open(file_path, 'rb').read() 114 | 115 | return None 116 | 117 | 118 | def _get_file_from_s3(filename: str, file_collection: str): 119 | """Get file from S3 storage""" 120 | s3_bucket = config.get('DOCUMENTS_STORAGE', 'S3_BUCKET_NAME') 121 | 122 | file_key = f"{file_collection}/{filename}" 123 | 124 | s3_client = boto3.client('s3') 125 | 126 | response = s3_client.get_object(Bucket=s3_bucket, Key=file_key) 127 | return response['Body'].read() 128 | 129 | 130 | def _delete_file_locally(filename: str, file_collection: str): 131 | """Delete file from local 
storage""" 132 | documents_path = config.get('DOCUMENTS_STORAGE', 'DOCUMENTS_PATH') 133 | 134 | file_path = os.path.join(documents_path, file_collection, filename) 135 | 136 | if os.path.exists(file_path): 137 | os.remove(file_path) 138 | 139 | 140 | def _delete_file_from_s3(filename: str, file_collection: str): 141 | """Delete file from S3 storage""" 142 | s3_bucket = config.get('DOCUMENTS_STORAGE', 'S3_BUCKET_NAME') 143 | 144 | file_key = f"{file_collection}/{filename}" 145 | 146 | s3_client = boto3.client('s3') 147 | 148 | s3_client.delete_object(Bucket=s3_bucket, Key=file_key) 149 | 150 | 151 | def _list_files_locally(file_collection: str): 152 | """List files from local storage""" 153 | documents_path = config.get('DOCUMENTS_STORAGE', 'DOCUMENTS_PATH') 154 | 155 | file_path = os.path.join(documents_path, file_collection) 156 | 157 | if os.path.exists(file_path): 158 | return os.listdir(file_path) 159 | 160 | return [] 161 | 162 | 163 | def _list_files_from_s3(file_collection: str): 164 | """List files from S3 storage""" 165 | s3_bucket = config.get('DOCUMENTS_STORAGE', 'S3_BUCKET_NAME') 166 | 167 | s3_client = boto3.client('s3') 168 | 169 | response = s3_client.list_objects_v2(Bucket=s3_bucket, Prefix=file_collection) 170 | 171 | if 'Contents' in response: 172 | return [obj['Key'].split('/')[-1] for obj in response['Contents']] 173 | 174 | return [] 175 | -------------------------------------------------------------------------------- /rag_assistant/utils/utilsvision.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import hashlib 3 | import imghdr 4 | import json 5 | import os 6 | import io 7 | import shutil 8 | from typing import Optional, Union 9 | 10 | import boto3 11 | from langchain_core.documents import Document 12 | from pypdf import PdfReader 13 | from streamlit.runtime.uploaded_file_manager import UploadedFile 14 | 15 | from utils.constants import ChunkType, Metadata, CollectionType 16 | from utils.config_loader import load_config 17 | from utils.utilsdoc import clean_text 18 | from utils.utilsfile import put_file 19 | 20 | config = load_config() 21 | 22 | aws_profile_name = os.getenv("profile_name") 23 | bedrock_region_name = config["BEDROCK"]["AWS_REGION_NAME"] 24 | #bedrock_embeddings_model = config["BEDROCK"]["EMBEDDINGS_MODEL"] 25 | bedrock_endpoint_url = config["BEDROCK"]["BEDROCK_ENDPOINT_URL"] 26 | vision_model = config["VISION"]["VISION_MODEL"] 27 | 28 | boto3.setup_default_session(profile_name=os.getenv("profile_name")) 29 | bedrock = boto3.client("bedrock-runtime", bedrock_region_name, endpoint_url=bedrock_endpoint_url) 30 | 31 | 32 | 33 | extract_image_output_dir = config['VISION']['IMAGE_OUTPUT_DIR'] 34 | 35 | def image_to_text(encoded_image, media_type) -> Optional[str]: 36 | system_prompt = """Describe every detail you can about this image, 37 | be extremely thorough and detail even the most minute aspects of the image. 38 | Start your description by providing an image title followed by a short overall summary. 39 | If the image is a table, output the content of the table in a structured format. 
40 | """ 41 | 42 | prompt = { 43 | "anthropic_version": "bedrock-2023-05-31", 44 | "max_tokens": 1000, 45 | "temperature": 0, 46 | "system": system_prompt, 47 | "messages": [ 48 | { 49 | "role": "user", 50 | "content": [ 51 | { 52 | "type": "image", 53 | "source": { 54 | "type": "base64", 55 | "data": encoded_image, 56 | "media_type": media_type 57 | } 58 | }, 59 | { 60 | "type": "text", 61 | "text": system_prompt 62 | } 63 | ] 64 | } 65 | ] 66 | } 67 | 68 | json_prompt = json.dumps(prompt) 69 | try: 70 | response = bedrock.invoke_model(body=json_prompt, modelId=vision_model, 71 | accept="application/json", contentType="application/json") 72 | response_body = json.loads(response.get('body').read()) 73 | output = response_body['content'][0]['text'] 74 | return output 75 | 76 | # Catch all other (unexpected) exceptions 77 | except Exception as e: 78 | print(f"An unexpected error occurred: {e}") 79 | return None 80 | 81 | 82 | def generate_unique_id(fname): 83 | # Generate MD5 hash of the filename 84 | hash_object = hashlib.md5(fname.name.encode()) 85 | hex_dig = hash_object.hexdigest() 86 | return hex_dig 87 | 88 | 89 | def load_image(pdfs: Union[list[UploadedFile], None, UploadedFile], metadata = None, restart_image_analysis:bool = False, ) -> Optional[list[Document]]: 90 | if pdfs is not None: 91 | docs = [] 92 | if metadata is None: 93 | metadata = {} 94 | metadata.update({Metadata.CHUNK_TYPE.value: ChunkType.IMAGE.value}) 95 | for pdf in pdfs: 96 | if pdf.type == "application/pdf": 97 | # Generate a unique identifier for each document 98 | tmp_id_based_on_file_upload = generate_unique_id(pdf) 99 | # Construct a save directory and create it 100 | save_dir = f"{extract_image_output_dir}/{tmp_id_based_on_file_upload}" 101 | if restart_image_analysis: 102 | # Before processing is done, remove the directory and its contents 103 | shutil.rmtree(save_dir) 104 | 105 | reader = PdfReader(pdf) 106 | 107 | os.makedirs(save_dir, exist_ok=True) 108 | 109 | for i, page in enumerate(reader.pages, start=1): 110 | for image in page.images: 111 | 112 | save_path = f"{save_dir}/{image.name}" 113 | json_path = f"{save_dir}/{image.name}.json" 114 | 115 | if os.path.exists(json_path): 116 | # skip the image if it is already processed 117 | with open(json_path, "r") as file: # Open the document file 118 | doc_data = json.load(file) # Load the data from the document 119 | # Create a new Document instance using the loaded data 120 | doc = Document(page_content=doc_data['page_content'], metadata=doc_data['metadata']) 121 | docs.append(doc) # Add the document to the docs list 122 | continue 123 | 124 | with open(save_path, "wb") as fp: 125 | fp.write(image.data) 126 | 127 | # Determine image type 128 | image_type = imghdr.what(save_path) 129 | media_type = f"image/{image_type}" 130 | 131 | image_content = encode_image(save_path) 132 | image_description = image_to_text(image_content, media_type) 133 | if image_description is not None: 134 | page_metadata = {'page': i, 'filename': pdf.name, 'media_type': media_type} 135 | page_metadata.update(metadata) 136 | doc = Document(page_content=clean_text(image_description), metadata=page_metadata) 137 | docs.append(doc) 138 | 139 | with open(json_path, "w") as file: 140 | json.dump(doc.__dict__, file) 141 | 142 | else: 143 | print(f"Failed to extract text from image {image.name}.") 144 | 145 | put_file(io.BytesIO(image.data), image.name, CollectionType.IMAGES.value) 146 | return docs 147 | else: 148 | return None 149 | 150 | 151 | def encode_image(image_path): 152 | 
"""Function to encode images""" 153 | with open(image_path, "rb") as image_file: 154 | return base64.b64encode(image_file.read()).decode('utf-8') 155 | -------------------------------------------------------------------------------- /rag_assistant/shared/llm_facade.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | from utils.config_loader import load_config 4 | 5 | config = load_config() 6 | 7 | 8 | # La génération par LLM va etre fait au moment de la création du summary index. 9 | # cela prend trop de temps ici. 10 | # l'objectif est de mutualiser la fonctionnalités conversations starters pour les deux chats 11 | # llm = load_model(streaming=True) 12 | # 13 | # context = summary_query_engine.query("Make a complete summary of knowledge available" 14 | # " on following topics {topics}.") 15 | # 16 | # ### Answer question ### 17 | # cs_system_prompt = """You are a helpful solution architect and software engineer assistant. 18 | # Your users are asking questions on specific topics.\ 19 | # Suggest exactly 6 questions related to the provided context to help them find the information they need. \ 20 | # Suggest only short questions without compound sentences. \ 21 | # Question must be self-explanatory and topic related. 22 | # Suggest a variety of questions that cover different aspects of the context. \ 23 | # Use the summary of knowledge to generate the question on topics. \ 24 | # Make sure they are complete questions, and that they are related to the topics. 25 | # Output one question per line. Do not number the questions. Do not group question by topics. 26 | # DO NOT make a summary or an introduction of your result. Output ONLY the generated questions. 27 | # DO NOT output chapter per topic. Avoid blank line. 28 | # Avoid duplicate question. Generate question in French. 29 | # Questions: """ 30 | # 31 | # # Examples: 32 | # # What information needs to be provided during IHM launch? 33 | # # How is the data transferred to the service call? 34 | # # What functions are involved in API Management? 35 | # # What does the Exposure function in API Management entail? 
36 | # 37 | # cs_prompt = ChatPromptTemplate.from_messages( 38 | # [ 39 | # ("system", cs_system_prompt), 40 | # ("human", "{topics}" 41 | # "{summary}"), 42 | # ] 43 | # ) 44 | # output_parser = StrOutputParser() 45 | # model = load_model(streaming=False) 46 | # 47 | # chain = cs_prompt | model | output_parser 48 | # response = chain.invoke({"topics": topics, "summary": context}) 49 | 50 | def get_conversation_starters(topics: list[str], count:int = 4): 51 | 52 | response = "" 53 | 54 | response_list = [line for line in response.split("\n") if line.strip() != ''] 55 | if len(response_list) > count: 56 | response_list = random.sample(response_list, count) 57 | 58 | elif len(response_list) < count: 59 | diff = count - len(response_list) 60 | 61 | suggested_questions = suggested_questions_examples 62 | 63 | # check if 'API' is in topics 64 | if 'API' in topics: 65 | suggested_questions.extend(suggested_questions_examples_api) 66 | 67 | # check if 'IHM' is in topics 68 | if 'IHM' in topics: 69 | suggested_questions.extend(suggested_questions_examples_ihm) 70 | 71 | all_questions = list(suggested_questions) 72 | 73 | selected_questions = set(response_list) 74 | 75 | while len(selected_questions) < count: 76 | question = random.choice(suggested_questions) 77 | selected_questions.add(question) 78 | 79 | response_list = list(selected_questions) 80 | # for _ in range(min(count, len(all_questions))): 81 | # question = random.choice(all_questions) 82 | # all_questions.remove(question) 83 | # response_list.append(question) 84 | 85 | #additional_questions = random.sample(suggested_questions, diff) 86 | #response_list.extend(additional_questions) 87 | 88 | return response_list 89 | 90 | 91 | suggested_questions_examples = [ 92 | "Comment sécuriser les données sensibles ?", 93 | "Comment assurer l'efficacité des performances ?", 94 | "En quoi consiste l'Analyse de risque MESARI ?", 95 | "A quoi sert le Cross Origin Resource Sharing ?", 96 | "Quels sont les principes de la Content Security Policy ?", 97 | "Comment garantir la sécurité des échanges entre applications ?", 98 | "Quelles sont les bonnes pratiques pour assurer la fiabilité des ressources Web ?", 99 | "Pourquoi suivre les spécifications associées est-il important ?" 
100 | ] 101 | # API 102 | suggested_questions_examples_api = [ 103 | "Quels sont les mécanismes d'authentification API ?", 104 | "Quelles sont les principales fonctionnalités du portail fournisseur ?", 105 | "Que comprend la fonction d'exposition dans la gestion des API ?", 106 | "Quelle est la différence entre SOAP et REST ?", 107 | "Que signifie l'acronyme API ?", 108 | "Quels formats de données sont couramment utilisés dans les APIs ?", 109 | "Comment tester et déboguer une API ?", 110 | "Quels sont les avantages d'utiliser une API ?", 111 | "Que signifie REST et quels en sont les principes clés ?", 112 | "Comment gérer les versions dans une API ?", 113 | "Quels outils permettent de documenter une API ?", 114 | "Comment implémenter une pagination dans une API ?", 115 | "Qu'est-ce qu'une architecture d'API ?", 116 | ] 117 | suggested_questions_examples_ihm = [ 118 | # IHM 119 | "Quelles informations doivent être fournies lors du lancement de l'IHM?", 120 | "Quels sont les principes de base d'une bonne conception d'interface utilisateur ?", 121 | "Comment rendre une interface utilisateur accessible aux personnes handicapées ?", 122 | "Quels sont les différents types de composants d'interface utilisateur ?", 123 | "Comment concevoir une expérience utilisateur cohérente sur différents appareils ?", 124 | "Quels sont les avantages du design 'mobile first' ?", 125 | "Comment effectuer des tests d'utilisabilité pour une interface ?", 126 | "Que signifie 'responsive design' pour une interface web ?", 127 | "Quels frameworks facilitent le développement d'interfaces utilisateur modernes ?", 128 | "Comment optimiser les performances d'une interface utilisateur ?", 129 | "Quelle est l'importance des conventions de conception dans une interface ?", 130 | ] 131 | # AUTRES QUESTIONS API 132 | # "Comment structurer une API RESTful ?", 133 | # "Quels sont les bons usages des méthodes HTTP ?", 134 | # "Comment définir des URIs pour les ressources ?", 135 | # "Qu'est-ce que HATEOAS et comment l'implémenter ?", 136 | # "Comment paginer et filtrer des collections de ressources ?", 137 | # "Quels mécanismes utiliser pour l'authentification API ?", 138 | # "Comment gérer les versions d'une API ?", 139 | # "Quelle est la stratégie de contrôle d'accès recommandée ?", 140 | # "Comment documenter une API efficacement ?", 141 | # "Comment implémenter le throttling pour une API ?", 142 | # "Quels sont les principes de conception d'une IHM intuitive ?", 143 | # "Comment assurer la résilience d'une API ?", 144 | # "Quels sont les formats standards pour les données API ?", 145 | # "Comment surveiller la performance d'une API ?", 146 | # "Quels sont les aspects de sécurité à considérer pour une API ?", 147 | # "Comment gérer la rétrocompatibilité des API ?", 148 | # "Quels sont les avantages du caching pour une API ?", 149 | # "Comment assurer la haute disponibilité d'une API ?", 150 | # "Quels outils utiliser pour le monitoring d'une API ?", 151 | # "Comment prévenir les injections dans une API ?" 
152 | 153 | 154 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # applied-ai-rag-assistant 2 | Advanced RAG Assistant with Streamlit, LangChain, LlamaIndex and ChromaDB 3 | 4 | Initially forked from https://github.com/langchain-ai/streamlit-agent/ `chat_with_documents.py` 5 | 6 | Apps feature LangChain 🤝 Streamlit integrations such as the 7 | [Callback integration](https://python.langchain.com/docs/modules/callbacks/integrations/streamlit) and 8 | [StreamlitChatMessageHistory](https://python.langchain.com/docs/integrations/memory/streamlit_chat_message_history). 9 | 10 | We have since added Mistral La Plateforme, Bedrock, LlamaIndex and LangChain agents for advanced RAG, as well as vision-model support for RAG with Anthropic Claude. 11 | 12 | ## Setup 13 | 14 | This project uses [Poetry](https://python-poetry.org/) for dependency management. 15 | 16 | ```shell 17 | # Create Python environment 18 | $ poetry install 19 | 20 | # Install git pre-commit hooks 21 | $ poetry shell 22 | $ pre-commit install 23 | ``` 24 | 25 | ### Note on package dependencies 26 | For now, we do not pin package versions in Poetry and try to upgrade as fast as we can. :) 27 | Because we rely on many new and fast-moving "GenAI" components whose interfaces are not yet stable, 28 | the application and tests tend to break often, especially since these packages are not tested against each other's releases. 29 | 30 | Main packages are: 31 | - LangChain (LLM orchestration and agents) 32 | - LlamaIndex (RAG) 33 | - Streamlit (UX) 34 | - TruLens (Testing) 35 | - Chroma (Vector Store) 36 | - OpenAI (LLM) 37 | - MistralAI (LLM) 38 | - boto3 (for Bedrock and AWS integration) 39 | 40 | ## Running 41 | 42 | ### Environment variables 43 | The project expects some environment variables to be set up in order to run. 44 | Some are mandatory and some are only needed if you want to run on a specific platform. 45 | 46 | The project currently supports the following platforms: OPENAI, AZURE, MISTRAL, BEDROCK (AWS). 47 | 48 | We recommend adding the variables to a .env file located outside the project directory to avoid any accidental commit. 49 | Your home directory is fine. 50 | 51 | Here are the variables: 52 | 53 | ```shell 54 | OPENAI_API_KEY= 55 | MISTRAL_API_KEY= 56 | AZURE_OPENAI_API_KEY= 57 | HF_TOKEN= 58 | LANGCHAIN_TRACING_V2= 59 | LANGCHAIN_API_KEY= 60 | ``` 61 | 62 | ### MISTRAL PLATFORM 63 | If you want to use Mistral La Plateforme, you need a MISTRAL_API_KEY and an HF_TOKEN. 64 | HF_TOKEN is required to download the embeddings from Hugging Face. 65 | The download happens automatically, but you must have the HF_TOKEN and have been granted access on the model page on Hugging Face. 66 | https://huggingface.co/mistralai/Mixtral-8x7B-v0.1 67 | 68 | 69 | ### LANGSMITH and LLM OBSERVABILITY 70 | We use LangSmith for LLM observability. 71 | LangSmith requires LANGCHAIN_TRACING_V2 and a LANGCHAIN_API_KEY. 72 | 73 | You can disable tracing with 'LANGCHAIN_TRACING_V2=false'. 74 | Oddly, 'LANGCHAIN_API_KEY' is still required even if you set 'LANGCHAIN_TRACING_V2' to false. 75 | In that case you can put anything in it; the variable only needs to exist. 76 | 77 | LangSmith is free for personal use with a quota of 5k traces per month. 78 | It is very useful, so we recommend it.
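For illustration, a minimal `.env` sketch for the LangSmith variables could look like the following (the key value is a placeholder, not a real key):

```shell
# Set to false to disable tracing; LANGCHAIN_API_KEY must still be defined (any value works)
LANGCHAIN_TRACING_V2=true
LANGCHAIN_API_KEY=<your-langsmith-api-key>
```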
79 | 80 | https://smith.langchain.com/ 81 | 82 | ### AWS BEDROCK 83 | If you want to use Bedrock (AWS), you can define your credentials in the $HOME/.aws/credentials file. 84 | We use eu-west-3 and eu-central-1 for Anthropic Claude, Mistral Large and Titan embeddings within Bedrock. 85 | Adapt this to your own needs. Beware that models are not consistently deployed across AWS regions. 86 | 87 | 88 | ### MODEL VISION 89 | We are starting to add vision model support to the assistant. 90 | For now, only Claude 3 vision through AWS Bedrock is supported. 91 | 92 | 93 | ## Config 94 | Most parameters like model name, region, etc. can be modified in conf/config.ini for all model providers. 95 | 96 | 97 | ## Testing 98 | We use pytest and TruLens to evaluate the assistant (RAG Triad). 99 | 100 | For RAG testing, we use OpenAI as the provider for the TruLens feedback functions, so you need at least an OpenAI API key to make it work. 101 | You can adapt it for your own purposes. 102 | 103 | Tests in the tests/utils/ directory use Mistral Large through 'La Plateforme', so you'll need a MISTRAL_API_KEY. 104 | Tests in tests/rag/ use Bedrock (AWS) and OpenAI GPT, so you'll need an OPENAI_API_KEY and AWS credentials. 105 | 106 | 107 | ```shell 108 | # Run the Streamlit app 109 | $ streamlit run rag_assistant/streamlit_app.py 110 | ``` 111 | 112 | # Running with Docker (OLD) 113 | 114 | This project includes a `Dockerfile` to run the app in a Docker container. The Docker image is optimised for size and build time using cache techniques. 115 | 116 | To build the image with `DOCKER_BUILDKIT`, use the command below: 117 | 118 | ```DOCKER_BUILDKIT=1 docker build --target=runtime . -t applied-ai-rag-assistant:latest``` 119 | 120 | 1. Run the docker container directly 121 | 122 | ``docker run -d --name applied-ai-rag-assistant -p 8051:8051 applied-ai-rag-assistant:latest `` 123 | 124 | 2. Run the docker container using docker-compose (Recommended) 125 | 126 | Edit the command in `docker-compose.yml` to target the Streamlit app 127 | 128 | ``docker-compose up`` 129 | 130 | ## Run the app with Docker 131 | 132 | Build the image: 133 | ```sh 134 | docker build -t ai_assistant . 135 | ``` 136 | 137 | Then run a container from the image we just created: 138 | ```sh 139 | docker run -p 80:80 -e OPENAI_API_KEY="secret_value" ai_assistant 140 | ``` 141 | Replace secret_value with your OpenAI API key. 142 | 143 | The application should run on http://localhost:80/ 144 | 145 | ## Run the app on AWS 146 | 147 | Install the AWS CLI: https://docs.aws.amazon.com/fr_fr/cli/latest/userguide/getting-started-install.html 148 | Install Docker: https://docs.docker.com/engine/install/ 149 | 150 | Build and push the Docker image to the AWS Elastic Container Registry (ECR) with the AWS CLI: 151 | ```sh 152 | docker build -t ai_assistant . 153 | aws configure set aws_access_key_id "access-key" 154 | aws configure set aws_secret_access_key "secret-access-key" 155 | aws ecr get-login-password --region eu-west-1 | docker login --username AWS --password-stdin 441525731509.dkr.ecr.eu-west-1.amazonaws.com 156 | docker tag ai_assistant:latest 441525731509.dkr.ecr.eu-west-1.amazonaws.com/ai_assistant:latest 157 | docker push 441525731509.dkr.ecr.eu-west-1.amazonaws.com/ai_assistant:latest 158 | ``` 159 | Replace access-key and secret-access-key with valid AWS credentials that will be used to push to the ECR. The AWS user must have the correct rights to push images to the ECR.
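If you want to confirm the push succeeded before deploying, a quick check with the AWS CLI is sketched below (assuming the repository is named `ai_assistant`, as in the tag above):

```sh
# List the images currently stored in the ai_assistant ECR repository
aws ecr describe-images --repository-name ai_assistant --region eu-west-1
```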
160 | 161 | Once the image pushed on the ECR, go to the terraform directory with (make sure to meet the basic requirements in AWS so that the terraform files works (see AWS resources requirements)) : 162 | ```sh 163 | cd terraform 164 | ``` 165 | and run: 166 | ```sh 167 | export AWS_ACCESS_KEY="access-key" 168 | export AWS_SECRET_ACCESS_KEY="secret-access-key" 169 | terraform init 170 | terraform plan 171 | terraform apply 172 | ``` 173 | Terraform is needed : https://developer.hashicorp.com/terraform/tutorials/aws-get-started/install-cli 174 | Replace access-key and secret-access-key with valid AWS credentials that will be used to create the resources. The account must have all the necessary rights to create and access the resources needed. 175 | 176 | You will find the ECS cluster here : https://eu-west-1.console.aws.amazon.com/ecs/v2/clusters?region=eu-west-1 177 | 178 | To redeploy the service with the latest version of the application, go to : https://eu-west-1.console.aws.amazon.com/ecs/v2/clusters/ai_assistant/services?region=eu-west-1. 179 | Select your running service in the services list 180 | Click on Update. 181 | Check Force new Deployment, and click on Update. 182 | The latest version of the image will be deployed with the new service. -------------------------------------------------------------------------------- /tests/utils/test_utilsrag_li.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import os 3 | from dotenv import load_dotenv, find_dotenv 4 | 5 | from llama_index.core import SimpleDirectoryReader, Settings 6 | from llama_index.embeddings.mistralai import MistralAIEmbedding 7 | from llama_index.llms.mistralai import MistralAI 8 | 9 | import rag_assistant.utils.utilsrag_li 10 | from rag_assistant.utils.utilsrag_li import create_automerging_engine, create_sentence_window_engine, create_subquery_engine, \ 11 | create_direct_query_engine, create_li_agent 12 | 13 | import shutil 14 | 15 | 16 | import numpy as np 17 | 18 | import nest_asyncio 19 | 20 | from trulens_eval import ( 21 | Feedback, 22 | TruLlama, 23 | OpenAI, 24 | Tru, Select 25 | ) 26 | from trulens_eval.app import App 27 | 28 | load_dotenv(find_dotenv()) 29 | 30 | # Set OpenAI API key from Streamlit secrets 31 | openai_api_key = os.getenv('OPENAI_API_KEY') 32 | 33 | nest_asyncio.apply() 34 | 35 | provider = OpenAI() 36 | 37 | def get_openai_api_key(): 38 | _ = load_dotenv(find_dotenv()) 39 | 40 | return os.getenv("OPENAI_API_KEY") 41 | 42 | 43 | def get_hf_api_key(): 44 | _ = load_dotenv(find_dotenv()) 45 | 46 | return os.getenv("HUGGINGFACE_API_KEY") 47 | 48 | 49 | def get_trulens_feedbacks(query_engine): 50 | 51 | context = App.select_context(query_engine) 52 | 53 | qa_relevance = ( 54 | Feedback(provider.relevance_with_cot_reasons, name="Answer Relevance") 55 | .on_input_output() 56 | ) 57 | 58 | qs_relevance = ( 59 | Feedback(provider.context_relevance_with_cot_reasons, name="Context Relevance") 60 | .on_input() 61 | .on(context) 62 | .aggregate(np.mean) 63 | ) 64 | 65 | groundedness = ( 66 | Feedback(provider.groundedness_measure_with_cot_reasons, name = "Groundedness") 67 | .on(context.collect()) 68 | .on_output() 69 | ) 70 | 71 | feedbacks = [qa_relevance, qs_relevance, groundedness] 72 | return feedbacks 73 | 74 | 75 | def get_prebuilt_trulens_recorder(query_engine, app_id, feedbacks): 76 | tru_recorder = TruLlama( 77 | query_engine, 78 | app_id=app_id, 79 | feedbacks=feedbacks 80 | ) 81 | return tru_recorder 82 | 83 | 84 | 
@pytest.fixture(scope="module") 85 | def temp_dir(request): 86 | # TODO define a test specific dir... 87 | # Setup: Create a temporary directory for the test module 88 | dir_name = rag_assistant.utils.utilsrag_li.llama_index_root_dir 89 | os.makedirs(dir_name, exist_ok=True) 90 | shutil.rmtree(dir_name) 91 | # Yield the directory name to the tests 92 | yield dir_name 93 | 94 | # Teardown: Remove the temporary directory after tests are done 95 | if os.path.isdir(dir_name): # Check if the directory exists before removing it 96 | #shutil.rmtree(dir_name) # TODO commenting this while fix above is not done 97 | pass 98 | 99 | 100 | def llm_prepare(): 101 | llm = MistralAI(model="mistral-large-latest") 102 | 103 | Settings.llm = llm 104 | 105 | return llm 106 | 107 | 108 | def embeddings_prepare(): 109 | embed_model = MistralAIEmbedding() 110 | 111 | Settings.embed_model = embed_model 112 | 113 | return embed_model 114 | 115 | 116 | @pytest.fixture 117 | def docs_prepare(): 118 | documents = SimpleDirectoryReader( 119 | input_files=["tests/utils/eBook-How-to-Build-a-Career-in-AI.pdf"] 120 | ).load_data() 121 | return documents 122 | 123 | 124 | @pytest.fixture 125 | def eval_questions_prepare(): 126 | eval_questions = [] 127 | with open('tests/utils/eval_questions.txt', 'r') as file: 128 | for line in file: 129 | # Remove newline character and convert to integer 130 | item = line.strip() 131 | print(item) 132 | eval_questions.append(item) 133 | return eval_questions 134 | 135 | 136 | def test_automerging_agent(temp_dir, 137 | docs_prepare, eval_questions_prepare, trulens_prepare): 138 | 139 | llm = llm_prepare() 140 | 141 | query_engine = create_automerging_engine(docs_prepare) 142 | 143 | feedbacks = get_trulens_feedbacks(query_engine) 144 | 145 | tru_recorder = get_prebuilt_trulens_recorder(query_engine, 146 | app_id="Automerging Query Engine", 147 | feedbacks=feedbacks) 148 | 149 | with tru_recorder as recording: 150 | for question in eval_questions_prepare: 151 | response = query_engine.query(question) 152 | assert response is not None, "L'interprétation n'a pas retourné de résultat." 153 | 154 | agent = create_li_agent(name="test_automerging_agent", description="Test Automerging Agent", 155 | query_engine=query_engine, llm=llm) 156 | 157 | response = agent.query("How do I get started on a personal project in AI?") 158 | print(f"response: {str(response)}") 159 | assert response is not None, "L'interprétation n'a pas retourné de résultat." 160 | 161 | 162 | def test_sentence_window_agent(temp_dir, docs_prepare, eval_questions_prepare, trulens_prepare): 163 | 164 | llm = llm_prepare() 165 | 166 | query_engine = create_sentence_window_engine( 167 | docs_prepare, 168 | ) 169 | 170 | feedbacks = get_trulens_feedbacks(query_engine) 171 | 172 | tru_recorder = get_prebuilt_trulens_recorder(query_engine, 173 | app_id="Sentence Window Query Engine", 174 | feedbacks=feedbacks) 175 | 176 | with tru_recorder as recording: 177 | for question in eval_questions_prepare: 178 | response = query_engine.query(question) 179 | 180 | agent = create_li_agent(name="test_sentence_window_agent", description="Test Sentence Window Agent", 181 | query_engine=query_engine, llm=llm) 182 | 183 | response = agent.query("How do I get started on a personal project in AI?") 184 | assert response is not None, "L'interprétation n'a pas retourné de résultat." 
185 | 186 | 187 | def test_llamaindex_agent(temp_dir, docs_prepare, eval_questions_prepare, trulens_prepare): 188 | 189 | llm = llm_prepare() 190 | 191 | query_engine = create_direct_query_engine( 192 | docs_prepare, 193 | ) 194 | 195 | feedbacks = get_trulens_feedbacks(query_engine) 196 | 197 | tru_recorder = get_prebuilt_trulens_recorder(query_engine, 198 | app_id="Direct Query Engine", 199 | feedbacks=feedbacks) 200 | 201 | with tru_recorder as recording: 202 | for question in eval_questions_prepare: 203 | response = query_engine.query(question) 204 | 205 | agent = create_li_agent(name="test_direct_query_agent", description="Test Direct Query Agent", 206 | query_engine=query_engine, 207 | llm=llm) 208 | 209 | response = agent.query("How do I get started on a personal project in AI?") 210 | assert response is not None, "L'interprétation n'a pas retourné de résultat." 211 | 212 | 213 | def test_subquery_agent(temp_dir, docs_prepare, eval_questions_prepare, trulens_prepare): 214 | 215 | llm = llm_prepare() 216 | 217 | topics = ["AI", "Other"] 218 | query_engine = create_subquery_engine( 219 | topics, 220 | docs_prepare, 221 | ) 222 | 223 | feedbacks = get_trulens_feedbacks(query_engine) 224 | 225 | tru_recorder = get_prebuilt_trulens_recorder(query_engine, 226 | app_id="Sub Query Engine", 227 | feedbacks=feedbacks) 228 | 229 | with tru_recorder as recording: 230 | for question in eval_questions_prepare: 231 | response = query_engine.query(question) 232 | 233 | agent = create_li_agent(name="test_subquery_agent", description="Test Subquery Agent", 234 | query_engine=query_engine, 235 | llm=llm) 236 | 237 | response = agent.query("How do I get started on a personal project in AI?") 238 | assert response is not None, "L'interprétation n'a pas retourné de résultat." 
239 | 240 | -------------------------------------------------------------------------------- /rag_assistant/utils/utilsllm.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import openai 3 | import os 4 | 5 | from typing import Optional 6 | 7 | from langchain_aws.chat_models import ChatBedrock 8 | from langchain_core.embeddings import Embeddings 9 | from langchain_core.language_models import BaseChatModel 10 | from langchain_openai import ChatOpenAI, AzureChatOpenAI 11 | from langchain_openai.embeddings import OpenAIEmbeddings, AzureOpenAIEmbeddings 12 | from langchain_mistralai import ChatMistralAI, MistralAIEmbeddings 13 | from langchain_aws.embeddings.bedrock import BedrockEmbeddings 14 | from llama_index.core.base.embeddings.base import BaseEmbedding 15 | from llama_index.core.llms import LLM 16 | 17 | from llama_index.embeddings.mistralai import MistralAIEmbedding as LIMistralAIEmbedding 18 | from llama_index.embeddings.openai import OpenAIEmbedding as LIOpenAIEmbedding 19 | from llama_index.embeddings.bedrock import BedrockEmbedding as LIBedrockEmbedding 20 | from llama_index.llms.mistralai import MistralAI as LIMistralAI 21 | from llama_index.llms.openai import OpenAI as LIOpenAI 22 | from llama_index.llms.bedrock import Bedrock as LIBedrock 23 | 24 | 25 | from dotenv import load_dotenv, find_dotenv 26 | 27 | from .config_loader import load_config 28 | 29 | config = load_config() 30 | 31 | # read local .env file 32 | _ = load_dotenv(find_dotenv()) 33 | 34 | openai.api_key = os.getenv('OPENAI_API_KEY') 35 | mistral_api_key = os.getenv('MISTRAL_API_KEY') 36 | aws_profile_name = os.getenv('profile_name') 37 | 38 | bedrock_region_name = config["BEDROCK"]["AWS_REGION_NAME"] 39 | bedrock_endpoint_url = config["BEDROCK"]["BEDROCK_ENDPOINT_URL"] 40 | 41 | # instantiating the Bedrock client, and passing in the CLI profile 42 | # TODO should be done lazyly only for bedrock 43 | bedrock = boto3.client('bedrock-runtime', bedrock_region_name, 44 | endpoint_url=bedrock_endpoint_url) 45 | 46 | model_kwargs = { 47 | #"maxTokenCount": 4096, 48 | #"stopSequences": [], 49 | "temperature": 0, 50 | #"topP": 1, 51 | } 52 | 53 | 54 | def get_model_provider(model_name:str = None) -> Optional[str]: 55 | provider = None 56 | if model_name is None: 57 | provider = config['MODEL_PROVIDER']['MODEL_PROVIDER'] 58 | elif model_name.startswith("gpt"): 59 | provider = "OPENAI" 60 | elif model_name.startswith("mistral-"): 61 | provider = "MISTRAL" 62 | elif model_name.startswith("mistral.mi"): 63 | provider = "BEDROCK" 64 | elif model_name.startswith("anthropic"): 65 | provider = "BEDROCK" 66 | 67 | return provider 68 | 69 | 70 | def get_model_name(provider: str, model_name: str = None) -> Optional[str]: 71 | 72 | if provider is None: 73 | provider = config['MODEL_PROVIDER']['MODEL_PROVIDER'] 74 | 75 | if provider == "AZURE": 76 | model_name = config['AZURE']['AZURE_OPENAI_API_VERSION'] 77 | elif provider == "OPENAI": 78 | if model_name is None: 79 | model_name = config['OPENAI']['OPENAI_MODEL_NAME'] 80 | elif provider == "MISTRAL": 81 | if model_name is None: 82 | model_name = config['MISTRAL']['CHAT_MODEL'] 83 | elif provider == "BEDROCK": 84 | if model_name is None: 85 | model_name = config['BEDROCK']['CHAT_MODEL'] 86 | 87 | return model_name 88 | 89 | 90 | def load_model(model_name: str = None, temperature: float = 0, streaming:bool = False) -> BaseChatModel: 91 | 92 | provider = get_model_provider(model_name) 93 | model_name = 
get_model_name(model_name=model_name, provider=provider) 94 | if provider == "AZURE": 95 | llm = AzureChatOpenAI( 96 | openai_api_version=config['AZURE']['AZURE_OPENAI_API_VERSION'], 97 | azure_endpoint=config['AZURE']['AZURE_OPENAI_ENDPOINT'], 98 | azure_deployment=model_name, 99 | api_key=os.environ["AZURE_OPENAI_API_KEY"] 100 | ) 101 | elif provider == "OPENAI": 102 | llm = ChatOpenAI(model_name=model_name, temperature=temperature, streaming=streaming) 103 | elif provider == "MISTRAL": 104 | llm = ChatMistralAI(mistral_api_key=mistral_api_key, model=model_name, temperature=temperature) 105 | elif provider == "BEDROCK": 106 | # ChatBedrock --> must be adapted for system prompt get error "first message must use the "user" role" 107 | # temperature not supported 108 | llm = ChatBedrock( 109 | client=bedrock, 110 | model_id=model_name, 111 | streaming=streaming 112 | ) 113 | 114 | else: 115 | raise NotImplementedError(f"Model {provider} unknown.") 116 | 117 | return llm 118 | 119 | 120 | def load_llamaindex_model(model_name: str = None, temperature: float = 0) -> LLM: 121 | 122 | provider = get_model_provider(model_name) 123 | model_name = get_model_name(provider=provider, model_name=model_name) 124 | 125 | if provider == "AZURE": 126 | raise NotImplementedError(f"Model {provider} unsupported for LlamaIndex.") 127 | elif provider == "OPENAI": 128 | llm = LIOpenAI(model=model_name, temperature=temperature) 129 | elif provider == "MISTRAL": 130 | llm = LIMistralAI(api_key=mistral_api_key, model=model_name, temperature=temperature) 131 | elif provider == "BEDROCK": 132 | # ChatBedrock --> must be adapted for system prompt get error "first message must use the "user" role" 133 | llm = LIBedrock( 134 | client=bedrock, 135 | model=model_name, 136 | temperature=temperature 137 | ) 138 | else: 139 | raise NotImplementedError(f"Model {model_name} unknown.") 140 | 141 | return llm 142 | 143 | 144 | def get_embeddings_model_name(provider: str, embeddings_model_name: str = None) -> Optional[str]: 145 | 146 | if provider is None: 147 | provider = config['MODEL_PROVIDER']['MODEL_PROVIDER'] 148 | 149 | if provider == "AZURE": 150 | embeddings_model_name = config['AZURE']['AZURE_OPENAI_EMBEDDING_DEPLOYMENT'] 151 | elif provider == "OPENAI": 152 | if embeddings_model_name is None: 153 | embeddings_model_name = config['OPENAI']['EMBEDDINGS_MODEL'] 154 | elif provider == "MISTRAL": 155 | if embeddings_model_name is None: 156 | embeddings_model_name = config['MISTRAL']['EMBEDDINGS_MODEL'] 157 | elif provider == "BEDROCK": 158 | if embeddings_model_name is None: 159 | embeddings_model_name = config["BEDROCK"]["EMBEDDINGS_MODEL"] 160 | 161 | return embeddings_model_name 162 | 163 | 164 | 165 | def load_embeddings(model_name: str = None) -> Embeddings: 166 | 167 | provider = get_model_provider(model_name) 168 | embeddings_model = get_embeddings_model_name(provider=provider, embeddings_model_name=model_name) 169 | 170 | if provider == "AZURE": 171 | embeddings = AzureOpenAIEmbeddings( 172 | azure_deployment=embeddings_model, 173 | azure_endpoint=config['AZURE']['AZURE_OPENAI_ENDPOINT'], 174 | openai_api_version=config['AZURE']["AZURE_OPENAI_API_VERSION"], 175 | api_key=os.environ["AZURE_OPENAI_API_KEY"] 176 | ) 177 | elif provider == "OPENAI": 178 | embeddings = OpenAIEmbeddings(model_name=embeddings_model) 179 | elif provider == "MISTRAL": 180 | embeddings = MistralAIEmbeddings(model_name=embeddings_model) 181 | elif provider == "BEDROCK": 182 | embeddings = BedrockEmbeddings( 183 | client=bedrock, 184 | 
region_name=bedrock_region_name, 185 | model_id=embeddings_model) 186 | else: 187 | raise NotImplementedError(f"Model {model_name} unknown.") 188 | 189 | return embeddings 190 | 191 | 192 | def load_llamaindex_embeddings(model_name: str = None) -> BaseEmbedding: 193 | 194 | provider = get_model_provider(model_name) 195 | embeddings_model = get_embeddings_model_name(provider=provider, embeddings_model_name=model_name) 196 | 197 | if provider == "AZURE": 198 | raise NotImplementedError(f"Embeddings {provider} unsupported for LlamaIndex.") 199 | elif provider == "OPENAI": 200 | embeddings = LIOpenAIEmbedding(model=embeddings_model) 201 | elif provider == "MISTRAL": 202 | embeddings = LIMistralAIEmbedding(model_name=embeddings_model) 203 | elif provider == "BEDROCK": 204 | embeddings = LIBedrockEmbedding( 205 | region_name=bedrock_region_name, 206 | model_name=embeddings_model, 207 | client=bedrock) 208 | else: 209 | raise NotImplementedError(f"Model {model_name} unknown.") 210 | 211 | return embeddings 212 | 213 | 214 | -------------------------------------------------------------------------------- /rag_assistant/utils/utilsrag_lc.py: -------------------------------------------------------------------------------- 1 | from langchain.chains.retrieval_qa.base import RetrievalQA 2 | from langchain_core.language_models import BaseChatModel 3 | from langchain_core.vectorstores import VectorStore 4 | from pydantic import BaseModel, Field 5 | 6 | from langchain.docstore.document import Document 7 | 8 | from langchain.schema.prompt_template import format_document 9 | 10 | from langchain.chains.combine_documents import collapse_docs, split_list_of_docs 11 | from functools import partial 12 | from operator import itemgetter 13 | 14 | from langchain.callbacks.manager import trace_as_chain_group 15 | 16 | from langchain.utils.openai_functions import convert_pydantic_to_openai_function 17 | from langchain.output_parsers.openai_functions import PydanticOutputFunctionsParser 18 | 19 | from langchain_openai import ChatOpenAI 20 | from langchain.prompts import PromptTemplate 21 | from langchain.schema import StrOutputParser 22 | from langchain.schema.runnable import RunnableParallel, RunnablePassthrough 23 | 24 | from .config_loader import load_config 25 | 26 | config = load_config() 27 | 28 | # "local:BAAI/bge-small-en-v1.5" 29 | 30 | 31 | # https://python.langchain.com/docs/use_cases/question_answering/ 32 | # https://python.langchain.com/docs/modules/chains/document/stuff 33 | # https://python.langchain.com/docs/modules/chains/document/map_reduce 34 | # https://python.langchain.com/docs/modules/chains/document/refine 35 | # https://python.langchain.com/docs/modules/chains/document/map_rerank 36 | def invoke(question: str, template: str, llm: BaseChatModel, chain_type: str, vectorstore: VectorStore, 37 | search_type: str, k: int, verbose: bool): 38 | 39 | retriever = vectorstore.as_retriever(search_type=search_type, search_kwargs={'k': k}) 40 | docs = retriever.invoke(question) 41 | output = None 42 | 43 | if verbose: 44 | print(docs) 45 | 46 | document_prompt = PromptTemplate.from_template("{page_content}") 47 | partial_format_document = partial(format_document, prompt=document_prompt) 48 | # temporary to replace with incoming question 49 | 50 | map_prompt = PromptTemplate.from_template( 51 | "Answer the user question using the context." 
52 | "\n\nContext:\n\n{context}\n\nQuestion: {question}" 53 | ) 54 | 55 | rag_prompt_custom = PromptTemplate.from_template(template) 56 | 57 | def format_docs(docs): 58 | return "\n\n".join(doc.page_content for doc in docs) 59 | 60 | if chain_type == "stuff": 61 | print("stuff chain") 62 | 63 | rag_chain = ( 64 | { 65 | "context": lambda x: "\n\n".join( 66 | format_document(doc, document_prompt) for doc in x["docs"] 67 | ), 68 | "question": itemgetter("question"), 69 | } 70 | | map_prompt 71 | | llm 72 | | StrOutputParser() 73 | ) 74 | output = rag_chain.invoke({"docs": docs, "question": question}) 75 | 76 | elif chain_type == "map_reduce": 77 | 78 | print("map_reduce chain") 79 | 80 | # PromptTemplate.from_template("Summarize this content:\n\n{context}") 81 | first_prompt = PromptTemplate.from_template( 82 | "Answer the user question using the context." 83 | "\n\nContext:\n\n{context}\n\nQuestion: " + question 84 | ) 85 | # first_prompt = first_prompt.format_prompt(question=question) 86 | 87 | # The chain we'll apply to each individual document. 88 | map_chain = ( 89 | {"context": partial_format_document} 90 | | first_prompt 91 | | llm 92 | | StrOutputParser() 93 | ) 94 | 95 | # A wrapper chain to keep the original Document metadata 96 | map_as_doc_chain = ( 97 | RunnableParallel({"doc": RunnablePassthrough(), "context": map_chain}) 98 | | ( 99 | lambda x: Document(page_content=x["context"], metadata=x["doc"].metadata) 100 | ) 101 | ).with_config(run_name="Summarize (return doc)") 102 | 103 | # The chain we'll repeatedly apply to collapse subsets of the documents 104 | # into a consolidate document until the total token size of our 105 | # documents is below some max size. 106 | collapse_chain = ( 107 | {"context": format_docs} 108 | | PromptTemplate.from_template("Collapse this content:\n\n{context}") 109 | | llm 110 | | StrOutputParser() 111 | ) 112 | 113 | def get_num_tokens(docs): 114 | return llm.get_num_tokens(format_docs(docs)) 115 | 116 | def collapse( 117 | docs, 118 | config, 119 | token_max=4000, 120 | ): 121 | collapse_ct = 1 122 | while get_num_tokens(docs) > token_max: 123 | config["run_name"] = f"Collapse {collapse_ct}" 124 | invoke = partial(collapse_chain.invoke, config=config) 125 | split_docs = split_list_of_docs(docs, get_num_tokens, token_max) 126 | docs = [collapse_docs(_docs, invoke) for _docs in split_docs] 127 | collapse_ct += 1 128 | return docs 129 | 130 | # The chain we'll use to combine our individual document summaries 131 | # (or summaries over subset of documents if we had to collapse the map results) 132 | # into a final summary. 133 | reduce_chain = ( 134 | {"context": format_docs} 135 | | PromptTemplate.from_template("Combine these answers:\n\n{context}") 136 | | llm 137 | | StrOutputParser() 138 | ).with_config(run_name="Reduce") 139 | 140 | # The final full chain 141 | rag_chain = (map_as_doc_chain.map() | collapse | reduce_chain).with_config( 142 | run_name="Map reduce" 143 | ) 144 | 145 | output = rag_chain.invoke(docs, config={"max_concurrency": 5}) 146 | 147 | elif chain_type == "map_rerank": 148 | print("map_reduce chain") 149 | 150 | # Chain to apply to each individual document. Chain 151 | # provides an answer to the question based on the document 152 | # and scores it's confidence in the answer. 153 | class AnswerAndScore(BaseModel): 154 | """Return the answer to the question and a relevance score.""" 155 | 156 | answer: str = Field( 157 | description="The answer to the question, which is based ONLY on the provided context." 
158 | ) 159 | score: float = Field( 160 | description="A 0.0-1.0 relevance score, where 1.0 indicates the provided context answers the question completely and 0.0 indicates the provided context does not answer the question at all." 161 | ) 162 | 163 | function = convert_pydantic_to_openai_function(AnswerAndScore) 164 | map_chain = ( 165 | map_prompt 166 | | ChatOpenAI().bind( 167 | temperature=0, functions=[function], function_call={"name": "AnswerAndScore"} 168 | ) 169 | | PydanticOutputFunctionsParser(pydantic_schema=AnswerAndScore) 170 | ).with_config(run_name="Map") 171 | 172 | # Final chain, which after answer and scoring based on 173 | # each doc return the answer with the highest score. 174 | 175 | def top_answer(scored_answers): 176 | return max(scored_answers, key=lambda x: x.score).answer 177 | 178 | # document_prompt = PromptTemplate.from_template("{page_content}") 179 | rag_chain = ( 180 | ( 181 | lambda x: [ 182 | { 183 | "context": format_document(doc, document_prompt), 184 | "question": question, # x["question"] 185 | } 186 | for doc in x["docs"] 187 | ] 188 | ) 189 | | map_chain.map() 190 | | top_answer 191 | ).with_config(run_name="Map rerank") 192 | 193 | output = rag_chain.invoke({"docs": docs, "question": question}) 194 | 195 | elif chain_type == "refine": 196 | # first_prompt = PromptTemplate.from_template("Summarize this content:\n\n{context}") 197 | first_prompt = PromptTemplate.from_template( 198 | "Answer the user question using the context." 199 | "\n\nContext:\n\n{context}\n\nQuestion: " + question 200 | ) 201 | document_prompt = PromptTemplate.from_template("{page_content}") 202 | partial_format_doc = partial(format_document, prompt=document_prompt) 203 | summary_chain = {"context": partial_format_doc} | first_prompt | llm | StrOutputParser() 204 | refine_prompt = PromptTemplate.from_template( 205 | "Answer the user question." 206 | "\n\nHere's your first summary: {prev_response}. 
" 207 | "\n\nNow add to it based on the following context: {context}\n\nQuestion: " + question 208 | ) 209 | refine_chain = ( 210 | { 211 | "prev_response": itemgetter("prev_response"), 212 | "context": lambda x: partial_format_doc(x["doc"]), 213 | } 214 | | refine_prompt 215 | | llm 216 | | StrOutputParser() 217 | ) 218 | 219 | def refine_loop(docs): 220 | with trace_as_chain_group("refine loop", inputs={"input": docs}) as manager: 221 | summary = summary_chain.invoke( 222 | docs[0], config={"callbacks": manager, "run_name": "initial summary"} 223 | ) 224 | for i, doc in enumerate(docs[1:]): 225 | summary = refine_chain.invoke( 226 | {"prev_response": summary, "doc": doc}, 227 | config={"callbacks": manager, "run_name": f"refine {i}"}, 228 | ) 229 | manager.on_chain_end({"output": summary}) 230 | return summary 231 | 232 | output = refine_loop(docs) 233 | return output 234 | 235 | 236 | def agent_lc_factory(chain_type, llm, search_kwargs, search_type, vectorstore): 237 | 238 | retriever = vectorstore.as_retriever(search_type=search_type, search_kwargs=search_kwargs) 239 | retrieval_qa_chain = RetrievalQA.from_chain_type( 240 | llm=llm, chain_type=chain_type, retriever=retriever 241 | ) 242 | return retrieval_qa_chain 243 | -------------------------------------------------------------------------------- /rag_assistant/legacy_ux/1_RAG_agent_with_LC.py: -------------------------------------------------------------------------------- 1 | from json import JSONDecodeError 2 | from typing import Union 3 | 4 | import chromadb 5 | import streamlit as st 6 | from langchain.agents import AgentExecutor, create_structured_chat_agent 7 | from langchain_community.vectorstores.chroma import Chroma 8 | from langchain_core.documents import Document 9 | from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder 10 | from langchain_core.tools import Tool, ToolException 11 | from langsmith import traceable 12 | from streamlit.runtime.uploaded_file_manager import UploadedFile 13 | 14 | from utils.config_loader import load_config 15 | from utils.utilsdoc import load_doc 16 | 17 | from utils.utilsrag_lc import agent_lc_factory 18 | 19 | from utils.utilsllm import load_model, load_embeddings 20 | 21 | from dotenv import load_dotenv, find_dotenv 22 | 23 | from langchain_core.runnables.history import RunnableWithMessageHistory 24 | from langchain_community.chat_message_histories import ( 25 | StreamlitChatMessageHistory, 26 | ) 27 | 28 | from langchain_core.tracers.context import tracing_v2_enabled 29 | 30 | # EXTERNALISATION OF PROMPTS TO HAVE THEIR OWN VERSIONING 31 | from shared.rag_prompts import __structured_chat_agent__, human 32 | 33 | load_dotenv(find_dotenv()) 34 | 35 | config = load_config() 36 | 37 | app_name = config['DEFAULT']['APP_NAME'] 38 | LLM_MODEL = config['MODEL_PROVIDER']['MODEL_PROVIDER'] 39 | 40 | topics = ["Cloud", "Security", "GenAI", "Application", "Architecture", "AWS", "Other"] 41 | 42 | model_to_index = { 43 | "OPENAI": 0, 44 | "MISTRAL": 1, 45 | "BEDROCK": 2 46 | } 47 | 48 | 49 | def load_sidebar(): 50 | with st.sidebar: 51 | st.header("Parameters") 52 | st.sidebar.subheader("LangChain model provider") 53 | st.sidebar.checkbox("OpenAI", LLM_MODEL == "OPENAI", disabled=True) 54 | st.sidebar.checkbox("Mistral", LLM_MODEL == "MISTRAL", disabled=True) 55 | st.sidebar.checkbox("Bedrock", LLM_MODEL == "BEDROCK", disabled=True) 56 | 57 | 58 | def _load_doc(pdfs: Union[list[UploadedFile], None, UploadedFile]) -> list[Document]: 59 | # loader = 
PyPDFDirectoryLoader("data/sources/pdf/") 60 | # all_docs = loader.load() 61 | all_docs = load_doc(pdfs) 62 | return all_docs 63 | 64 | 65 | def configure_agent(all_docs: list[Document], model_name, chain_type, search_type="similarity", search_kwargs=None): 66 | 67 | embeddings_rag = load_embeddings(model_name) 68 | llm_rag = load_model(model_name, temperature=0.1) 69 | 70 | chroma_client = chromadb.EphemeralClient() 71 | 72 | vectorstore = Chroma.from_documents( 73 | documents=all_docs, 74 | embedding=embeddings_rag, 75 | client=chroma_client, 76 | collection_name="RAG_LC_Agent" 77 | ) 78 | 79 | # vectorstore = get_store() # embeddings_rag, collection_name="RAG_LC_Agent") 80 | retrieval_qa_chain = agent_lc_factory(chain_type, llm_rag, search_kwargs, 81 | search_type, vectorstore) 82 | 83 | def _handle_error(error: ToolException) -> str: 84 | if error == JSONDecodeError: 85 | return "Reformat in JSON and try again" 86 | elif error.args[0].startswith("Too many arguments to single-input tool"): 87 | return "Format in a SINGLE STRING. DO NOT USE MULTI-ARGUMENTS INPUT." 88 | return ( 89 | "The following errors occurred during tool execution:" 90 | + error.args[0] 91 | + "Please try another tool.") 92 | 93 | lc_tools = [ 94 | Tool( 95 | name=f"Knowledge Agent (LC)", 96 | func=retrieval_qa_chain, 97 | description=f"""Useful when you need to answer questions on {topics}. " 98 | "DO NOT USE MULTI-ARGUMENTS INPUT.""", 99 | handle_tool_error=_handle_error, 100 | ), 101 | ] 102 | ## START LANGCHAIN 103 | # MODEL FOR LANGCHAIN IS DEFINE GLOBALLY IN CONF/CONFIG.INI 104 | # defaulting to "gpt-4-turbo" because it is the only one resilient 105 | llm_agent = load_model("gpt-4o") 106 | 107 | prompt = ChatPromptTemplate.from_messages( 108 | [ 109 | ("system", __structured_chat_agent__), 110 | MessagesPlaceholder("rag_chat_history", optional=True), 111 | ("human", human), 112 | ] 113 | ) 114 | 115 | # create_react_agent 116 | agent = create_structured_chat_agent( 117 | llm=llm_agent, 118 | tools=lc_tools, 119 | prompt=prompt 120 | ) 121 | 122 | # 123 | # TODO 124 | # sometimes received "Parsing LLM output produced both a final answer and a parse-able action" with mistral 125 | # add a handle_parsing_errors, reduce the case but still appears time to time. 126 | agent_executor = AgentExecutor( 127 | agent=agent, 128 | tools=lc_tools, 129 | handle_parsing_errors="Check your output and make sure it conforms to required format!" 
130 | "Format is Action:```$JSON_BLOB``` then Observation" 131 | " Do not output an action and a final answer at the same time.") 132 | ## END LANGCHAIN 133 | return agent_executor 134 | 135 | 136 | @traceable(run_type="chain", project_name="RAG Assistant", tags=["LangChain", "RAG", "Agent"]) 137 | def call_chain(chain_with_history, prompt): 138 | config = {"configurable": {"session_id": "any"}} 139 | response = chain_with_history.invoke( 140 | input={ 141 | "input": prompt 142 | }, 143 | config=config 144 | ) 145 | answer = f"🦜: {response['output']}" 146 | st.write(answer) 147 | 148 | 149 | def main(): 150 | 151 | st.title("Question Answering Assistant (RAG)") 152 | 153 | load_sidebar() 154 | 155 | model_index = model_to_index[LLM_MODEL] 156 | agent_model = st.sidebar.radio("RAG Agent LLM Provider", ["OPENAI", "MISTRAL", "BEDROCK"], index=model_index) 157 | 158 | st.sidebar.subheader("RAG Agent Model") 159 | model_name_gpt = st.sidebar.radio("OpenAI Model", ["gpt-3.5-turbo", "gpt-4-turbo", "gpt-4o"], 160 | captions=["GPT 3.5 Turbo", "GPT 4 Turbo", "GPT 4 Omni"], 161 | index=0, disabled=agent_model != "OPENAI") 162 | 163 | model_name_mistral = st.sidebar.radio("Mistral Model", ["mistral-small-latest", "mistral-medium-latest", "mistral-large-latest"], 164 | captions=["Mistral 7b", "Mixtral", "Mistral Large"], 165 | index=2, disabled=agent_model != "MISTRAL") 166 | 167 | model_name_bedrock = st.sidebar.radio("Bedrock Model", ["mistral.mistral-large-2402-v1:0", 168 | "anthropic.claude-3-sonnet-20240229-v1:0"], 169 | captions=["Mistral Large", 170 | "Claude 3 Sonnet"], 171 | index=0, disabled=agent_model != "BEDROCK") 172 | 173 | model_name = None 174 | if agent_model == "MISTRAL": 175 | model_name = model_name_mistral 176 | elif agent_model == "OPENAI": 177 | model_name = model_name_gpt 178 | elif agent_model == "BEDROCK": 179 | model_name = model_name_bedrock 180 | 181 | chain_type = st.sidebar.radio("Chain type (LangChain)", 182 | ["stuff", "map_reduce", "refine", "map_rerank"]) 183 | 184 | st.sidebar.subheader("Search params (LangChain)") 185 | k = st.sidebar.slider('Number of relevant chunks', 2, 10, 4, 1) 186 | 187 | search_type = st.sidebar.radio("Search Type", ["similarity", "mmr", 188 | "similarity_score_threshold"]) 189 | 190 | pdfs = st.file_uploader("Document(s) à transmettre", type=['pdf', 'txt', 'md'], accept_multiple_files=True) 191 | 192 | disabled = True 193 | 194 | docs = [] 195 | # if st.button("Transmettre", disabled=disabled): 196 | # calling an internal function for adapting LC or LI Document 197 | docs = _load_doc(pdfs) 198 | 199 | if (docs is not None) and (len(docs)): 200 | disabled = False 201 | 202 | if not disabled: 203 | 204 | history = StreamlitChatMessageHistory(key="rag_chat_history") 205 | if len(history.messages) == 0: 206 | history.add_ai_message("What do you want to know?") 207 | 208 | view_messages = st.expander("View the message contents in session state") 209 | 210 | st.header("RAG agent with LangChain") 211 | agent = configure_agent(docs, model_name, chain_type, search_type, {"k": k}) 212 | 213 | chain_with_history = RunnableWithMessageHistory( 214 | agent, 215 | lambda session_id: history, 216 | input_messages_key="input", 217 | history_messages_key="rag_chat_history", 218 | ) 219 | 220 | # Display chat messages from history on app rerun 221 | for message in history.messages: 222 | with st.chat_message(message.type): 223 | st.markdown(message.content) 224 | 225 | # Accept user input 226 | if prompt := st.chat_input(): 227 | # Add user message to chat 
history 228 | # Note: new messages are saved to history automatically by Langchain during run 229 | # st.session_state.messages.append({"role": "user", "content": prompt}) 230 | # Display user message in chat message container 231 | with st.chat_message("user"): 232 | st.markdown(prompt) 233 | 234 | # Display assistant response in chat message container 235 | with st.chat_message("assistant"): 236 | # Display assistant response in chat message container 237 | # with tracing_v2_enabled(project_name="Applied AI RAG Assistant", 238 | # tags=["LangChain", "Agent"]): 239 | call_chain(chain_with_history, prompt) 240 | 241 | # Draw the messages at the end, so newly generated ones show up immediately 242 | with view_messages: 243 | """ 244 | Message History initialized with: 245 | ```python 246 | msgs = StreamlitChatMessageHistory(key="rag_chat_history") 247 | ``` 248 | 249 | Contents of `st.session_state.rag_chat_history`: 250 | """ 251 | view_messages.json(st.session_state.rag_chat_history) 252 | 253 | 254 | if __name__ == "__main__": 255 | main() 256 | --------------------------------------------------------------------------------