├── tests ├── utils │ ├── __init__.py │ ├── eBook-How-to-Build-a-Career-in-AI.pdf │ ├── eval_questions.txt │ ├── test_utilsrag_lc.py │ ├── test_utilsfile.py │ └── test_utilsrag_li.py ├── rag_bedrock │ ├── eval_document.pdf │ ├── eval_questions.txt │ ├── test_rag_llamaindex.py │ └── test_rag_langchain.py ├── view_tru_dashboard.py └── conftest.py ├── rag_assistant ├── shared │ ├── __init__.py │ ├── rag_prompts.py │ └── llm_facade.py ├── utils │ ├── __init__.py │ ├── config_loader.py │ ├── constants.py │ ├── auth.py │ ├── utilsfile.py │ ├── utilsvision.py │ ├── utilsllm.py │ └── utilsrag_lc.py ├── Hello.py ├── streamlit_app.py ├── pages │ ├── 2_Load_Document.py │ └── 3_RAG_Admin.py └── legacy_ux │ └── 1_RAG_agent_with_LC.py ├── terraform └── aws │ ├── providers.tf │ ├── s3.tf │ ├── kms.tf │ ├── data.tf │ ├── route53.tf │ ├── security_group.tf │ ├── efs.tf │ ├── opensearch.tf │ ├── outputs.tf │ ├── lb.tf │ ├── variables.tf │ ├── iam.tf │ └── ecs.tf ├── data └── sources │ ├── pdf │ ├── arxiv │ │ └── 2210.01241.pdf │ ├── GenAI │ │ └── aws-caf-for-ai.pdf │ ├── aws │ │ └── serverless │ │ │ └── serverless-core.pdf │ ├── Security │ │ ├── Cloud Security Guide for SMEs.pdf │ │ └── LLM_AI_Security_and_Governance_Checklist-v1_FR.pdf │ ├── Application │ │ └── beyond-the-twelve-factor-app.pdf │ ├── Cloud │ │ ├── aws-cloud-adoption-framework_fr-FR.pdf │ │ └── cloud-enablement-engine-practical-guide.pdf │ ├── Architecture │ │ └── AWS_Well-Architected_Framework.pdf │ └── Risk │ │ └── Questionnaire d'évaluation des risques applicatifs pour le Cloud Public.pdf │ └── md │ └── 12factor │ ├── en │ ├── who.md │ ├── intro.md │ ├── toc.md │ ├── background.md │ ├── codebase.md │ ├── port-binding.md │ ├── admin-processes.md │ ├── logs.md │ ├── processes.md │ ├── build-release-run.md │ ├── dependencies.md │ ├── concurrency.md │ ├── backing-services.md │ ├── disposability.md │ ├── config.md │ └── dev-prod-parity.md │ └── fr │ ├── who.md │ ├── intro.md │ ├── toc.md │ ├── background.md │ ├── port-binding.md │ ├── codebase.md │ ├── admin-processes.md │ ├── logs.md │ ├── processes.md │ ├── dependencies.md │ ├── build-release-run.md │ ├── backing-services.md │ ├── disposability.md │ ├── concurrency.md │ ├── config.md │ └── dev-prod-parity.md ├── docker-compose.yml ├── .github └── workflows │ ├── python_test.yml │ └── aws.yml ├── .gitignore ├── Dockerfile ├── pyproject.toml ├── conf └── config.ini └── README.md /tests/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rag_assistant/shared/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rag_assistant/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /terraform/aws/providers.tf: -------------------------------------------------------------------------------- 1 | provider "aws" { 2 | region = "eu-west-1" 3 | } 4 | -------------------------------------------------------------------------------- /terraform/aws/s3.tf: -------------------------------------------------------------------------------- 1 | resource "aws_s3_bucket" "ai_assistant_bucket" { 2 | bucket = "finaxys-ai-assistant-bucket" 3 | } 4 | -------------------------------------------------------------------------------- 
/data/sources/pdf/arxiv/2210.01241.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BittnerPierre/applied-ai-rag-assistant/HEAD/data/sources/pdf/arxiv/2210.01241.pdf -------------------------------------------------------------------------------- /tests/rag_bedrock/eval_document.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BittnerPierre/applied-ai-rag-assistant/HEAD/tests/rag_bedrock/eval_document.pdf -------------------------------------------------------------------------------- /tests/view_tru_dashboard.py: -------------------------------------------------------------------------------- 1 | from trulens_eval import Tru 2 | 3 | tru = Tru(database_redact_keys=True) 4 | # tru.reset_database() 5 | tru.run_dashboard() -------------------------------------------------------------------------------- /data/sources/pdf/GenAI/aws-caf-for-ai.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BittnerPierre/applied-ai-rag-assistant/HEAD/data/sources/pdf/GenAI/aws-caf-for-ai.pdf -------------------------------------------------------------------------------- /tests/utils/eBook-How-to-Build-a-Career-in-AI.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BittnerPierre/applied-ai-rag-assistant/HEAD/tests/utils/eBook-How-to-Build-a-Career-in-AI.pdf -------------------------------------------------------------------------------- /data/sources/pdf/aws/serverless/serverless-core.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BittnerPierre/applied-ai-rag-assistant/HEAD/data/sources/pdf/aws/serverless/serverless-core.pdf -------------------------------------------------------------------------------- /terraform/aws/kms.tf: -------------------------------------------------------------------------------- 1 | resource "aws_kms_key" "key" { 2 | description = "Ai Assistant ECS cluster CloudWatch log KMS key" 3 | deletion_window_in_days = 7 4 | } 5 | -------------------------------------------------------------------------------- /data/sources/pdf/Security/Cloud Security Guide for SMEs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BittnerPierre/applied-ai-rag-assistant/HEAD/data/sources/pdf/Security/Cloud Security Guide for SMEs.pdf -------------------------------------------------------------------------------- /data/sources/pdf/Application/beyond-the-twelve-factor-app.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BittnerPierre/applied-ai-rag-assistant/HEAD/data/sources/pdf/Application/beyond-the-twelve-factor-app.pdf -------------------------------------------------------------------------------- /data/sources/pdf/Cloud/aws-cloud-adoption-framework_fr-FR.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BittnerPierre/applied-ai-rag-assistant/HEAD/data/sources/pdf/Cloud/aws-cloud-adoption-framework_fr-FR.pdf -------------------------------------------------------------------------------- /data/sources/pdf/Architecture/AWS_Well-Architected_Framework.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/BittnerPierre/applied-ai-rag-assistant/HEAD/data/sources/pdf/Architecture/AWS_Well-Architected_Framework.pdf -------------------------------------------------------------------------------- /data/sources/pdf/Cloud/cloud-enablement-engine-practical-guide.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BittnerPierre/applied-ai-rag-assistant/HEAD/data/sources/pdf/Cloud/cloud-enablement-engine-practical-guide.pdf -------------------------------------------------------------------------------- /data/sources/md/12factor/en/who.md: -------------------------------------------------------------------------------- 1 | Who should read this document? 2 | ============================== 3 | 4 | Any developer building applications which run as a service. Ops engineers who deploy or manage such applications. 5 | -------------------------------------------------------------------------------- /data/sources/pdf/Security/LLM_AI_Security_and_Governance_Checklist-v1_FR.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BittnerPierre/applied-ai-rag-assistant/HEAD/data/sources/pdf/Security/LLM_AI_Security_and_Governance_Checklist-v1_FR.pdf -------------------------------------------------------------------------------- /data/sources/pdf/Risk/Questionnaire d'évaluation des risques applicatifs pour le Cloud Public.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BittnerPierre/applied-ai-rag-assistant/HEAD/data/sources/pdf/Risk/Questionnaire d'évaluation des risques applicatifs pour le Cloud Public.pdf -------------------------------------------------------------------------------- /data/sources/md/12factor/fr/who.md: -------------------------------------------------------------------------------- 1 | Qui devrait lire ce document ? 2 | ============================== 3 | 4 | Tout développeur qui construit des applications qui fonctionnent en tant que service, ainsi que les personnes qui déploient et gèrent de telles applications. 
5 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | langchain-streamlit-agent: 4 | image: applied-ai-rag-assistant:latest 5 | build: ./app 6 | command: streamlit run rag_assistant/Hello.py --server.port 8051 7 | volumes: 8 | - ./rag_assistant/:/app/rag_assistant 9 | ports: 10 | - 8051:8051 -------------------------------------------------------------------------------- /rag_assistant/utils/config_loader.py: -------------------------------------------------------------------------------- 1 | import configparser 2 | from pathlib import Path 3 | 4 | 5 | def load_config(): 6 | base_dir = Path(__file__).resolve().parent.parent # Chemin du dossier projet 7 | config_path = base_dir.parent / 'conf' / 'config.ini' 8 | 9 | config = configparser.ConfigParser() 10 | config.read(config_path) 11 | return config 12 | -------------------------------------------------------------------------------- /terraform/aws/data.tf: -------------------------------------------------------------------------------- 1 | data "aws_vpc" "ai_assistant_vpc" { 2 | id = var.vpc_id 3 | } 4 | 5 | data "aws_subnet" "ai_assistant_subnet_1" { 6 | id = var.subnet_id_1 7 | } 8 | 9 | data "aws_subnet" "ai_assistant_subnet_2" { 10 | id = var.subnet_id_2 11 | } 12 | 13 | data "aws_subnet" "ai_assistant_subnet_3" { 14 | id = var.subnet_id_3 15 | } 16 | 17 | data "aws_secretsmanager_secret" "secret" { 18 | name = var.secret_name 19 | } 20 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | # tests/conftest.py 2 | import sys 3 | import os 4 | 5 | import pytest 6 | 7 | from trulens_eval import ( 8 | Tru 9 | ) 10 | 11 | 12 | # Add the root directory of the project to the Python path 13 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) 14 | 15 | 16 | @pytest.fixture(scope="session") 17 | def trulens_prepare(): 18 | tru = Tru(database_redact_keys=True) 19 | tru.reset_database() 20 | return tru -------------------------------------------------------------------------------- /tests/utils/eval_questions.txt: -------------------------------------------------------------------------------- 1 | What are the keys to building a career in AI? 2 | How can teamwork contribute to success in AI? 3 | What is the importance of networking in AI? 4 | What are some good habits to develop for a successful career? 5 | How can altruism be beneficial in building a career? 6 | What is imposter syndrome and how does it relate to AI? 7 | Who are some accomplished individuals who have experienced imposter syndrome? 8 | What is the first step to becoming good at AI? 9 | What are some common challenges in AI? 10 | Is it normal to find parts of AI challenging? 
-------------------------------------------------------------------------------- /terraform/aws/route53.tf: -------------------------------------------------------------------------------- 1 | data "aws_acm_certificate" "ai_assistant_certificate" { 2 | domain = var.dns_url_app_subnet 3 | } 4 | 5 | data "aws_route53_zone" "ai_assistant_zone" { 6 | name = var.dns_url 7 | } 8 | 9 | resource "aws_route53_record" "record" { 10 | zone_id = data.aws_route53_zone.ai_assistant_zone.zone_id 11 | name = var.dns_url_app_subnet 12 | type = "A" 13 | alias { 14 | name = aws_lb.application_lb.dns_name 15 | zone_id = aws_lb.application_lb.zone_id 16 | evaluate_target_health = true 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /tests/rag_bedrock/eval_questions.txt: -------------------------------------------------------------------------------- 1 | How can participating in competitions like Kaggle contribute to your AI career growth? 2 | What are the keys to building a career in AI? 3 | How can teamwork contribute to success in AI? 4 | What is the importance of networking in AI? 5 | What are some good habits to develop for a successful career? 6 | What is imposter syndrome and how does it relate to AI? 7 | Who are some accomplished individuals who have experienced imposter syndrome? 8 | What is the first step to becoming good at AI? 9 | What are some common challenges in AI? 10 | Is it normal to find parts of AI challenging? -------------------------------------------------------------------------------- /rag_assistant/utils/constants.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | class DocumentType(Enum): 4 | STANDARD = "Norme" 5 | GUIDE = "Guide" 6 | TUTORIAL = "Tutoriel" 7 | FAQ = "FAQ" 8 | 9 | 10 | class ChunkType(Enum): 11 | TEXT = "Text" 12 | IMAGE = "Image" 13 | 14 | 15 | class Metadata(Enum): 16 | DOCUMENT_TYPE = "document_type" 17 | CHUNK_TYPE = "chunk_type" 18 | TOPIC = "topic" 19 | PAGE = "page" 20 | FILENAME = "filename" 21 | 22 | 23 | class SupportedFileType(Enum): 24 | PDF = "pdf" 25 | MARKDOWN = "md" 26 | TEXT = "txt" 27 | 28 | class StorageType(Enum): 29 | S3 = "S3" 30 | LOCAL = "LOCAL" 31 | NONE = "NONE" 32 | 33 | 34 | class CollectionType(Enum): 35 | DOCUMENTS = "documents" 36 | IMAGES = "images" 37 | -------------------------------------------------------------------------------- /.github/workflows/python_test.yml: -------------------------------------------------------------------------------- 1 | name: Test python application 2 | 3 | on: 4 | push: 5 | branches: [ main, stable ] 6 | pull_request: 7 | branches: [ main, stable ] 8 | 9 | env: 10 | OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} 11 | MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }} 12 | AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 13 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 14 | jobs: 15 | test: 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@v2 19 | - name: Set up Python 3.11 20 | uses: actions/setup-python@v2 21 | with: 22 | python-version: 3.11 23 | - name: Install dependencies 24 | run: | 25 | python -m pip install --upgrade pip 26 | pip install poetry==1.4.2 27 | poetry install 28 | - name: Test with pytest 29 | run: | 30 | poetry run pytest 31 | 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vs/ 2 | .vscode/ 3 | 
.idea/ 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | .streamlit/ 10 | 11 | # Installer logs 12 | pip-log.txt 13 | pip-delete-this-directory.txt 14 | 15 | # Jupyter Notebook 16 | .ipynb_checkpoints 17 | notebooks/ 18 | 19 | # Environments 20 | .env 21 | .envrc 22 | .venv 23 | .venvs 24 | env/ 25 | venv/ 26 | ENV/ 27 | env.bak/ 28 | venv.bak/ 29 | 30 | 31 | # C extensions 32 | *.so 33 | *.pkl 34 | *.bin 35 | 36 | # macOS display setting files 37 | .DS_Store 38 | 39 | # Terraform 40 | terraform/*/.terraform/** 41 | terraform/*/.terraform* 42 | terraform/*/terraform.* 43 | 44 | 45 | secrets.toml 46 | /data/chroma 47 | 48 | /data/chroma2 49 | /data/UBER 50 | default.sqlite 51 | /merging_index 52 | /sentence_index 53 | /storage/AI 54 | /storage 55 | /data/cache 56 | /data/faiss 57 | /logs 58 | *.log 59 | /tests/rechdocia 60 | /data/llama_index/ 61 | -------------------------------------------------------------------------------- /data/sources/md/12factor/en/intro.md: -------------------------------------------------------------------------------- 1 | Introduction 2 | ============ 3 | 4 | In the modern era, software is commonly delivered as a service: called *web apps*, or *software-as-a-service*. The twelve-factor app is a methodology for building software-as-a-service apps that: 5 | 6 | * Use **declarative** formats for setup automation, to minimize time and cost for new developers joining the project; 7 | * Have a **clean contract** with the underlying operating system, offering **maximum portability** between execution environments; 8 | * Are suitable for **deployment** on modern **cloud platforms**, obviating the need for servers and systems administration; 9 | * **Minimize divergence** between development and production, enabling **continuous deployment** for maximum agility; 10 | * And can **scale up** without significant changes to tooling, architecture, or development practices. 11 | 12 | The twelve-factor methodology can be applied to apps written in any programming language, and which use any combination of backing services (database, queue, memory cache, etc). 
13 | -------------------------------------------------------------------------------- /terraform/aws/security_group.tf: -------------------------------------------------------------------------------- 1 | resource "aws_security_group" "ai_assistant_security_group" { 2 | name = "ai_assistant-security-group-https" 3 | vpc_id = data.aws_vpc.ai_assistant_vpc.id 4 | 5 | ingress { 6 | from_port = 80 7 | to_port = 80 8 | protocol = "TCP" 9 | cidr_blocks = ["0.0.0.0/0"] 10 | } 11 | ingress { 12 | from_port = 443 13 | to_port = 443 14 | protocol = "TCP" 15 | cidr_blocks = ["0.0.0.0/0"] 16 | } 17 | ingress { 18 | from_port = 2049 19 | to_port = 2049 20 | protocol = "TCP" 21 | cidr_blocks = ["0.0.0.0/0"] 22 | } 23 | egress { 24 | from_port = 0 25 | to_port = 0 26 | protocol = "-1" 27 | cidr_blocks = ["0.0.0.0/0"] 28 | ipv6_cidr_blocks = ["::/0"] 29 | } 30 | } 31 | 32 | resource "aws_security_group" "ai_assistant_opensearch" { 33 | name = "ai-assistant-opensearch" 34 | vpc_id = data.aws_vpc.ai_assistant_vpc.id 35 | 36 | ingress { 37 | from_port = 443 38 | to_port = 443 39 | protocol = "tcp" 40 | cidr_blocks = ["0.0.0.0/0"] 41 | } 42 | } -------------------------------------------------------------------------------- /rag_assistant/Hello.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import sys 3 | 4 | from utils.auth import check_password 5 | from utils.config_loader import load_config 6 | from utils.utilsllm import get_model_provider, get_model_name, get_embeddings_model_name 7 | 8 | config = load_config() 9 | 10 | app_name = config['DEFAULT']['APP_NAME'] 11 | vectordb = config['VECTORDB']['vectordb'] 12 | 13 | st.set_page_config( 14 | page_title="Hello", 15 | page_icon="👋", 16 | ) 17 | 18 | model_provider = get_model_provider() 19 | model_name = get_model_name(provider=model_provider) 20 | embeddings_model = get_embeddings_model_name(provider=model_provider) 21 | 22 | def main(): 23 | st.title(f"""Bienvenue sur {app_name} ! 👋""") 24 | 25 | st.markdown( 26 | f""" 27 | **{app_name}** utilise '**{model_provider}**' avec comme _modèle de langage_ '**{model_name}**'. 28 | 29 | La _base de connaissance_ est sur '**{vectordb}**' avec comme _modèle d'embedding_ : '**{embeddings_model}**'. 30 | """ 31 | ) 32 | st.write(sys.version) 33 | 34 | 35 | if __name__ == "__main__": 36 | 37 | if not check_password(): 38 | # Do not continue if check_password is not True. 39 | st.stop() 40 | 41 | main() 42 | -------------------------------------------------------------------------------- /data/sources/md/12factor/en/toc.md: -------------------------------------------------------------------------------- 1 | The Twelve Factors 2 | ================== 3 | 4 | ## [I. Codebase](./codebase) 5 | ### One codebase tracked in revision control, many deploys 6 | 7 | ## [II. Dependencies](./dependencies) 8 | ### Explicitly declare and isolate dependencies 9 | 10 | ## [III. Config](./config) 11 | ### Store config in the environment 12 | 13 | ## [IV. Backing services](./backing-services) 14 | ### Treat backing services as attached resources 15 | 16 | ## [V. Build, release, run](./build-release-run) 17 | ### Strictly separate build and run stages 18 | 19 | ## [VI. Processes](./processes) 20 | ### Execute the app as one or more stateless processes 21 | 22 | ## [VII. Port binding](./port-binding) 23 | ### Export services via port binding 24 | 25 | ## [VIII. Concurrency](./concurrency) 26 | ### Scale out via the process model 27 | 28 | ## [IX. 
Disposability](./disposability) 29 | ### Maximize robustness with fast startup and graceful shutdown 30 | 31 | ## [X. Dev/prod parity](./dev-prod-parity) 32 | ### Keep development, staging, and production as similar as possible 33 | 34 | ## [XI. Logs](./logs) 35 | ### Treat logs as event streams 36 | 37 | ## [XII. Admin processes](./admin-processes) 38 | ### Run admin/management tasks as one-off processes 39 | -------------------------------------------------------------------------------- /terraform/aws/efs.tf: -------------------------------------------------------------------------------- 1 | 2 | resource "aws_kms_key" "volume_key" { 3 | description = "Ai Assistant EFS Volume key" 4 | deletion_window_in_days = 7 5 | } 6 | 7 | resource "aws_efs_file_system" "ai_assistant_efs_file_system" { 8 | encrypted = true 9 | kms_key_id = aws_kms_key.volume_key.arn 10 | 11 | tags = { 12 | Name = "ai-assistant" 13 | } 14 | } 15 | 16 | resource "aws_efs_mount_target" "ai_assistant_efs_mount_target_1" { 17 | file_system_id = aws_efs_file_system.ai_assistant_efs_file_system.id 18 | subnet_id = data.aws_subnet.ai_assistant_subnet_1.id 19 | security_groups = [aws_security_group.ai_assistant_security_group.id] 20 | } 21 | 22 | resource "aws_efs_mount_target" "ai_assistant_efs_mount_target_2" { 23 | file_system_id = aws_efs_file_system.ai_assistant_efs_file_system.id 24 | subnet_id = data.aws_subnet.ai_assistant_subnet_2.id 25 | security_groups = [aws_security_group.ai_assistant_security_group.id] 26 | } 27 | 28 | resource "aws_efs_mount_target" "ai_assistant_efs_mount_target_3" { 29 | file_system_id = aws_efs_file_system.ai_assistant_efs_file_system.id 30 | subnet_id = data.aws_subnet.ai_assistant_subnet_3.id 31 | security_groups = [aws_security_group.ai_assistant_security_group.id] 32 | } 33 | -------------------------------------------------------------------------------- /data/sources/md/12factor/fr/intro.md: -------------------------------------------------------------------------------- 1 | Introduction 2 | ============ 3 | 4 | À l'époque actuelle, les logiciels sont régulièrement délivrés en tant que services : on les appelle des *applications web* (web apps), ou *logiciels en tant que service* (*software-as-a-service*). L'application 12 facteurs est une méthodologie pour concevoir des logiciels en tant que service qui : 5 | 6 | * Utilisent des formats **déclaratifs** pour mettre en oeuvre l'automatisation, pour minimiser le temps et les coûts pour que de nouveaux développeurs rejoignent le projet; 7 | * Ont un **contrat propre** avec le système d'exploitation sous-jacent, offrant une **portabilité maximum** entre les environnements d'exécution; 8 | * Sont adaptés à des **déploiements** sur des **plateformes cloud** modernes, rendant inutile le besoin de serveurs et de l'administration de systèmes; 9 | * **Minimisent la divergence** entre le développement et la production, ce qui permet le **déploiement continu** pour une agilité maximum; 10 | * et peuvent **grossir verticalement** sans changement significatif dans les outils, l'architecture ou les pratiques de développement; 11 | 12 | La méthodologie 12 facteurs peut être appliquée à des applications écrites dans tout langage de programmation, et qui utilisent tout type de services externes (base de données, file, cache mémoire, etc.) 
13 | -------------------------------------------------------------------------------- /terraform/aws/opensearch.tf: -------------------------------------------------------------------------------- 1 | data "aws_iam_policy_document" "opensearch_domain_policy" { 2 | statement { 3 | effect = "Allow" 4 | 5 | principals { 6 | type = "AWS" 7 | identifiers = var.opensearch_allowed_users_and_policy_arn #list of arn 8 | } 9 | 10 | actions = ["es:*"] 11 | resources = ["arn:aws:es:eu-west-1:441525731509:domain/ai-assistant/*"] 12 | } 13 | } 14 | 15 | resource "aws_opensearch_domain" "ai_assistant_opensearch_domain" { 16 | domain_name = var.opensearch_domain_name 17 | engine_version = "OpenSearch_2.13" 18 | 19 | cluster_config { 20 | instance_type = "r5.large.search" 21 | zone_awareness_enabled = false 22 | instance_count = 1 23 | multi_az_with_standby_enabled = false 24 | } 25 | 26 | vpc_options { 27 | subnet_ids = [data.aws_subnet.ai_assistant_subnet_1.id] 28 | security_group_ids = [aws_security_group.ai_assistant_security_group.id] 29 | } 30 | 31 | domain_endpoint_options { 32 | enforce_https = true 33 | tls_security_policy = "Policy-Min-TLS-1-2-2019-07" 34 | } 35 | 36 | node_to_node_encryption { 37 | enabled = true 38 | } 39 | 40 | ebs_options { 41 | ebs_enabled = true 42 | volume_size = 10 43 | } 44 | access_policies = data.aws_iam_policy_document.opensearch_domain_policy.json 45 | } 46 | -------------------------------------------------------------------------------- /rag_assistant/utils/auth.py: -------------------------------------------------------------------------------- 1 | import hmac 2 | import streamlit as st 3 | 4 | def check_password(): 5 | """Returns `True` if the user had the correct password.""" 6 | 7 | if st.session_state.get("password_correct", False): 8 | return True 9 | 10 | try: 11 | 12 | if "password" not in st.secrets: 13 | # no password required 14 | st.session_state["password_correct"] = True 15 | return True 16 | 17 | except FileNotFoundError: 18 | # no secrets.toml so no password required 19 | # no password required 20 | st.session_state["password_correct"] = True 21 | return True 22 | 23 | def password_entered(): 24 | """Checks whether a password entered by the user is correct.""" 25 | if hmac.compare_digest(st.session_state["password"], st.secrets["password"]): 26 | st.session_state["password_correct"] = True 27 | del st.session_state["password"] # Don't store the password. 28 | else: 29 | st.session_state["password_correct"] = False 30 | 31 | # Return True if the password is validated. 32 | 33 | # Show input for password. 
34 | st.text_input( 35 | "Password", type="password", on_change=password_entered, key="password" 36 | ) 37 | if "password_correct" in st.session_state: 38 | st.error("😕 Password incorrect") 39 | return False 40 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # The builder image, used to build the virtual environment 2 | FROM python:3.11-slim as builder 3 | 4 | RUN apt-get update 5 | RUN apt-get install build-essential -y 6 | 7 | RUN pip install poetry==1.4.2 8 | 9 | ENV POETRY_NO_INTERACTION=1 \ 10 | POETRY_VIRTUALENVS_IN_PROJECT=1 \ 11 | POETRY_VIRTUALENVS_CREATE=1 \ 12 | POETRY_CACHE_DIR=/tmp/poetry_cache 13 | 14 | # A directory to have app data 15 | WORKDIR /app 16 | 17 | COPY pyproject.toml poetry.lock ./ 18 | 19 | RUN poetry install --without dev --no-root && rm -rf $POETRY_CACHE_DIR 20 | 21 | # The runtime image, used to just run the code provided its virtual environment 22 | FROM python:3.11-slim as runtime 23 | 24 | WORKDIR /app 25 | 26 | COPY tests tests 27 | COPY conf conf 28 | 29 | RUN mkdir -p .streamlit 30 | RUN touch .streamlit/secrets.toml 31 | 32 | ENV VIRTUAL_ENV=/app/.venv \ 33 | PATH="/app/.venv/bin:$PATH" 34 | 35 | COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV} 36 | 37 | COPY rag_assistant rag_assistant 38 | 39 | RUN apt-get update 40 | RUN apt-get install wget -y 41 | RUN mkdir /opt/tiktoken_cache 42 | ARG TIKTOKEN_URL="https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken" 43 | RUN wget -O /opt/tiktoken_cache/$(echo -n $TIKTOKEN_URL | sha1sum | head -c 40) $TIKTOKEN_URL 44 | ENV TIKTOKEN_CACHE_DIR=/opt/tiktoken_cache 45 | 46 | CMD ["streamlit", "run", "rag_assistant/Hello.py", "--server.port", "80"] 47 | -------------------------------------------------------------------------------- /terraform/aws/outputs.tf: -------------------------------------------------------------------------------- 1 | output "vpc_id" { 2 | description = "VPC ID" 3 | value = var.vpc_id 4 | } 5 | 6 | output "subnet_1_id" { 7 | description = "Subnet 1 ID" 8 | value = var.subnet_id_1 9 | } 10 | 11 | output "subnet_2_id" { 12 | description = "Subnet 2 ID" 13 | value = var.subnet_id_2 14 | } 15 | 16 | output "subnet_3_id" { 17 | description = "Subnet 3 ID" 18 | value = var.subnet_id_3 19 | } 20 | 21 | output "lb_arn" { 22 | description = "Load Balancer arn" 23 | value = aws_lb.application_lb.arn 24 | } 25 | 26 | output "lb_dns_name" { 27 | description = "Load Balancer DNS name" 28 | value = aws_lb.application_lb.dns_name 29 | } 30 | 31 | output "aws_ecs_cluster_arn" { 32 | description = "ECS Cluster arn" 33 | value = aws_ecs_cluster.ai_assistant_cluster.id 34 | } 35 | 36 | output "app_url" { 37 | description = "URL to access the deployed application" 38 | value = var.dns_url_app_subnet 39 | } 40 | 41 | output "ecr_image_url" { 42 | description = "URL to the image in ECR repository" 43 | value = var.ecr_image_url 44 | } 45 | 46 | output "cloudwatch_name" { 47 | description = "CloudWatch log group name" 48 | value = aws_cloudwatch_log_group.ai_assistant-cloudwatch-log.name 49 | } 50 | 51 | output "cloudwatch_arn" { 52 | description = "CloudWatch log group arn" 53 | value = aws_cloudwatch_log_group.ai_assistant-cloudwatch-log.arn 54 | } 55 | -------------------------------------------------------------------------------- /data/sources/md/12factor/fr/toc.md: -------------------------------------------------------------------------------- 1 | Les 12 
facteurs 2 | ================== 3 | 4 | ## [I. Base de code](./codebase) 5 | ### Une base de code suivie avec un système de contrôle de version, plusieurs déploiements 6 | 7 | ## [II. Dépendances](./dependencies) 8 | ### Déclarez explicitement et isolez les dépendances 9 | 10 | ## [III. Configuration](./config) 11 | ### Stockez la configuration dans l'environnement 12 | 13 | ## [IV. Services externes](./backing-services) 14 | ### Traitez les services externes comme des ressources attachées 15 | 16 | ## [V. Assemblez, publiez, exécutez](./build-release-run) 17 | ### Séparez strictement les étapes d'assemblage et d'exécution 18 | 19 | ## [VI. Processus](./processes) 20 | ### Exécutez l'application comme un ou plusieurs processus sans état 21 | 22 | ## [VII. Associations de ports](./port-binding) 23 | ### Exportez les services via des associations de ports 24 | 25 | ## [VIII. Concurrence](./concurrency) 26 | ### Grossissez à l'aide du modèle de processus 27 | 28 | ## [IX. Jetable](./disposability) 29 | ### Maximisez la robustesse avec des démarrages rapides et des arrêts gracieux 30 | 31 | ## [X. Parité dev/prod](./dev-prod-parity) 32 | ### Gardez le développement, la validation et la production aussi proches que possible 33 | 34 | ## [XI. Logs](./logs) 35 | ### Traitez les logs comme des flux d'évènements 36 | 37 | ## [XII. Processus d'administration](./admin-processes) 38 | ### Lancez les processus d'administration et de maintenance comme des one-off-processes 39 | -------------------------------------------------------------------------------- /data/sources/md/12factor/en/background.md: -------------------------------------------------------------------------------- 1 | Background 2 | ========== 3 | 4 | The contributors to this document have been directly involved in the development and deployment of hundreds of apps, and indirectly witnessed the development, operation, and scaling of hundreds of thousands of apps via our work on the Heroku platform. 5 | 6 | This document synthesizes all of our experience and observations on a wide variety of software-as-a-service apps in the wild. It is a triangulation on ideal practices for app development, paying particular attention to the dynamics of the organic growth of an app over time, the dynamics of collaboration between developers working on the app's codebase, and avoiding the cost of software erosion. 7 | 8 | Our motivation is to raise awareness of some systemic problems we've seen in modern application development, to provide a shared vocabulary for discussing those problems, and to offer a set of broad conceptual solutions to those problems with accompanying terminology. The format is inspired by Martin Fowler's books *Patterns of Enterprise Application Architecture* and *Refactoring*. 
9 | 10 | -------------------------------------------------------------------------------- /terraform/aws/lb.tf: -------------------------------------------------------------------------------- 1 | resource "aws_lb_target_group" "ai_assistant_target_group_https" { 2 | name = "ai-assistant-group-https" 3 | port = 80 4 | protocol = "HTTP" 5 | vpc_id = data.aws_vpc.ai_assistant_vpc.id 6 | target_type = "ip" 7 | } 8 | 9 | resource "aws_lb" "application_lb" { 10 | name = "ai-assistant-alb-tf-https" 11 | internal = false 12 | load_balancer_type = "application" 13 | security_groups = [aws_security_group.ai_assistant_security_group.id] 14 | subnets = [data.aws_subnet.ai_assistant_subnet_1.id, data.aws_subnet.ai_assistant_subnet_2.id, data.aws_subnet.ai_assistant_subnet_3.id] 15 | } 16 | 17 | resource "aws_lb_listener" "application_lb_listener" { 18 | load_balancer_arn = aws_lb.application_lb.arn 19 | port = 443 20 | protocol = "HTTPS" 21 | ssl_policy = "ELBSecurityPolicy-2016-08" 22 | 23 | certificate_arn = data.aws_acm_certificate.ai_assistant_certificate.arn 24 | 25 | default_action { 26 | type = "forward" 27 | target_group_arn = aws_lb_target_group.ai_assistant_target_group_https.arn 28 | } 29 | } 30 | 31 | resource "aws_lb_listener" "application_lb_listener_redirect" { 32 | load_balancer_arn = aws_lb.application_lb.arn 33 | port = 80 34 | protocol = "HTTP" 35 | 36 | default_action { 37 | type = "redirect" 38 | 39 | redirect { 40 | port = "443" 41 | protocol = "HTTPS" 42 | status_code = "HTTP_301" 43 | } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /data/sources/md/12factor/fr/background.md: -------------------------------------------------------------------------------- 1 | Contexte 2 | ========== 3 | 4 | Les contributeurs de ce document ont été directement impliqués dans le développement et le déploiement de centaines d'applications, et ont vu, indirectement, le développement, la gestion et le grossissement de centaines de milliers d'applications via le travail fait sur la plateforme [Heroku](http://www.heroku.com/). 5 | 6 | Ce document fait la synthèse de toutes nos expériences et observations sur une large variété d'applications software-as-a-service. C'est la triangulation de pratiques idéales pour le développement d'applications, en portant un soin tout particulier aux dynamiques de la croissance organique d'une application au cours du temps, les dynamiques de la collaboration entre les développeurs qui travaillent sur le code de l'application, en [évitant le coût de la lente détérioration du logiciel dans un environnement qui évolue (en)](http://blog.heroku.com/archives/2011/6/28/the_new_heroku_4_erosion_resistance_explicit_contracts/). 7 | 8 | Notre motivation est de faire prendre conscience de certains problèmes systémiques que nous avons rencontrés dans le développement d'applications modernes, afin de fournir un vocabulaire partagé pour discuter ces problèmes, et pour offrir un ensemble de solutions conceptuelles générales à ces problèmes, ainsi que la terminologie correspondante. Le format est inspiré par celui des livres de Martin Fowler *[Patterns of Enterprise Application Architecture (en)](http://books.google.com/books/about/Patterns_of_enterprise_application_archi.html?id=FyWZt5DdvFkC)* et *[Refactoring (en)](http://books.google.com/books/about/Refactoring.html?id=1MsETFPD3I0C)*. 
9 | 10 | -------------------------------------------------------------------------------- /data/sources/md/12factor/en/codebase.md: -------------------------------------------------------------------------------- 1 | ## I. Codebase 2 | ### One codebase tracked in revision control, many deploys 3 | 4 | A twelve-factor app is always tracked in a version control system, such as [Git](http://git-scm.com/), [Mercurial](https://www.mercurial-scm.org/), or [Subversion](http://subversion.apache.org/). A copy of the revision tracking database is known as a *code repository*, often shortened to *code repo* or just *repo*. 5 | 6 | A *codebase* is any single repo (in a centralized revision control system like Subversion), or any set of repos who share a root commit (in a decentralized revision control system like Git). 7 | 8 | ![One codebase maps to many deploys](/images/codebase-deploys.png) 9 | 10 | There is always a one-to-one correlation between the codebase and the app: 11 | 12 | * If there are multiple codebases, it's not an app -- it's a distributed system. Each component in a distributed system is an app, and each can individually comply with twelve-factor. 13 | * Multiple apps sharing the same code is a violation of twelve-factor. The solution here is to factor shared code into libraries which can be included through the [dependency manager](./dependencies). 14 | 15 | There is only one codebase per app, but there will be many deploys of the app. A *deploy* is a running instance of the app. This is typically a production site, and one or more staging sites. Additionally, every developer has a copy of the app running in their local development environment, each of which also qualifies as a deploy. 16 | 17 | The codebase is the same across all deploys, although different versions may be active in each deploy. For example, a developer has some commits not yet deployed to staging; staging has some commits not yet deployed to production. But they all share the same codebase, thus making them identifiable as different deploys of the same app. 18 | 19 | -------------------------------------------------------------------------------- /data/sources/md/12factor/en/port-binding.md: -------------------------------------------------------------------------------- 1 | ## VII. Port binding 2 | ### Export services via port binding 3 | 4 | Web apps are sometimes executed inside a webserver container. For example, PHP apps might run as a module inside [Apache HTTPD](http://httpd.apache.org/), or Java apps might run inside [Tomcat](http://tomcat.apache.org/). 5 | 6 | **The twelve-factor app is completely self-contained** and does not rely on runtime injection of a webserver into the execution environment to create a web-facing service. The web app **exports HTTP as a service by binding to a port**, and listening to requests coming in on that port. 7 | 8 | In a local development environment, the developer visits a service URL like `http://localhost:5000/` to access the service exported by their app. In deployment, a routing layer handles routing requests from a public-facing hostname to the port-bound web processes. 9 | 10 | This is typically implemented by using [dependency declaration](./dependencies) to add a webserver library to the app, such as [Tornado](http://www.tornadoweb.org/) for Python, [Thin](http://code.macournoyer.com/thin/) for Ruby, or [Jetty](http://www.eclipse.org/jetty/) for Java and other JVM-based languages. This happens entirely in *user space*, that is, within the app's code. 
The contract with the execution environment is binding to a port to serve requests. 11 | 12 | HTTP is not the only service that can be exported by port binding. Nearly any kind of server software can be run via a process binding to a port and awaiting incoming requests. Examples include [ejabberd](http://www.ejabberd.im/) (speaking [XMPP](http://xmpp.org/)), and [Redis](http://redis.io/) (speaking the [Redis protocol](http://redis.io/topics/protocol)). 13 | 14 | Note also that the port-binding approach means that one app can become the [backing service](./backing-services) for another app, by providing the URL to the backing app as a resource handle in the [config](./config) for the consuming app. 15 | -------------------------------------------------------------------------------- /data/sources/md/12factor/en/admin-processes.md: -------------------------------------------------------------------------------- 1 | ## XII. Admin processes 2 | ### Run admin/management tasks as one-off processes 3 | 4 | The [process formation](./concurrency) is the array of processes that are used to do the app's regular business (such as handling web requests) as it runs. Separately, developers will often wish to do one-off administrative or maintenance tasks for the app, such as: 5 | 6 | * Running database migrations (e.g. `manage.py migrate` in Django, `rake db:migrate` in Rails). 7 | * Running a console (also known as a [REPL](http://en.wikipedia.org/wiki/Read-eval-print_loop) shell) to run arbitrary code or inspect the app's models against the live database. Most languages provide a REPL by running the interpreter without any arguments (e.g. `python` or `perl`) or in some cases have a separate command (e.g. `irb` for Ruby, `rails console` for Rails). 8 | * Running one-time scripts committed into the app's repo (e.g. `php scripts/fix_bad_records.php`). 9 | 10 | One-off admin processes should be run in an identical environment as the regular [long-running processes](./processes) of the app. They run against a [release](./build-release-run), using the same [codebase](./codebase) and [config](./config) as any process run against that release. Admin code must ship with application code to avoid synchronization issues. 11 | 12 | The same [dependency isolation](./dependencies) techniques should be used on all process types. For example, if the Ruby web process uses the command `bundle exec thin start`, then a database migration should use `bundle exec rake db:migrate`. Likewise, a Python program using Virtualenv should use the vendored `bin/python` for running both the Tornado webserver and any `manage.py` admin processes. 13 | 14 | Twelve-factor strongly favors languages which provide a REPL shell out of the box, and which make it easy to run one-off scripts. In a local deploy, developers invoke one-off admin processes by a direct shell command inside the app's checkout directory. In a production deploy, developers can use ssh or other remote command execution mechanism provided by that deploy's execution environment to run such a process. 
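As a purely illustrative sketch (not a file that exists in this repository), a one-off admin process for the assistant above could be a small script committed next to the application code and run with the same dependency isolation as the web process, e.g. `poetry run python scripts/show_config.py`. The `scripts/show_config.py` path is an assumption; `load_config()` and the `APP_NAME`/`vectordb` keys come from `rag_assistant/utils/config_loader.py` and `rag_assistant/Hello.py` shown earlier.

```python
# scripts/show_config.py -- hypothetical one-off admin script (illustration only).
# It ships with the app's codebase, reads the same conf/config.ini as the running
# app, and is launched with the app's own environment (e.g. `poetry run python ...`).
import sys
from pathlib import Path

# Assumed layout: this script lives in <repo root>/scripts/, so the app package
# sits in <repo root>/rag_assistant/ and is added to the import path here.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "rag_assistant"))

from utils.config_loader import load_config  # same loader the Streamlit app uses


def main() -> None:
    config = load_config()
    # These keys are the ones Hello.py reads; any other key would be an assumption.
    print("APP_NAME:", config["DEFAULT"]["APP_NAME"])
    print("vectordb:", config["VECTORDB"]["vectordb"])


if __name__ == "__main__":
    main()
```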
15 | -------------------------------------------------------------------------------- /tests/rag_bedrock/test_rag_llamaindex.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from rag_bedrock.base import LlamaIndexTestRAGHelper 4 | 5 | 6 | @pytest.mark.usefixtures("trulens_prepare", 7 | "bedrock_prepare", 8 | "temp_dir", 9 | "documents_prepare", 10 | "llm_prepare", 11 | "embeddings_prepare", 12 | "trulens_context_prepare", 13 | "provider_prepare", 14 | "eval_questions_prepare", 15 | "rag_prepare", 16 | "feedbacks_prepare") 17 | class TestRAGLlamaIndexClaude3(LlamaIndexTestRAGHelper): 18 | 19 | @property 20 | def test_name(self): 21 | return "LlamaIndex_Claude_3_Sonnet_Titan_Embed_V1" 22 | 23 | @property 24 | def model_id(self): 25 | return "anthropic.claude-3-sonnet-20240229-v1:0" 26 | 27 | @property 28 | def topic(self): 29 | return "How to Build a Career in AI" 30 | 31 | @property 32 | def embedding_model_id(self): 33 | return "amazon.titan-embed-text-v1" 34 | 35 | 36 | @pytest.mark.usefixtures("trulens_prepare", 37 | "bedrock_prepare", 38 | "temp_dir", 39 | "documents_prepare", 40 | "llm_prepare", 41 | "embeddings_prepare", 42 | "trulens_context_prepare", 43 | "provider_prepare", 44 | "eval_questions_prepare", 45 | "rag_prepare", 46 | "feedbacks_prepare") 47 | class TestRAGLlamaIndexMistral(LlamaIndexTestRAGHelper): 48 | 49 | @property 50 | def test_name(self): 51 | return "LlamaIndex_Mistral_Large_Titan_Embed_V1" 52 | 53 | @property 54 | def model_id(self): 55 | return "mistral.mistral-large-2402-v1:0" 56 | 57 | @property 58 | def topic(self): 59 | return "How to Build a Career in AI" 60 | 61 | @property 62 | def embedding_model_id(self): 63 | return "amazon.titan-embed-text-v1" 64 | -------------------------------------------------------------------------------- /data/sources/md/12factor/en/logs.md: -------------------------------------------------------------------------------- 1 | ## XI. Logs 2 | ### Treat logs as event streams 3 | 4 | *Logs* provide visibility into the behavior of a running app. In server-based environments they are commonly written to a file on disk (a "logfile"); but this is only an output format. 5 | 6 | Logs are the [stream](https://adam.herokuapp.com/past/2011/4/1/logs_are_streams_not_files/) of aggregated, time-ordered events collected from the output streams of all running processes and backing services. Logs in their raw form are typically a text format with one event per line (though backtraces from exceptions may span multiple lines). Logs have no fixed beginning or end, but flow continuously as long as the app is operating. 7 | 8 | **A twelve-factor app never concerns itself with routing or storage of its output stream.** It should not attempt to write to or manage logfiles. Instead, each running process writes its event stream, unbuffered, to `stdout`. During local development, the developer will view this stream in the foreground of their terminal to observe the app's behavior. 9 | 10 | In staging or production deploys, each process' stream will be captured by the execution environment, collated together with all other streams from the app, and routed to one or more final destinations for viewing and long-term archival. These archival destinations are not visible to or configurable by the app, and instead are completely managed by the execution environment. 
Open-source log routers (such as [Logplex](https://github.com/heroku/logplex) and [Fluentd](https://github.com/fluent/fluentd)) are available for this purpose. 11 | 12 | The event stream for an app can be routed to a file, or watched via realtime tail in a terminal. Most significantly, the stream can be sent to a log indexing and analysis system such as [Splunk](http://www.splunk.com/), or a general-purpose data warehousing system such as [Hadoop/Hive](http://hive.apache.org/). These systems allow for great power and flexibility for introspecting an app's behavior over time, including: 13 | 14 | * Finding specific events in the past. 15 | * Large-scale graphing of trends (such as requests per minute). 16 | * Active alerting according to user-defined heuristics (such as an alert when the quantity of errors per minute exceeds a certain threshold). 17 | -------------------------------------------------------------------------------- /terraform/aws/variables.tf: -------------------------------------------------------------------------------- 1 | variable "vpc_id" { 2 | description = "The ID of the VPC" 3 | type = string 4 | } 5 | 6 | variable "subnet_id_1" { 7 | description = "The ID of the first subnet" 8 | type = string 9 | } 10 | 11 | variable "subnet_id_2" { 12 | description = "The ID of the second subnet" 13 | type = string 14 | } 15 | 16 | variable "subnet_id_3" { 17 | description = "The ID of the third subnet" 18 | type = string 19 | } 20 | 21 | variable "dns_url" { 22 | description = "The base DNS URL (without subnet)." 23 | type = string 24 | } 25 | 26 | variable "dns_url_app_subnet" { 27 | description = "The DNS URL of your application. (It needs to have a valid HTTPS certificate and a Route 53 hosted zone)" 28 | type = string 29 | } 30 | 31 | variable "ecr_image_url" { 32 | description = "The AI assistant demonstrator ECR URL" 33 | type = string 34 | } 35 | 36 | variable "secret_name" { 37 | description = "SecretManager secret name" 38 | type = string 39 | } 40 | 41 | variable "openai_key_name" { 42 | description = "OpenAI key name in SecretManager secret" 43 | type = string 44 | } 45 | 46 | variable "mistral_key_name" { 47 | description = "Mistral key name in SecretManager secret" 48 | type = string 49 | } 50 | 51 | 52 | variable "hf_token_name" { 53 | description = "HF token name in SecretManager secret" 54 | type = string 55 | } 56 | 57 | variable "langchain_key_name" { 58 | description = "Langchain key name in SecretManager secret" 59 | type = string 60 | } 61 | 62 | variable "langchain_tracing_v2_bool" { 63 | description = "Langchain tracing V2 boolean string ('true' or 'false')" 64 | type = string 65 | } 66 | 67 | variable "opensearch_domain_name" { 68 | description = "Name of the Opensearch domain" 69 | type = string 70 | } 71 | 72 | variable "opensearch_dashboard_user" { 73 | description = "Username for Opensearch Dashboard user" 74 | type = string 75 | } 76 | 77 | variable "opensearch_dashboard_password" { 78 | description = "Password for Opensearch Dashboard user" 79 | type = string 80 | } 81 | 82 | variable "opensearch_allowed_users_and_policy_arn" { 83 | description = "List of allowed users and roles" 84 | type = list(string) 85 | } 86 | 87 | -------------------------------------------------------------------------------- /data/sources/md/12factor/fr/port-binding.md: -------------------------------------------------------------------------------- 1 | ## VII.
Associations de ports 2 | ### Exportez les services via des associations de ports 3 | 4 | Les applications web sont parfois exécutées à l'intérieur d'un container de serveur web. Par exemple, les applications PHP peuvent fonctionner comme un module à l'intérieur de [HTTPD, d'Apache](http://httpd.apache.org/), ou bien les applications Java peuvent fonctionner à l'intérieur de [Tomcat](http://tomcat.apache.org/). 5 | 6 | **Les applications 12 facteurs sont complètement auto-contenues** et ne se basent pas sur l'injection au moment de l'exécution d'un serveur web dans l'environnement d'exécution pour créer les services exposés au web. L'application web **expose HTTP comme un service en l'associant à un port** et écoute les requêtes qui arrivent sur ce port. 7 | 8 | Dans un environnement de développement local, le développeur visite l'URL d'un service tel que `http://localhost:5000/` pour accéder au service exporté par leur application. Durant le déploiement, une couche de routage gère le routage des requêtes depuis un nom d'hôte qui s'expose au public, vers les processus sur lequel est associé le port. 9 | 10 | Ceci est typiquement implémenté en utilisant [la déclaration de dépendances](./dependencies) pour ajouter une bibliothèque de serveur web, tel que [Tornado](http://www.tornadoweb.org/) pour Python, [Thin](http://code.macournoyer.com/thin/) pour Ruby, ou [Jetty](http://www.eclipse.org/jetty/) pour Java et autres langages basés sur la JVM. Cela se déroule entièrement dans l'espace utilisateur, c'est-à-dire, dans le code de l'application. Le contrat avec l'environnement d'exécution, c'est l'association de port pour servir les requêtes. 11 | 12 | HTTP n'est pas le seul service qui peut être exporté à l'aide d'association de ports. Presque tout type de serveur peut fonctionner à travers l'association à un port et l'écoute des requêtes entrantes. Il y a par exemple [ejabberd](http://www.ejabberd.im/) (qui parle [XMPP](http://xmpp.org/)), et [Redis](http://redis.io/) (qui parle le [protocole Redis](http://redis.io/topics/protocol)). 13 | 14 | Notez également que l'approche par association de port signifie qu'une application peut devenir le [service externe](./backing-services) d'une autre application, en fournissant l'URL de l'application externe dans la configuration de l'application qui la consomme. 15 | -------------------------------------------------------------------------------- /data/sources/md/12factor/fr/codebase.md: -------------------------------------------------------------------------------- 1 | ## I. Base de code 2 | ### Une base de code suivie avec un système de contrôle de version, plusieurs déploiements 3 | 4 | Une application 12 facteurs est toujours suivie dans un système de contrôle de version, tel que [Git](http://git-scm.com/), [Mercurial](https://www.mercurial-scm.org/), ou [Subversion](http://subversion.apache.org/). Une copie de la base de données de suivi des révisions est appelée *dépôt de code*, souvent raccourci en *dépôt*. Le terme anglais *code repository*, raccourci en *repository* et *repo* est également utilisé. 5 | 6 | Une *base de code* correspond à chaque dépôt (dans un système de contrôle de version centralisé tel que Subversion), ou tout ensemble de dépôts qui partage un commit racine (dans un système de contrôle de version décentralisé comme Git). 
7 | 8 | ![Une base de code est associée à plusieurs déploiements](/images/codebase-deploys.png) 9 | 10 | Il y a toujours un rapport direct entre la base de code et l'application : 11 | 12 | * S'il y a plusieurs bases de code, ce n'est pas une application, c'est un système distribué. Chaque composant du système distribué est une application, et chacun peut individuellement respecter la méthodologie 12 facteurs. 13 | * Plusieurs applications partageant le même code est une violation des 12 facteurs. La solution dans ce cas est de factoriser le code partagé dans des bibliothèques qui peuvent être intégrées via un [gestionnaire de dépendances](./dependencies). 14 | 15 | Il y a seulement une base de code par application, mais il y aura plusieurs déploiements de l'application. Un *déploiement* est une instance en fonctionnement de l'application. C'est, par exemple, le site en production, ou bien un ou plusieurs sites de validation. En plus de cela, chaque développeur a une copie de l'application qui fonctionne dans son environnement local de développement, ce qui compte également comme un déploiement. 16 | 17 | La base de code est la même à travers tous les déploiements, bien que différentes versions puissent être actives dans chaque déploiement. Par exemple, un développeur a des commits qui ne sont pas encore déployés dans l'environnement de validation. L'environnement de validation a des commits qui ne sont pas encore déployés en production. Par contre, ils partagent tous la même base de code, ce qui les identifie comme étant des déploiements différents d'une même application. 18 | 19 | -------------------------------------------------------------------------------- /data/sources/md/12factor/en/processes.md: -------------------------------------------------------------------------------- 1 | ## VI. Processes 2 | ### Execute the app as one or more stateless processes 3 | 4 | The app is executed in the execution environment as one or more *processes*. 5 | 6 | In the simplest case, the code is a stand-alone script, the execution environment is a developer's local laptop with an installed language runtime, and the process is launched via the command line (for example, `python my_script.py`). On the other end of the spectrum, a production deploy of a sophisticated app may use many [process types, instantiated into zero or more running processes](./concurrency). 7 | 8 | **Twelve-factor processes are stateless and [share-nothing](http://en.wikipedia.org/wiki/Shared_nothing_architecture).** Any data that needs to persist must be stored in a stateful [backing service](./backing-services), typically a database. 9 | 10 | The memory space or filesystem of the process can be used as a brief, single-transaction cache. For example, downloading a large file, operating on it, and storing the results of the operation in the database. The twelve-factor app never assumes that anything cached in memory or on disk will be available on a future request or job -- with many processes of each type running, chances are high that a future request will be served by a different process. Even when running only one process, a restart (triggered by code deploy, config change, or the execution environment relocating the process to a different physical location) will usually wipe out all local (e.g., memory and filesystem) state. 11 | 12 | Asset packagers like [django-assetpackager](http://code.google.com/p/django-assetpackager/) use the filesystem as a cache for compiled assets. 
A twelve-factor app prefers to do this compiling during the [build stage](/build-release-run). Asset packagers such as [Jammit](http://documentcloud.github.io/jammit/) and the [Rails asset pipeline](http://ryanbigg.com/guides/asset_pipeline.html) can be configured to package assets during the build stage. 13 | 14 | Some web systems rely on ["sticky sessions"](http://en.wikipedia.org/wiki/Load_balancing_%28computing%29#Persistence) -- that is, caching user session data in memory of the app's process and expecting future requests from the same visitor to be routed to the same process. Sticky sessions are a violation of twelve-factor and should never be used or relied upon. Session state data is a good candidate for a datastore that offers time-expiration, such as [Memcached](http://memcached.org/) or [Redis](http://redis.io/). 15 | -------------------------------------------------------------------------------- /data/sources/md/12factor/en/build-release-run.md: -------------------------------------------------------------------------------- 1 | ## V. Build, release, run 2 | ### Strictly separate build and run stages 3 | 4 | A [codebase](./codebase) is transformed into a (non-development) deploy through three stages: 5 | 6 | * The *build stage* is a transform which converts a code repo into an executable bundle known as a *build*. Using a version of the code at a commit specified by the deployment process, the build stage fetches vendors [dependencies](./dependencies) and compiles binaries and assets. 7 | * The *release stage* takes the build produced by the build stage and combines it with the deploy's current [config](./config). The resulting *release* contains both the build and the config and is ready for immediate execution in the execution environment. 8 | * The *run stage* (also known as "runtime") runs the app in the execution environment, by launching some set of the app's [processes](./processes) against a selected release. 9 | 10 | ![Code becomes a build, which is combined with config to create a release.](/images/release.png) 11 | 12 | **The twelve-factor app uses strict separation between the build, release, and run stages.** For example, it is impossible to make changes to the code at runtime, since there is no way to propagate those changes back to the build stage. 13 | 14 | Deployment tools typically offer release management tools, most notably the ability to roll back to a previous release. For example, the [Capistrano](https://github.com/capistrano/capistrano/wiki) deployment tool stores releases in a subdirectory named `releases`, where the current release is a symlink to the current release directory. Its `rollback` command makes it easy to quickly roll back to a previous release. 15 | 16 | Every release should always have a unique release ID, such as a timestamp of the release (such as `2011-04-06-20:32:17`) or an incrementing number (such as `v100`). Releases are an append-only ledger and a release cannot be mutated once it is created. Any change must create a new release. 17 | 18 | Builds are initiated by the app's developers whenever new code is deployed. Runtime execution, by contrast, can happen automatically in cases such as a server reboot, or a crashed process being restarted by the process manager. Therefore, the run stage should be kept to as few moving parts as possible, since problems that prevent an app from running can cause it to break in the middle of the night when no developers are on hand. 
The build stage can be more complex, since errors are always in the foreground for a developer who is driving the deploy. 19 | 20 | -------------------------------------------------------------------------------- /data/sources/md/12factor/en/dependencies.md: -------------------------------------------------------------------------------- 1 | ## II. Dependencies 2 | ### Explicitly declare and isolate dependencies 3 | 4 | Most programming languages offer a packaging system for distributing support libraries, such as [CPAN](http://www.cpan.org/) for Perl or [Rubygems](http://rubygems.org/) for Ruby. Libraries installed through a packaging system can be installed system-wide (known as "site packages") or scoped into the directory containing the app (known as "vendoring" or "bundling"). 5 | 6 | **A twelve-factor app never relies on implicit existence of system-wide packages.** It declares all dependencies, completely and exactly, via a *dependency declaration* manifest. Furthermore, it uses a *dependency isolation* tool during execution to ensure that no implicit dependencies "leak in" from the surrounding system. The full and explicit dependency specification is applied uniformly to both production and development. 7 | 8 | For example, [Bundler](https://bundler.io/) for Ruby offers the `Gemfile` manifest format for dependency declaration and `bundle exec` for dependency isolation. In Python there are two separate tools for these steps -- [Pip](http://www.pip-installer.org/en/latest/) is used for declaration and [Virtualenv](http://www.virtualenv.org/en/latest/) for isolation. Even C has [Autoconf](http://www.gnu.org/s/autoconf/) for dependency declaration, and static linking can provide dependency isolation. No matter what the toolchain, dependency declaration and isolation must always be used together -- only one or the other is not sufficient to satisfy twelve-factor. 9 | 10 | One benefit of explicit dependency declaration is that it simplifies setup for developers new to the app. The new developer can check out the app's codebase onto their development machine, requiring only the language runtime and dependency manager installed as prerequisites. They will be able to set up everything needed to run the app's code with a deterministic *build command*. For example, the build command for Ruby/Bundler is `bundle install`, while for Clojure/[Leiningen](https://github.com/technomancy/leiningen#readme) it is `lein deps`. 11 | 12 | Twelve-factor apps also do not rely on the implicit existence of any system tools. Examples include shelling out to ImageMagick or `curl`. While these tools may exist on many or even most systems, there is no guarantee that they will exist on all systems where the app may run in the future, or whether the version found on a future system will be compatible with the app. If the app needs to shell out to a system tool, that tool should be vendored into the app. 13 | -------------------------------------------------------------------------------- /data/sources/md/12factor/en/concurrency.md: -------------------------------------------------------------------------------- 1 | ## VIII. Concurrency 2 | ### Scale out via the process model 3 | 4 | Any computer program, once run, is represented by one or more processes. Web apps have taken a variety of process-execution forms. For example, PHP processes run as child processes of Apache, started on demand as needed by request volume. 
Java processes take the opposite approach, with the JVM providing one massive uberprocess that reserves a large block of system resources (CPU and memory) on startup, with concurrency managed internally via threads. In both cases, the running process(es) are only minimally visible to the developers of the app. 5 | 6 | ![Scale is expressed as running processes, workload diversity is expressed as process types.](/images/process-types.png) 7 | 8 | **In the twelve-factor app, processes are a first class citizen.** Processes in the twelve-factor app take strong cues from [the unix process model for running service daemons](https://adam.herokuapp.com/past/2011/5/9/applying_the_unix_process_model_to_web_apps/). Using this model, the developer can architect their app to handle diverse workloads by assigning each type of work to a *process type*. For example, HTTP requests may be handled by a web process, and long-running background tasks handled by a worker process. 9 | 10 | This does not exclude individual processes from handling their own internal multiplexing, via threads inside the runtime VM, or the async/evented model found in tools such as [EventMachine](https://github.com/eventmachine/eventmachine), [Twisted](http://twistedmatrix.com/trac/), or [Node.js](http://nodejs.org/). But an individual VM can only grow so large (vertical scale), so the application must also be able to span multiple processes running on multiple physical machines. 11 | 12 | The process model truly shines when it comes time to scale out. The [share-nothing, horizontally partitionable nature of twelve-factor app processes](./processes) means that adding more concurrency is a simple and reliable operation. The array of process types and number of processes of each type is known as the *process formation*. 13 | 14 | Twelve-factor app processes [should never daemonize](http://dustin.github.com/2010/02/28/running-processes.html) or write PID files. Instead, rely on the operating system's process manager (such as [systemd](https://www.freedesktop.org/wiki/Software/systemd/), a distributed process manager on a cloud platform, or a tool like [Foreman](http://blog.daviddollar.org/2011/05/06/introducing-foreman.html) in development) to manage [output streams](./logs), respond to crashed processes, and handle user-initiated restarts and shutdowns. 15 | -------------------------------------------------------------------------------- /data/sources/md/12factor/fr/admin-processes.md: -------------------------------------------------------------------------------- 1 | ## XII. Processus d'administration 2 | ### Lancez les processus d'administration et de maintenance comme des one-off-processes 3 | 4 | La [formation de processus](./concurrency) est la liste des processus qui sont utilisés pour le fonctionnement normal de l'application (comme gérer les requêtes web) lorsqu'elle tourne. Les développeurs vont souvent vouloir effectuer des tâches occasionnelles d'administration ou de maintenance, comme : 5 | 6 | * Lancer les migrations de base de données (par ex. `manage.py migrate` avec Django, `rake db:migrate` avec Rails). 7 | * Lancer une console (également appelée terminal [REPL](http://en.wikipedia.org/wiki/Read-eval-print_loop)) pour exécuter du code arbitraire ou inspecter les modèles de l'application dans la base de données. La plupart des langages fournissent un terminal REPL en lançant l'interpréteur sans arguments (par exemple `python` ou `perl`), ou dans certains cas à l'aide d'une commande dédiée (par ex. 
`irb` pour Ruby, `rails console` pour Rails). 8 | * Exécuter des scripts ponctuels inclus dans le dépôt de code (par ex. `php scripts/fix_bad_records.php`). 9 | 10 | Les processus ponctuels d'administration devraient être lancés dans un environnement identique à ceux des [processus standards](./processes) de l'application. Ils s'exécutent sur une [release](./build-release-run), en utilisant la même [base de code](./codebase) et [configuration](./config) que tout processus qui tourne pour cette release. Le code d'administration doit être livré avec le code de l'application afin d'éviter les problèmes de synchronisation. 11 | 12 | La même technique d'[isolation de dépendances](./dependencies) doit être utilisée sur tous les types de processus. Par exemple, si le processus web de Ruby utilise la commande `bundle exec thin start`, alors une migration de base de données devrait être faite via `bundle exec rake db:migrate`. De la même manière, un programme Python qui utilise Virtualenv devrait utiliser la commande incluse `bin/python` pour lancer à la fois le serveur web Tornado et tout processus administrateur `manage.py`. 13 | 14 | Les applications 12 facteurs préfèrent les langages qui fournissent un terminal REPL prêt à l'emploi, et qui facilitent l'exécution de scripts ponctuels. Dans un déploiement local, les développeurs invoquent les processus ponctuels d'administration depuis le terminal, par une commande directement dans le répertoire où se trouve l'application. Dans un déploiement de production, les développeurs peuvent utiliser ssh ou d'autres mécanismes d'exécution de commandes fournis par l'environnement d'exécution de ce déploiement pour exécuter un tel processus. 15 | -------------------------------------------------------------------------------- /data/sources/md/12factor/fr/logs.md: -------------------------------------------------------------------------------- 1 | ## XI. Logs 2 | ### Traitez les logs comme des flux d'évènements 3 | 4 | Les *logs* fournissent de la visibilité au comportement de l'application qui s'exécute. Dans des environnements de type serveur, ils sont généralement écrits dans un fichier, sur le disque (dans un fichier de log). Mais c'est simplement un format de sortie. 5 | 6 | Les logs sont des [flux (en)](https://adam.herokuapp.com/past/2011/4/1/logs_are_streams_not_files/) d'agrégats d'évènements, ordonnés dans le temps, collectés à travers les flux de sortie de tous les processus et services externes qui tournent. Les logs, dans leur forme brute, sont au format texte avec un événement par ligne (bien que les traces d'exécutions puissent s'étaler sur plusieurs lignes). Les logs n'ont pas de début ou de fin fixe, mais se remplissent en continu tant que l'application est en marche. 7 | 8 | **Une application 12 facteurs ne s'inquiète jamais du routage ou du stockage de ses flux de sortie.** Elle ne devrait pas tenter d'écrire ou de gérer les fichiers de logs. À la place, chaque processus qui tourne écrit ses flux d'événements, sans tampon, vers `stdout`, la sortie standard ; en phase de développement local, les développeurs pourront voir ce flux dans leur terminal et observer le comportement de l'application. 9 | 10 | Dans les déploiements de validation ou de production, les flux de chaque processus seront capturés par leur environnement d'exécution, assemblés avec les autres flux de l'application, et routés vers une ou plusieurs destinations pour un visionnage et un archivage de longue durée. 
Le lieu d'archivage n'est pas visible et ne peut être configuré par l'application : il est complètement géré par l'environnement d'exécution. Des routeurs open source de logs (tels que [Logplex](https://github.com/heroku/logplex) et [Fluentd](https://github.com/fluent/fluentd)) existent pour cela. 11 | 12 | Le flux d'événements d'une application peut être routé dans un fichier, ou surveillé en temps réel (avec tail) dans un terminal. Plus pertinent, les flux peuvent être envoyés vers un outil d'indexation et d'archivage des logs tel que [Splunk](http://www.splunk.com/), ou bien dans un entrepôt de données générique comme [Hadoop/Hive](http://hive.apache.org/). Ces systèmes sont très puissants et flexibles pour inspecter le comportement de l'application au cours du temps, ce qui inclut : 13 | 14 | * Trouver un événement spécifique dans le passé 15 | * Faire des graphiques à grande échelle des tendances (comme le nombre de requêtes par minute) 16 | * Lever des alertes, à partir d'heuristiques définies par l'utilisateur (comme alerter dès que la quantité d'erreurs par minute dépasse un certain seuil) 17 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "applied-ai-rag-assistant" 3 | version = "0.0.1" 4 | description = "Advanced RAG assistant" 5 | authors = ["Pierre Bittner "] 6 | license = "Apache 2.0" 7 | readme = "README.md" 8 | packages = [{include = "rag_assistant"}] 9 | 10 | [tool.poetry.dependencies] 11 | python = ">=3.10,<3.12" 12 | langchain = {version = ">=0.2.0"} 13 | openai = ">=1.6.1" 14 | duckduckgo-search = ">4.0" 15 | pypdf = ">=4.0.1,<5.0.0" # "^3.12.2" 16 | sentence-transformers = "^2.2.2" 17 | torch = ">=2.0.0, !=2.0.1" 18 | tabulate = "^0.9.0" 19 | streamlit-feedback = "^0.1.3" 20 | langchain-experimental = "^0.0.59" 21 | streamlit = ">=1.26" 22 | docarray = "^0.40.0" 23 | langchain-community = ">=0.2.0" # must have "^0.0.29" 24 | langchain-openai = ">=0.1.1" 25 | streamlit_pdf_viewer = ">=0.0.12" 26 | # There is an incompatibility issue with chromadb and llama-index 27 | # was working with 0.4.3 but it is no longer supported by llama-index and there is an incompatibility issue 28 | # newer versions of chromadb raised "ModuleNotFoundError: No module named 'hnswlib'" in vector/local_hnsw.py, line 26 29 | # llama-index-vector-stores-chroma (0.1.6) depends on chromadb (>=0.4.22,<0.5.0) 30 | # at runtime 31 | # using FAISS for now as vectorstore 32 | # chromadb = "0.4.3" # Must have "0.4.3" 33 | # solved by doing pip uninstall hnswlib 34 | # and pip install chroma-hnswlib 35 | chromadb = ">=0.4.24" 36 | boto3= ">=1.28.59" 37 | chroma-hnswlib = ">=0.7.3" 38 | numexpr = "^2.8.8" 39 | langchainhub = "^0.1.14" 40 | llama-index = ">=0.10.20" 41 | llama-index-llms-openai = ">=0.1.12" 42 | llama-index-readers-wikipedia = ">=0.1.3" 43 | llama-index-vector-stores-chroma = ">=0.1.6" 44 | llama-index-embeddings-huggingface = ">=0.1.6" 45 | nltk = ">=3.8.1" 46 | python-dotenv= ">=1.0.0" 47 | PyPDF2 = ">=3.0.1" 48 | faiss-cpu = ">=1.8.0" 49 | wikipedia = ">=1.4.0" 50 | pathlib = ">=1.0.1" 51 | pytest = ">=8.1.1" 52 | trulens_eval = ">=0.27.0" 53 | mistralai = ">=0.4.0" 54 | llama-index-llms-mistralai = ">=0.1.10" 55 | llama-index-llms-bedrock = ">=0.1.8" 56 | llama-index-llms-anthropic = ">=0.1.11" 57 | llama-index-embeddings-mistralai = ">=0.1.4" 58 | llama-index-embeddings-langchain = ">=0.1.2" 59 | llama-index-embeddings-bedrock =
">=0.1.5" 60 | protobuf = "=3.20.3" 61 | 62 | # pysqlite3-binary = "^0.5.2.post3" 63 | langchain_mistralai = ">=0.1.0" 64 | langchain-aws = ">=0.1.4" 65 | 66 | opensearch-py = ">=2.6.0" 67 | requests-aws4auth = ">=1.2.3" 68 | 69 | lark = ">=1.1.9" 70 | 71 | [tool.poetry.group.dev.dependencies] 72 | black = "^23.3.0" 73 | mypy = "^1.4.1" 74 | pre-commit = "^3.3.3" 75 | 76 | [build-system] 77 | requires = ["poetry-core"] 78 | build-backend = "poetry.core.masonry.api" 79 | -------------------------------------------------------------------------------- /data/sources/md/12factor/en/backing-services.md: -------------------------------------------------------------------------------- 1 | ## IV. Backing services 2 | ### Treat backing services as attached resources 3 | 4 | A *backing service* is any service the app consumes over the network as part of its normal operation. Examples include datastores (such as [MySQL](http://dev.mysql.com/) or [CouchDB](http://couchdb.apache.org/)), messaging/queueing systems (such as [RabbitMQ](http://www.rabbitmq.com/) or [Beanstalkd](https://beanstalkd.github.io)), SMTP services for outbound email (such as [Postfix](http://www.postfix.org/)), and caching systems (such as [Memcached](http://memcached.org/)). 5 | 6 | Backing services like the database are traditionally managed by the same systems administrators who deploy the app's runtime. In addition to these locally-managed services, the app may also have services provided and managed by third parties. Examples include SMTP services (such as [Postmark](http://postmarkapp.com/)), metrics-gathering services (such as [New Relic](http://newrelic.com/) or [Loggly](http://www.loggly.com/)), binary asset services (such as [Amazon S3](http://aws.amazon.com/s3/)), and even API-accessible consumer services (such as [Twitter](http://dev.twitter.com/), [Google Maps](https://developers.google.com/maps/), or [Last.fm](http://www.last.fm/api)). 7 | 8 | **The code for a twelve-factor app makes no distinction between local and third party services.** To the app, both are attached resources, accessed via a URL or other locator/credentials stored in the [config](./config). A [deploy](./codebase) of the twelve-factor app should be able to swap out a local MySQL database with one managed by a third party (such as [Amazon RDS](http://aws.amazon.com/rds/)) without any changes to the app's code. Likewise, a local SMTP server could be swapped with a third-party SMTP service (such as Postmark) without code changes. In both cases, only the resource handle in the config needs to change. 9 | 10 | Each distinct backing service is a *resource*. For example, a MySQL database is a resource; two MySQL databases (used for sharding at the application layer) qualify as two distinct resources. The twelve-factor app treats these databases as *attached resources*, which indicates their loose coupling to the deploy they are attached to. 11 | 12 | A production deploy attached to four backing services. 13 | 14 | Resources can be attached to and detached from deploys at will. For example, if the app's database is misbehaving due to a hardware issue, the app's administrator might spin up a new database server restored from a recent backup. The current production database could be detached, and the new database attached -- all without any code changes. 
15 | -------------------------------------------------------------------------------- /data/sources/md/12factor/en/disposability.md: -------------------------------------------------------------------------------- 1 | ## IX. Disposability 2 | ### Maximize robustness with fast startup and graceful shutdown 3 | 4 | **The twelve-factor app's [processes](./processes) are *disposable*, meaning they can be started or stopped at a moment's notice.** This facilitates fast elastic scaling, rapid deployment of [code](./codebase) or [config](./config) changes, and robustness of production deploys. 5 | 6 | Processes should strive to **minimize startup time**. Ideally, a process takes a few seconds from the time the launch command is executed until the process is up and ready to receive requests or jobs. Short startup time provides more agility for the [release](./build-release-run) process and scaling up; and it aids robustness, because the process manager can more easily move processes to new physical machines when warranted. 7 | 8 | Processes **shut down gracefully when they receive a [SIGTERM](http://en.wikipedia.org/wiki/SIGTERM)** signal from the process manager. For a web process, graceful shutdown is achieved by ceasing to listen on the service port (thereby refusing any new requests), allowing any current requests to finish, and then exiting. Implicit in this model is that HTTP requests are short (no more than a few seconds), or in the case of long polling, the client should seamlessly attempt to reconnect when the connection is lost. 9 | 10 | For a worker process, graceful shutdown is achieved by returning the current job to the work queue. For example, on [RabbitMQ](http://www.rabbitmq.com/) the worker can send a [`NACK`](http://www.rabbitmq.com/amqp-0-9-1-quickref.html#basic.nack); on [Beanstalkd](https://beanstalkd.github.io), the job is returned to the queue automatically whenever a worker disconnects. Lock-based systems such as [Delayed Job](https://github.com/collectiveidea/delayed_job#readme) need to be sure to release their lock on the job record. Implicit in this model is that all jobs are [reentrant](http://en.wikipedia.org/wiki/Reentrant_%28subroutine%29), which typically is achieved by wrapping the results in a transaction, or making the operation [idempotent](http://en.wikipedia.org/wiki/Idempotence). 11 | 12 | Processes should also be **robust against sudden death**, in the case of a failure in the underlying hardware. While this is a much less common occurrence than a graceful shutdown with `SIGTERM`, it can still happen. A recommended approach is use of a robust queueing backend, such as Beanstalkd, that returns jobs to the queue when clients disconnect or time out. Either way, a twelve-factor app is architected to handle unexpected, non-graceful terminations. [Crash-only design](http://lwn.net/Articles/191059/) takes this concept to its [logical conclusion](http://docs.couchdb.org/en/latest/intro/overview.html). 
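
As a hedged illustration of the graceful-shutdown behaviour described above (a sketch, not code from this repository), a worker process can trap `SIGTERM`, stop taking new work, finish the job in hand, and exit. The `next_job` helper and its contents are hypothetical stand-ins for a real queue client.

```python
import signal
import time

shutdown_requested = False

def handle_sigterm(signum, frame):
    # Stop accepting new work; let the current job finish, then exit.
    global shutdown_requested
    shutdown_requested = True

signal.signal(signal.SIGTERM, handle_sigterm)

def next_job():
    # Placeholder for pulling a job from a queue (RabbitMQ, Beanstalkd, ...).
    return {"sleep": 1}

while not shutdown_requested:
    job = next_job()
    time.sleep(job["sleep"])  # simulate processing the job

# Reaching this point means the process shut down gracefully.
print("worker exiting cleanly")
```

Sudden death remains possible under a process manager, which is why the queueing backend, rather than the process itself, must own job re-delivery.
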
13 | 14 | 15 | -------------------------------------------------------------------------------- /terraform/aws/iam.tf: -------------------------------------------------------------------------------- 1 | resource "aws_iam_role" "ai_assistant_ecs_execution_role" { 2 | name = "ai_assistant_ecs_execution_role_https" 3 | 4 | assume_role_policy = jsonencode({ 5 | Version = "2012-10-17", 6 | Statement = [{ 7 | Action = "sts:AssumeRole", 8 | Effect = "Allow", 9 | Principal = { 10 | Service = "ecs-tasks.amazonaws.com" 11 | } 12 | }] 13 | }) 14 | } 15 | 16 | resource "aws_iam_role_policy_attachment" "ai_assistant_secret_read_role_attachment" { 17 | policy_arn = "arn:aws:iam::aws:policy/SecretsManagerReadWrite" 18 | role = aws_iam_role.ai_assistant_ecs_execution_role.name 19 | } 20 | 21 | resource "aws_iam_role_policy_attachment" "ai_assistant_cloud_watch_access_role_attachment" { 22 | policy_arn = "arn:aws:iam::aws:policy/CloudWatchLogsFullAccess" 23 | role = aws_iam_role.ai_assistant_ecs_execution_role.name 24 | } 25 | 26 | resource "aws_iam_role_policy_attachment" "ai_assistant_efs_access_role_attachment" { 27 | policy_arn = "arn:aws:iam::aws:policy/AmazonElasticFileSystemFullAccess" 28 | role = aws_iam_role.ai_assistant_ecs_execution_role.name 29 | } 30 | 31 | resource "aws_iam_role_policy_attachment" "ai_assistant_ecs_execution_role_attachment" { 32 | policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy" 33 | role = aws_iam_role.ai_assistant_ecs_execution_role.name 34 | } 35 | 36 | resource "aws_iam_role_policy_attachment" "ai_assistant_ec2_container_registry_role_attachment" { 37 | policy_arn = "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly" 38 | role = aws_iam_role.ai_assistant_ecs_execution_role.name 39 | } 40 | 41 | resource "aws_iam_role_policy_attachment" "ai_assistant_bedrock_access_role_attachment" { 42 | policy_arn = "arn:aws:iam::aws:policy/AmazonBedrockFullAccess" 43 | role = aws_iam_role.ai_assistant_ecs_execution_role.name 44 | } 45 | 46 | resource "aws_iam_role_policy_attachment" "ai_assistant_opensearch_access_role_attachment" { 47 | policy_arn = "arn:aws:iam::aws:policy/AmazonOpenSearchServiceFullAccess" 48 | role = aws_iam_role.ai_assistant_ecs_execution_role.name 49 | } 50 | 51 | 52 | resource "aws_iam_policy" "ai_assistant_s3_access_policy" { 53 | name = "ai_assistant_s3_access_policy" 54 | 55 | policy = jsonencode({ 56 | Version = "2012-10-17" 57 | Statement = [ 58 | { 59 | Effect = "Allow" 60 | Action = [ 61 | "s3:GetObject", 62 | "s3:PutObject", 63 | "s3:DeleteObject" 64 | ] 65 | Resource = [ 66 | "${aws_s3_bucket.ai_assistant_bucket.arn}/*" 67 | ] 68 | } 69 | ] 70 | }) 71 | } 72 | 73 | resource "aws_iam_role_policy_attachment" "ai_assistant_s3_access_role_attachment" { 74 | policy_arn = aws_iam_policy.ai_assistant_s3_access_policy.arn 75 | role = aws_iam_role.ai_assistant_ecs_execution_role.name 76 | } -------------------------------------------------------------------------------- /data/sources/md/12factor/fr/processes.md: -------------------------------------------------------------------------------- 1 | ## VI. Processus 2 | ### Exécutez l'application comme un ou plusieurs processus sans état 3 | 4 | L'application est exécutée dans l'environnement d'exécution comme un ou plusieurs *processus*. 
5 | 6 | Dans la situation la plus simple, le code est un script indépendant, l'environnement d'exécution est l'ordinateur portable du développeur sur lequel est installé de quoi exécuter le langage, et le processus est lancé depuis la ligne de commande. (par exemple, `python mon_script.py`). De l'autre côté du spectre, un déploiement de production d'une application sophistiquée peut utiliser plusieurs [types de processus, instanciés dans zéro ou plus processus en fonctionnement](./concurrency). 7 | 8 | **Les processus 12 facteurs sont sans état et ne partagent [rien (en)](http://en.wikipedia.org/wiki/Shared_nothing_architecture).** Toute donnée qui doit être persistée doit être stockée dans un [service externe](./backing-services) stateful, typiquement une base de données. 9 | 10 | L'espace mémoire ou le système de fichier du processus peut être utilisé comme cache momentané pour des transactions uniques. Par exemple, télécharger un gros fichier, effectuer une opération dessus, puis stocker le résultat de l'opération dans la base de données. Les applications 12 facteurs ne supposent jamais que quelque chose ayant été mis en cache en mémoire ou sur le disque sera disponible dans une future requête ou job — avec plusieurs processus de chaque type qui s'exécutent, il y a de grandes chances qu'une future requête soit effectuée par un processus différent. Même lorsque l'on fait tourner seulement un processus, un redémarrage (déclenché par le déploiement du code, un changement de configuration, ou l'environnement d'exécution qui déplace le processus vers un lieu physique différent) va généralement balayer toutes les modifications locales (c'est-à-dire en mémoire et sur le disque). 11 | 12 | Des outils de création de paquets de ressources (ou "asset packagers") (tel que [Jammit](http://documentcloud.github.io/jammit/) ou [django-compressor](http://django-compressor.readthedocs.org/)) utilisent le système de fichier comme cache pour les ressources compilées. Une application 12 facteurs préfère faire cette compilation durant l'[étape d'assemblage](./build-release-run), comme avec le [pipeline des ressources de Rails](http://guides.rubyonrails.org/asset_pipeline.html), plutôt que durant l'exécution. 13 | 14 | Certains systèmes web s'appuient sur des ["sessions persistantes" (en)](http://en.wikipedia.org/wiki/Load_balancing_%28computing%29#Persistence) -- c'est-à-dire, mettre en cache les données de session utilisateur dans le processus de l'application et attendre que les requêtes futures du même visiteur seront routées dans le même processus. Les sessions persistantes sont une violation des 12 facteurs, qu'il ne faudrait jamais utiliser. 15 | Les états de session sont de bons candidats pour un datastore qui offre des dates d'expiration, comme [Memcached](http://memcached.org/) ou [Redis](http://redis.io/). 16 | 17 | -------------------------------------------------------------------------------- /data/sources/md/12factor/fr/dependencies.md: -------------------------------------------------------------------------------- 1 | ## II. Dépendances 2 | ### Déclarez explicitement et isolez les dépendances 3 | 4 | La plupart des langages de programmation offrent des systèmes pour créer des paquets à partir de bibliothèques afin de les distribuer, tel que [CPAN](http://www.cpan.org/) pour Perl ou [Rubygems](http://rubygems.org/) pour Ruby. 
Les bibliothèques installées à travers un système de packaging peuvent être installées à travers tout le système, ou bien limitées au répertoire contenant l'application (que l'on appelle les "vendor" ou "bundles"). 5 | 6 | **Une application 12 facteurs ne dépend jamais de l'existence implicite de packages au niveau du système**. Elle déclare toutes ses dépendances, complètement et exactement, à travers un manifeste de *déclaration de dépendances*. De plus, elle utilise un outil d'isolation des dépendances durant l'exécution afin d'assurer qu'aucune dépendance implicite ne s'introduise depuis le système environnant. Les spécifications complètes et explicites sont appliquées uniformément en développement comme en production. 7 | 8 | Par exemple, [Bundler](https://bundler.io/) pour Ruby fournit le format de manifeste `Gemfile` pour la déclaration des dépendances, ainsi que la commande `bundle exec` pour l'isolation des dépendances. En python, il y a deux outils séparés pour ces étapes -- [Pip](http://www.pip-installer.org/en/latest/) est utilisé pour la déclaration et [Virtualenv](http://www.virtualenv.org/en/latest/) pour l'isolation. Même le C dispose d'[Autoconf](http://www.gnu.org/s/autoconf/) pour les déclarations de dépendances, et la liaison statique peut fournir l'isolation des dépendances. Peu importe la chaîne d'outils, la déclaration et l'isolation des dépendances doivent toujours être utilisées ensemble -- seulement l'un ou l'autre ne suffit pas à satisfaire les 12 facteurs. 9 | 10 | Un des bénéfices de la déclaration explicite des dépendances est que cela simplifie la mise en place pour les développeurs qui découvrent l'application. Les nouveaux développeurs peuvent jeter un œil à la base de code de l'application sur leur machine de développement, en ayant besoin uniquement d'avoir de quoi exécuter le langage ainsi que le gestionnaire de dépendances installé en pré-requis. Ils pourront mettre en place tout ce qui est nécessaire pour faire fonctionner le code de l'application de manière déterministe grâce à une *commande d'assemblage* (commande de build). Par exemple, la commande d'assemblage pour Ruby/Bundler est `bundle install`, alors que pour Clojure/[Leiningen](https://github.com/technomancy/leiningen#readme) c'est `lein deps`. 11 | 12 | Les applications 12 facteurs ne s'appuient pas sur l'existence implicite d'outils système, par exemple ImageMagick ou `curl`. Bien que ces outils puissent exister sur beaucoup voire la plupart des systèmes d'exploitation, il n'y a pas de garantie qu'ils existeront sur tous les systèmes où l'application sera exécutée à l'avenir, ou si la version disponible sur un système futur sera compatible avec l'application. Si l'application dépend d'un outil système, cet outil doit être distribué avec l'application. 13 | -------------------------------------------------------------------------------- /data/sources/md/12factor/en/config.md: -------------------------------------------------------------------------------- 1 | ## III. Config 2 | ### Store config in the environment 3 | 4 | An app's *config* is everything that is likely to vary between [deploys](./codebase) (staging, production, developer environments, etc). This includes: 5 | 6 | * Resource handles to the database, Memcached, and other [backing services](./backing-services) 7 | * Credentials to external services such as Amazon S3 or Twitter 8 | * Per-deploy values such as the canonical hostname for the deploy 9 | 10 | Apps sometimes store config as constants in the code. 
This is a violation of twelve-factor, which requires **strict separation of config from code**. Config varies substantially across deploys, code does not. 11 | 12 | A litmus test for whether an app has all config correctly factored out of the code is whether the codebase could be made open source at any moment, without compromising any credentials. 13 | 14 | Note that this definition of "config" does **not** include internal application config, such as `config/routes.rb` in Rails, or how [code modules are connected](http://docs.spring.io/spring/docs/current/spring-framework-reference/html/beans.html) in [Spring](http://spring.io/). This type of config does not vary between deploys, and so is best done in the code. 15 | 16 | Another approach to config is the use of config files which are not checked into revision control, such as `config/database.yml` in Rails. This is a huge improvement over using constants which are checked into the code repo, but still has weaknesses: it's easy to mistakenly check in a config file to the repo; there is a tendency for config files to be scattered about in different places and different formats, making it hard to see and manage all the config in one place. Further, these formats tend to be language- or framework-specific. 17 | 18 | **The twelve-factor app stores config in *environment variables*** (often shortened to *env vars* or *env*). Env vars are easy to change between deploys without changing any code; unlike config files, there is little chance of them being checked into the code repo accidentally; and unlike custom config files, or other config mechanisms such as Java System Properties, they are a language- and OS-agnostic standard. 19 | 20 | Another aspect of config management is grouping. Sometimes apps batch config into named groups (often called "environments") named after specific deploys, such as the `development`, `test`, and `production` environments in Rails. This method does not scale cleanly: as more deploys of the app are created, new environment names are necessary, such as `staging` or `qa`. As the project grows further, developers may add their own special environments like `joes-staging`, resulting in a combinatorial explosion of config which makes managing deploys of the app very brittle. 21 | 22 | In a twelve-factor app, env vars are granular controls, each fully orthogonal to other env vars. They are never grouped together as "environments", but instead are independently managed for each deploy. This is a model that scales up smoothly as the app naturally expands into more deploys over its lifetime. 23 | -------------------------------------------------------------------------------- /data/sources/md/12factor/fr/build-release-run.md: -------------------------------------------------------------------------------- 1 | ## V. Assemblez, publiez, exécutez 2 | ### Séparez strictement les étapes d'assemblage et d'exécution 3 | 4 | Une [base de code](./codebase) est transformée en un déploiement (non-développement) à travers les étapes suivantes : 5 | 6 | * L'*étape d'assemblage* (ou "build") est une transformation qui convertit un dépôt de code en un paquet autonome exécutable appelé l'assemblage (ou "build"). En utilisant une version du code référencée par un commit spécifié lors du processus de déploiement, l'étape d'assemblage va chercher toutes les [dépendances externes](./dependencies) et compile les fichiers binaires et les ressources. 
7 | * L'*étape de publication * (ou "release") prend l'assemblage produit à l'étape précédente et le combine avec la [configuration](./config) de déploiement courante. La release résultante contient à la fois l'assemblage et la configuration, et elle est prête pour une exécution immédiate dans l'environnement d'exécution. 8 | * L'*étape d'exécution* (ou "runtime") fait fonctionner l'application dans l'environnement d'exécution, en lançant un ensemble de [processus](./processes) de l'application associée à la release considérée. 9 | 10 | ![Le code devient un assemblage, qui est combiné à la configuration pour créer une release](/images/release.png) 11 | 12 | **Les applications 12 facteurs ont une séparation stricte entre les étapes d'assemblage, de publication et d'exécution.** Par exemple, il est impossible de faire des changements dans le code au moment de son exécution, car il n'y a pas moyen de propager ces changements vers l'étape de build. 13 | 14 | Les outils de déploiement offrent généralement des outils de gestion de release, permettant notamment de revenir à une release antérieure. Par exemple, l'outil de déploiement [Capistrano](https://github.com/capistrano/capistrano/wiki) stocke les releases dans un sous-répertoire appelé `releases`, où la release courante est un lien symbolique vers le répertoire de release courante. Sa commande `rollback` permet de facilement revenir à une release précédente. 15 | 16 | Chaque release devrait toujours avoir un identifiant unique, comme un horodatage (timestamp) de la release (tel que `2011-04-06-20:32:17`) ou un nombre incrémental (tel que `v100`). La liste des releases est accessible en écriture incrémentale uniquement, et il n'est pas possible de modifier une release une fois qu'elle a été réalisée. Tout changement doit créer une nouvelle release. 17 | 18 | Les assemblages sont initiés par le développeur de l'application dès que du nouveau code est déployé. Son exécution, au contraire, peut avoir lieu automatiquement en cas d'un reboot du serveur, ou du crash d'un processus qui est relancé par le gestionnaire de processus. De ce fait, l'étape d'exécution doit se limiter à un nombre minimal de parties mobiles, car les problèmes qui empêchent une application de fonctionner peuvent entraîner des dysfonctionnements au milieu de la nuit alors qu'aucun développeur ne sera là pour les corriger. L'étape d'assemblage peut être plus complexe, car les erreurs pourront toujours être résolues par le développeur qui réalise le déploiement. 19 | 20 | -------------------------------------------------------------------------------- /data/sources/md/12factor/fr/backing-services.md: -------------------------------------------------------------------------------- 1 | ## IV. Services externes 2 | ### Traitez les services externes comme des ressources attachées 3 | 4 | Un *service externe* (backing service) correspond à tout service que l'application utilise à travers le réseau pour son fonctionnement nominal. Cela concerne par exemple les bases de données (tel que [MySQL](http://dev.mysql.com/) ou [CouchDB](http://couchdb.apache.org/)), les systèmes de messages/files (tel que [RabbitMQ](http://www.rabbitmq.com/) ou [Beanstalkd](https://beanstalkd.github.io)), les services SMTP pour l'envoi d'email (comme [Postfix](http://www.postfix.org/)), ainsi que les systèmes de cache (comme [Memcached](http://memcached.org/)). 
5 | 6 | Les *services externes* comme la base de données sont le plus souvent gérés par les mêmes administrateurs réseau que ceux qui gèrent l'application de production. En plus de ces services gérés localement, l'application peut également avoir besoin de services gérés par des tiers. Cela concerne par exemple les services SMTP (comme [Postmark](http://postmarkapp.com/)), les services de gestion de métriques (comme [New Relic](http://newrelic.com/) ou [Loggly](http://www.loggly.com/)), les services de ressources binaires (comme [Amazon S3](http://aws.amazon.com/s3/)), et même les services que l'on peut consommer à travers une API (comme [Twitter](http://dev.twitter.com/), [Google Maps](https://developers.google.com/maps/), ou [Last.fm](http://www.last.fm/api)). 7 | 8 | **Le code d'une application 12 facteurs ne fait pas de distinction entre les services locaux et les services tiers**. Pour l'application, ce sont tous les deux des ressources attachées, accessibles via une URL ou un autre système de localisation et d'authentification stockée dans la [configuration](./config). Un [déploiement](./codebase) d'une application 12 facteurs doit pouvoir remplacer une base de données MySQL locale par une autre gérée par des tiers ([Amazon RDS](http://aws.amazon.com/rds/), par exemple) sans le moindre changement dans le code de l'application. De la même manière, un serveur SMTP local doit pouvoir être remplacé par un service tiers (Postmark, par exemple) sans changements dans le code. Dans les deux cas, seules les informations de configurations doivent changer. 9 | 10 | Chaque service externe est une *ressource*. Par exemple, une base de données MySQL est une ressource. Deux bases de données MySQL (utilisées pour faire du sharding dans la couche applicative) correspondent à deux ressources distinctes. L'application 12 facteurs traite ces bases de données comme des ressources attachées, ce qui indique leur couplage faible au déploiement auquel elles sont rattachées. 11 | 12 | Un déploiement de production lié à quatre services externes. 13 | 14 | Les ressources peuvent être attachées et détachées à volonté à des déploiements. Par exemple, si la base de données de l'application pose problème pour des raisons matérielles, l'administrateur de l'application peut vouloir lancer un nouveau serveur de base de données restauré à partir d'une sauvegarde récente. L'application courante pourrait être détachée de l'ancienne, puis rattachée à la nouvelle — le tout sans changement dans le code. 
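
A small illustrative sketch of the same point for an SMTP backing service (the `SMTP_HOST`/`SMTP_PORT` variable names and the addresses are hypothetical, and this snippet is not part of the repository): pointing the app at a local Postfix or at a hosted provider such as Postmark is purely a configuration change.

```python
import os
import smtplib
from email.message import EmailMessage

# The locator for the SMTP backing service comes from config, not code.
host = os.environ.get("SMTP_HOST", "localhost")  # local Postfix or a hosted SMTP service
port = int(os.environ.get("SMTP_PORT", "25"))

msg = EmailMessage()
msg["From"] = "app@example.com"
msg["To"] = "user@example.com"
msg["Subject"] = "Hello"
msg.set_content("Sent via whichever SMTP resource is attached to this deploy.")

with smtplib.SMTP(host, port) as smtp:
    smtp.send_message(msg)
```
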
15 | -------------------------------------------------------------------------------- /rag_assistant/streamlit_app.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import openai 3 | from llama_index.llms.openai import OpenAI 4 | 5 | try: 6 | from llama_index import VectorStoreIndex, ServiceContext, Document, SimpleDirectoryReader 7 | except ImportError: 8 | from llama_index.core import VectorStoreIndex, ServiceContext, Document, SimpleDirectoryReader 9 | 10 | st.set_page_config(page_title="Chat with the Streamlit docs, powered by LlamaIndex", page_icon="🦙", layout="centered", 11 | initial_sidebar_state="auto", menu_items=None) 12 | openai.api_key = st.secrets.OPENAI_API_KEY 13 | st.title("Chat with docs, powered by Streamlit & LlamaIndex 💬🦙") 14 | st.info( 15 | "Check out the full tutorial to build this app in our [blog post](https://blog.streamlit.io/build-a-chatbot-with-custom-data-sources-powered-by-llamaindex/)", 16 | icon="📃") 17 | 18 | if "messages" not in st.session_state.keys(): # Initialize the chat messages history 19 | st.session_state.messages = [ 20 | {"role": "assistant", "content": "Ask me a question about cloud application development best practice!"} 21 | ] 22 | 23 | 24 | @st.cache_resource(show_spinner=False) 25 | def load_data(): 26 | with st.spinner(text="Loading and indexing the Streamlit docs – hang tight! This should take 1-2 minutes."): 27 | reader = SimpleDirectoryReader(input_dir="./data", recursive=True) 28 | docs = reader.load_data() 29 | # llm = OpenAI(model="gpt-3.5-turbo", temperature=0.5, system_prompt="You are an expert o$ 30 | # index = VectorStoreIndex.from_documents(docs) 31 | service_context = ServiceContext.from_defaults(llm=OpenAI(model="gpt-3.5-turbo", temperature=0.5, 32 | system_prompt="You are an expert on the Streamlit Python library and your job is to answer technical questions. Assume that all questions are related to the Streamlit Python library. Keep your answers technical and based on facts – do not hallucinate features.")) 33 | index = VectorStoreIndex.from_documents(docs, service_context=service_context) 34 | return index 35 | 36 | 37 | index = load_data() 38 | 39 | if "chat_engine" not in st.session_state.keys(): # Initialize the chat engine 40 | st.session_state.chat_engine = index.as_chat_engine(chat_mode="condense_question", verbose=True) 41 | 42 | if prompt := st.chat_input("Your question"): # Prompt for user input and save to chat history 43 | st.session_state.messages.append({"role": "user", "content": prompt}) 44 | 45 | for message in st.session_state.messages: # Display the prior chat messages 46 | with st.chat_message(message["role"]): 47 | st.write(message["content"]) 48 | 49 | # If last message is not from assistant, generate a new response 50 | if st.session_state.messages[-1]["role"] != "assistant": 51 | with st.chat_message("assistant"): 52 | with st.spinner("Thinking..."): 53 | response = st.session_state.chat_engine.chat(prompt) 54 | st.write(response.response) 55 | message = {"role": "assistant", "content": response.response} 56 | st.session_state.messages.append(message) # Add response to message history -------------------------------------------------------------------------------- /data/sources/md/12factor/fr/disposability.md: -------------------------------------------------------------------------------- 1 | ## IX. 
Jetable 2 | ### Maximisez la robustesse avec des démarrages rapides et des arrêts gracieux 3 | 4 | **Les [processus](./processes) des applications 12 facteurs sont *jetables*, c'est-à-dire qu'ils peuvent être démarrés ou stoppés en un instant.** Cela simplifie un rapide grossissement vertical, le déploiement rapide du [code](./codebase) ou de changements dans la [configuration](./config), ainsi que la robustesse des déploiements de production. 5 | 6 | Les processus doivent viser à **minimiser le temps de démarrage**. Idéalement, un processus prend quelques secondes entre le moment où une commande le lance et celui où il est en marche et prêt à recevoir des requêtes ou du travail. Un court temps de démarrage rend les processus de [release](./build-release-run) et de scalabilité verticale plus agiles; il aide également à la robustesse, car les gestionnaires de processus peuvent plus facilement déplacer des processus vers de nouvelles machines physiques lorsque c'est nécessaire. 7 | 8 | Les processus **s'éteignent gracieusement lorsqu'ils reçoivent un signal [SIGTERM (fr)](https://fr.wikipedia.org/wiki/SIGTERM)** du gestionnaire de processus. Pour un processus web, s'éteindre en douceur se fait en arrêtant d'écouter sur le port de service (refusant, par la même occasion, toute nouvelle requête), en permettant à la requête courante de se terminer, et en quittant ensuite. Ce qui est implicite dans ce modèle, c'est que les requêtes sont courtes (pas plus de quelques secondes), ou dans le cas de longues requêtes, les clients doivent pouvoir tenter de se reconnecter sans problème lorsque la connection est perdue. 9 | 10 | Pour un processus de worker, s'éteindre gracieusement est réalisé en renvoyant le travail en cours dans la file de travaux. Par exemple, avec [RabbitMQ](http://www.rabbitmq.com/) le worker peut envoyer un message [`NACK`](http://www.rabbitmq.com/amqp-0-9-1-quickref.html#basic.nack); avec [Beanstalkd](https://beanstalkd.github.io), le travail est renvoyé dans la file automatiquement dès qu'un worker se déconnecte. Les systèmes basés sur des verrous, comme [Delayed Job](https://github.com/collectiveidea/delayed_job#readme) doivent s'assurer de supprimer le verrou de leur travail en cours. Il est implicite dans ce modèle que toutes les tâches sont [réentrantes (fr)](http://fr.wikipedia.org/wiki/R%C3%A9entrance), ce qui est réalisé en englobant les résultats dans une transaction, ou en rendant l'opération [idempotente (fr)](http://fr.wikipedia.org/wiki/Idempotence). 11 | 12 | Les processus doivent également être **robustes face aux morts subites**, dans le cas d'une panne du hardware sous-jacent. Bien que ce soit bien moins courant qu'un arrêt gracieux avec `SIGTERM`, cela peut arriver malgré tout. L'approche recommandée est l'utilisation d'un backend robuste de files de messages, tel que Beanstalkd, capable de renvoyer les tâches dans la file lorsqu'un client se déconnecte ou ne répond plus. Dans les deux cas, une application 12 facteurs est structurée pour gérer des fins inattendues et non-gracieuses. Le [design crash-only (en)](http://lwn.net/Articles/191059/) amène ce concept à sa [conclusion logique (en)](http://docs.couchdb.org/en/latest/intro/overview.html). 13 | 14 | 15 | -------------------------------------------------------------------------------- /data/sources/md/12factor/fr/concurrency.md: -------------------------------------------------------------------------------- 1 | ## VIII. 
Concurrence 2 | ### Grossissez à l'aide du modèle de processus 3 | 4 | Tout programme informatique, lorsqu'il s'exécute, est représenté par un ou plusieurs processus. Les applications web ont adopté différentes approches d'exécution de processus. Par exemple, les processus PHP s'exécutent comme des processus fils d'Apache, démarrés à la demande lorsque c'est requis par le volume de requêtes. Les processus Java ont adopté l'approche inverse, avec une machine virtuelle qui fournit un super-processus massif qui réserve un gros bloc de ressources système (processeur et mémoire) au démarrage, et la concurrence est gérée en interne à l'aide de threads. Dans les deux cas, les processus qui tournent sont à peine visibles aux développeurs de l'application. 5 | 6 | ![La scalabilité est exprimée par des processus qui s'exécutent, la diversité de la charge de travail est exprimée par les types de processus](/images/process-types.png) 7 | 8 | **Dans une application 12 facteurs, les processus sont des élèves modèles**. Les processus dans une application 12 facteurs s'inspirent fortement du [modèle de processus unix pour faire fonctionner les daemon (en)](https://adam.herokuapp.com/past/2011/5/9/applying_the_unix_process_model_to_web_apps/). En utilisant ce modèle, les développeurs peuvent structurer l'application pour gérer différents types de charge en assignant chaque type de travail à un *type de processus*. Par exemple, les requêtes HTTP peuvent être gérées par un processus web, et les tâches d'arrière-plan ayant une longue durée d'exécution peuvent être des processus dits "worker". 9 | 10 | Chaque processus peut malgré tout et individuellement, gérer son propre multiplexage interne, avec des threads à l'intérieur de la machine virtuelle d'exécution, ou à l'aide du modèle d'évènements asynchrones que l'on retrouve dans des outils comme [EventMachine](https://github.com/eventmachine/eventmachine), [Twisted](http://twistedmatrix.com/trac/), ou [Node.js](http://nodejs.org/). Mais une machine virtuelle a individuellement une taille limitée (grandissement vertical), donc l'application doit également pouvoir déclencher plusieurs processus qui tournent sur plusieurs machines physiques. 11 | 12 | Le modèle de processus prend de l'envergure dès qu'il est question de grossir. La [nature sans partage, avec une partition horizontale des processus des applications 12 facteurs](./processes) signifie qu'ajouter plus de concurrence est une opération simple et fiable. La liste des types de processus et du nombre de processus de chaque type est appelée *formation de processus*. 13 | 14 | Les processus des applications 12 facteurs ne devraient [jamais être des daemons (en)](http://dustin.github.com/2010/02/28/running-processes.html) ou écrire des fichiers PID. À la place, utilisez le gestionnaire de processus du système d'exploitation (tel que [systemd](https://www.freedesktop.org/wiki/Software/systemd/), un gestionnaire de processus distribué sur une plateforme cloud, ou un outil comme [Foreman (en)](http://blog.daviddollar.org/2011/05/06/introducing-foreman.html) durant le développement) pour gérer les [flux de sortie](./logs), répondre à un processus qui plante, et gérer les redémarrages et les arrêts initiés par les utilisateurs. 
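
To tie the process-formation idea to something concrete, here is a minimal sketch with assumed names (not a file from this repository): one codebase exposes several process types, each started in the foreground by a process manager such as Foreman or systemd, with logs written to stdout rather than the process daemonizing itself.

```python
import sys
import time

def web():
    # A real web process would bind a port; here we only log to stdout.
    while True:
        print("web: handling requests", flush=True)
        time.sleep(5)

def worker():
    while True:
        print("worker: processing background jobs", flush=True)
        time.sleep(5)

if __name__ == "__main__":
    # The process manager decides how many of each type to run,
    # e.g. two "web" processes and four "worker" processes.
    process_type = sys.argv[1] if len(sys.argv) > 1 else "web"
    {"web": web, "worker": worker}[process_type]()
```

Scaling out then means asking the process manager for more instances of a given type, with no change to the code.
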
15 | -------------------------------------------------------------------------------- /conf/config.ini: -------------------------------------------------------------------------------- 1 | [DEFAULT] 2 | APP_NAME=LLM+RAG Assistant 3 | 4 | [MODEL_PROVIDER] 5 | # AZURE OPENAI MISTRAL BEDROCK 6 | MODEL_PROVIDER = BEDROCK 7 | 8 | [AZURE] 9 | # omega-gpt4-32k-deployment omega-gpt4-8k-deployment omega-gpt35turbo text-embedding-ada-002 10 | AZURE_OPENAI_DEPLOYMENT=omega-gpt4-32k-deployment 11 | # ada-2 text-embedding-ada-002 12 | AZURE_OPENAI_EMBEDDING_DEPLOYMENT=ada-2 13 | AZURE_OPENAI_ENDPOINT=https://azopenai-01-fr.openai.azure.com/ 14 | AZURE_OPENAI_API_VERSION=2023-07-01-preview 15 | CHAT_MODEL=omega-gpt4-32k-deployment 16 | 17 | [OPENAI] 18 | # gpt-4-turbo gpt-3.5-turbo gpt-4o 19 | OPENAI_MODEL_NAME=gpt-4o 20 | EMBEDDINGS_MODEL=text-embedding-ada-002 21 | 22 | [MISTRAL] 23 | # mistral-tiny, mistral-small, mistral-medium, mistral-large 24 | CHAT_MODEL=mistral-large-latest 25 | EMBEDDINGS_MODEL=mistral-embed 26 | 27 | [BEDROCK] 28 | #anthropic-claude 29 | CHAT_MODEL=anthropic.claude-3-sonnet-20240229-v1:0 30 | CLIENT=anthropic.claude-3-sonnet-20240229-v1:0 31 | # amazon.titan-embed-image-v1 amazon.titan-embed-text-v1 32 | EMBEDDINGS_MODEL=amazon.titan-embed-image-v1 33 | AWS_PROFILE_NAME= 34 | AWS_REGION_NAME=eu-west-3 35 | BEDROCK_ENDPOINT_URL=https://bedrock-runtime.eu-west-3.amazonaws.com 36 | 37 | ;CHAT_MODEL=mistral.mistral-large-2402-v1:0 38 | ;CLIENT=mistral.mistral-large-2402-v1:0 39 | ;EMBEDDINGS_MODEL=amazon.titan-embed-image-v1 40 | ;AWS_PROFILE_NAME= 41 | ;AWS_REGION_NAME=eu-west-3 42 | ;BEDROCK_ENDPOINT_URL=https://bedrock-runtime.eu-west-3.amazonaws.com 43 | 44 | [VECTORDB] 45 | # faiss or chroma or opensearch 46 | # moving to faiss due to "ModuleNotFoundError: No module named 'hnswlib'" issue with ChromaDB 47 | # ChromaDB 0.4.3 solve the issue but is not supported by llama_index that is compatible with mistralai 48 | # using latest version of chroma and chroma-hnswlib lib does not make it 49 | vectordb = chroma 50 | chroma_persist_directory = data/chroma/ 51 | faiss_persist_directory = data/faiss 52 | collection_name = Default 53 | opensearch_url = https://vpc-ai-assistant-ysxpkzzgfwwgfbg35qz3tal4ee.eu-west-1.es.amazonaws.com 54 | opensearch_aws_region = eu-west-1 55 | opensearch_bulk_size = 500 56 | 57 | [KNOWLEDGE] 58 | CHILD_CHUNK_SIZE=400 59 | CHILD_CHUNK_OVERLAP=20 60 | 61 | PARENT_CHUNK_SIZE=2000 62 | PARENT_CHUNK_OVERLAP=0 63 | 64 | 65 | [LANGCHAIN] 66 | verbose = True 67 | # "stuff", "map_reduce", "refine", "map_rerank" 68 | chain_type = stuff 69 | # "similarity", "mmr", "similarity_score_threshold" 70 | SEARCH_TYPE = mmr 71 | SEARCH_TOP_K = 6 72 | 73 | [CACHE] 74 | CACHE_FOLDER=data/cache 75 | 76 | [FILE_MANAGEMENT] 77 | UPLOAD_DIRECTORY=data/cache/upload_directory 78 | 79 | [VISION] 80 | ; BEDROCK ANTHROPIC CLAUDE 3 81 | VISION_PROVIDER=BEDROCK 82 | VISION_MODEL=anthropic.claude-3-sonnet-20240229-v1:0 83 | VISION_EMBEDDINGS=amazon.titan-embed-image-v1 84 | IMAGE_OUTPUT_DIR=data/cache/vision 85 | 86 | [LLAMA_INDEX] 87 | LLAMA_INDEX_ROOT_DIR=data/llama_index 88 | SENTENCE_INDEX_DIR=sentence_index 89 | MERGING_INDEX_DIR=merging_index 90 | SUBQUERY_INDEX_DIR=subquery_index 91 | SUMMARY_INDEX_DIR=summary_index 92 | 93 | [DATACATALOG] 94 | DATA_CATALOG=data/data-catalog/data-catalog.xlsx 95 | 96 | [DATABASE] 97 | # ATHENA or DUCKDB 98 | DATABASE_SOURCE=DUCKDB 99 | 100 | [ATHENA] 101 | QUALIFIED_TABLE_NAME= 102 | SCHEMA_PATH= 103 | 104 | [DUCKDB] 105 | 
QUALIFIED_TABLE_NAME=data/duckdb... 106 | SCHEMA_PATH=data/duckdb/ 107 | 108 | [DOCUMENTS_STORAGE] 109 | # S3 or LOCAL or NONE 110 | INTERFACE=LOCAL 111 | DOCUMENTS_PATH=data 112 | S3_BUCKET_NAME= 113 | -------------------------------------------------------------------------------- /tests/utils/test_utilsrag_lc.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import os 3 | from dotenv import load_dotenv, find_dotenv 4 | from langchain_community.document_loaders.pdf import PyPDFLoader 5 | from langchain_community.vectorstores.chroma import Chroma 6 | from langchain_mistralai import ChatMistralAI, MistralAIEmbeddings 7 | 8 | import numpy as np 9 | 10 | import nest_asyncio 11 | 12 | from rag_assistant.utils.utilsrag_lc import agent_lc_factory 13 | 14 | load_dotenv(find_dotenv()) 15 | 16 | # Set OpenAI API key from Streamlit secrets 17 | openai_api_key = os.getenv('OPENAI_API_KEY') 18 | 19 | nest_asyncio.apply() 20 | 21 | 22 | def get_openai_api_key(): 23 | _ = load_dotenv(find_dotenv()) 24 | 25 | return os.getenv("OPENAI_API_KEY") 26 | 27 | 28 | def get_hf_api_key(): 29 | _ = load_dotenv(find_dotenv()) 30 | 31 | return os.getenv("HUGGINGFACE_API_KEY") 32 | 33 | 34 | from trulens_eval import ( 35 | Feedback, 36 | TruLlama, 37 | OpenAI, 38 | Tru, TruChain, Select 39 | ) 40 | 41 | openai = OpenAI() 42 | 43 | qa_relevance = ( 44 | Feedback(openai.relevance_with_cot_reasons, name="Answer Relevance") 45 | .on_input_output() 46 | ) 47 | 48 | qs_relevance = ( 49 | Feedback(openai.relevance_with_cot_reasons, name="Context Relevance") 50 | .on_input() 51 | .on(TruLlama.select_source_nodes().node.text) 52 | .aggregate(np.mean) 53 | ) 54 | 55 | groundedness = ( 56 | Feedback(openai.groundedness_measure_with_cot_reasons, name = "Groundedness") 57 | .on(Select.RecordCalls.retrieve.rets.collect()) 58 | .on_output() 59 | ) 60 | 61 | feedbacks = [qa_relevance, qs_relevance, groundedness] 62 | 63 | 64 | def get_prebuilt_trulens_recorder(query_engine, app_id): 65 | tru_recorder = TruChain( 66 | query_engine, 67 | app_id=app_id, 68 | feedbacks=feedbacks 69 | ) 70 | return tru_recorder 71 | 72 | 73 | # @pytest.fixture(scope="module") 74 | def temp_dir(request): 75 | # Setup: Create a temporary directory for the test module 76 | # TODO should do something with the vectordb 77 | pass 78 | 79 | 80 | @pytest.fixture 81 | def llm_prepare(): 82 | # llm = OpenAI(model="gpt-3.5-turbo", temperature=0.1) 83 | llm = ChatMistralAI() 84 | 85 | return llm 86 | 87 | 88 | @pytest.fixture 89 | def embeddings_prepare(): 90 | # embed_model = OpenAIEmbeddings() 91 | embed_model = MistralAIEmbeddings() 92 | 93 | return embed_model 94 | 95 | 96 | @pytest.fixture 97 | def docs_prepare(): 98 | loader = PyPDFLoader("tests/utils/eBook-How-to-Build-a-Career-in-AI.pdf") 99 | documents = loader.load() 100 | return documents 101 | 102 | 103 | @pytest.fixture 104 | def eval_questions_prepare(): 105 | eval_questions = [] 106 | with open('tests/utils/eval_questions.txt', 'r') as file: 107 | for line in file: 108 | # Remove newline character and convert to integer 109 | item = line.strip() 110 | print(item) 111 | eval_questions.append(item) 112 | return eval_questions 113 | 114 | 115 | def test_lc_agent_stuff_4_similarity(llm_prepare, embeddings_prepare, docs_prepare, eval_questions_prepare, trulens_prepare): 116 | 117 | db = Chroma.from_documents( 118 | documents=docs_prepare, 119 | embedding=embeddings_prepare, 120 | collection_name="Test_RAG_LC", 121 | ) 122 | 123 | retrieval_qa_chain 
= agent_lc_factory(chain_type="stuff", 124 | llm=llm_prepare, 125 | search_kwargs={"k": 4}, 126 | search_type="similarity", vectorstore=db) 127 | 128 | response = retrieval_qa_chain("How do I get started on a personal project in AI?") 129 | print(f"response: {str(response)}") 130 | assert response is not None, "L'interprétation n'a pas retourné de résultat." -------------------------------------------------------------------------------- /.github/workflows/aws.yml: -------------------------------------------------------------------------------- 1 | # This workflow will build and push a new container image to Amazon ECR, 2 | # and then will deploy a new task definition to Amazon ECS, when there is a push to the "main" branch. 3 | # 4 | # To use this workflow, you will need to complete the following set-up steps: 5 | # 6 | # 1. Create an ECR repository to store your images. 7 | # For example: `aws ecr create-repository --repository-name my-ecr-repo --region us-east-2`. 8 | # Replace the value of the `ECR_REPOSITORY` environment variable in the workflow below with your repository's name. 9 | # Replace the value of the `AWS_REGION` environment variable in the workflow below with your repository's region. 10 | # 11 | # 2. Create an ECS task definition, an ECS cluster, and an ECS service. 12 | # For example, follow the Getting Started guide on the ECS console: 13 | # https://us-east-2.console.aws.amazon.com/ecs/home?region=us-east-2#/firstRun 14 | # Replace the value of the `ECS_SERVICE` environment variable in the workflow below with the name you set for the Amazon ECS service. 15 | # Replace the value of the `ECS_CLUSTER` environment variable in the workflow below with the name you set for the cluster. 16 | # 17 | # 3. Store your ECS task definition as a JSON file in your repository. 18 | # The format should follow the output of `aws ecs register-task-definition --generate-cli-skeleton`. 19 | # Replace the value of the `ECS_TASK_DEFINITION` environment variable in the workflow below with the path to the JSON file. 20 | # Replace the value of the `CONTAINER_NAME` environment variable in the workflow below with the name of the container 21 | # in the `containerDefinitions` section of the task definition. 22 | # 23 | # 4. Store an IAM user access key in GitHub Actions secrets named `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`. 24 | # See the documentation for each action used below for the recommended IAM policies for this IAM user, 25 | # and best practices on handling the access key credentials. 26 | 27 | name: Deploy to Amazon ECS 28 | 29 | on: 30 | push: 31 | branches: [ "main" ] 32 | 33 | env: 34 | AWS_REGION: eu-west-1 # set this to your preferred AWS region, e.g. 
us-west-1 35 | ECR_REPOSITORY: ai_assistant # set this to your Amazon ECR repository name 36 | ECS_SERVICE: ai_assistant-service-iac-https # set this to your Amazon ECS service name 37 | ECS_CLUSTER: ai_assistant # set this to your Amazon ECS cluster name 38 | CONTAINER_NAME: ai_assistant # set this to the name of the container in the 39 | # containerDefinitions section of your task definition 40 | 41 | permissions: 42 | contents: read 43 | 44 | jobs: 45 | deploy: 46 | name: Deploy 47 | runs-on: ubuntu-latest 48 | environment: production 49 | 50 | steps: 51 | - name: Checkout 52 | uses: actions/checkout@v3 53 | 54 | - name: Configure AWS credentials 55 | uses: aws-actions/configure-aws-credentials@v1 56 | with: 57 | aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} 58 | aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 59 | aws-region: ${{ env.AWS_REGION }} 60 | 61 | - name: Login to Amazon ECR 62 | id: login-ecr 63 | uses: aws-actions/amazon-ecr-login@v1 64 | 65 | - name: Build, tag, and push image to Amazon ECR 66 | id: build-image 67 | env: 68 | ECR_REGISTRY: 441525731509.dkr.ecr.eu-west-1.amazonaws.com 69 | IMAGE_TAG: latest 70 | run: | 71 | # Build a docker container and 72 | # push it to ECR so that it can 73 | # be deployed to ECS. 74 | docker build -t $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG . 75 | docker push $ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG 76 | echo "image=$ECR_REGISTRY/$ECR_REPOSITORY:$IMAGE_TAG" >> $GITHUB_OUTPUT 77 | 78 | - name: Deploy to ECS 79 | run: | 80 | aws ecs update-service --cluster $ECS_CLUSTER --service $ECS_SERVICE --force-new-deployment 81 | -------------------------------------------------------------------------------- /data/sources/md/12factor/fr/config.md: -------------------------------------------------------------------------------- 1 | ## III. Configuration 2 | ### Stockez la configuration dans l'environnement 3 | 4 | La *configuration* d'une application est tout ce qui est susceptible de varier entre des [déploiements](./codebase) (validation, production, environnement de développement, etc.). Cela inclut : 5 | 6 | * Les ressources gérées par la base de données, Memcached, ou tout autre [service de stockage](./backing-services) 7 | * Les identifiants pour des services externes, tel qu'Amazon S3 ou Twitter 8 | * Les valeurs spécifiques au déploiement, tel que son nom d'hôte canonique 9 | 10 | Les applications stockent parfois la configuration avec des constantes dans le code. C'est une violation des 12 facteurs, qui requiert une **stricte séparation de la configuration et du code**. La configuration peut varier substantiellement à travers les déploiements, alors que ce n'est pas le cas du code. 11 | 12 | Un bon moyen de tester si une application a correctement séparé son code, c'est de se demander si l'application pourrait être rendue open-source à tout instant, sans compromettre d'identifiants. 13 | 14 | Notez que cette définition de "configuration" n'inclut **pas** la configuration interne de l'application, tel que `config/routes.rb` avec Rails, ou comment [les modules du noyau sont connectés (en)](http://docs.spring.io/spring/docs/current/spring-framework-reference/html/beans.html) dans [Spring](http://spring.io/). Ce type de configuration ne varie pas à travers les déploiements, et est ainsi mieux réalisé dans le code. 15 | 16 | Une autre approche de la configuration, c'est d'utiliser des fichiers de configuration qui ne sont pas inclus dans le système de contrôle de version, par exemple `config/database.yml` de Rails. 
C'est une amélioration considérable par rapport à l'utilisation de constantes qui sont versionnées dans le dépôt de code, mais a toujours des faiblesses : il est facile d'ajouter par inadvertance un fichier de configuration dans le dépôt. Il y a une tendance à ce que les fichiers de configuration soient dispersés à différents endroits et dans différents formats, rendant ainsi difficile de voir et gérer la configuration à un unique endroit. De plus, ces formats ont tendance à être spécifiques à un langage ou un framework. 17 | 18 | **Les applications 12 facteurs stockent la configuration dans des *variables d'environnement*** (souvent raccourcies en *variables d'env*, ou *env*). Les variables d'environnement sont faciles à changer entre des déploiements sans changer le moindre code ; contrairement aux fichiers de configuration, il y a peu de chance pour qu'elles soient ajoutées au dépôt de code accidentellement ; et contrairement aux fichiers de configuration personnalisés, ou tout autre mécanisme de configuration comme les propriétés système Java, ce sont des standards agnostiques du langage ou du système d'exploitation. 19 | 20 | Un autre aspect de la gestion de configuration est le groupage. Parfois, les applications regroupent la configuration dans des groupes nommés (souvent appelés les "environnements"), nommés ainsi d'après des déploiements spécifiques, comme les environnements `development`, `test`, et `production` de Rails. Cette méthode ne permet pas de grossir proprement : lorsque l'on ajoute de nouveaux déploiement à l'application, de nouveaux noms d'environnement sont nécessaires, comme `validation` ou `qa`. Quand le projet grossit encore plus, les développeurs vont avoir tendance à ajouter leurs propres environnements particuliers, comme `joes-validation`, ce qui entraîne une explosion combinatoire de la configuration qui rend la gestion des déploiements de l'application très fragile. 21 | 22 | Dans une application 12 facteurs, les variables d'environnement permettent un contrôle granulaire, chacune complètement orthogonale aux autres variables d'environnement. Elles ne sont jamais groupées ensemble en "environnements", mais sont plutôt gérées indépendamment pour chaque déploiement. C'est un modèle qui permet de grossir verticalement en souplesse, lorsque l'application grossit naturellement en un plus grand nombre de déploiements au cours de sa vie. 
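To make the "config in the environment" idea concrete, here is a minimal Python sketch of reading deploy-specific values from environment variables instead of versioned constants. The variable names (`OPENAI_API_KEY`, `S3_BUCKET_NAME`, `DATABASE_URL`) are illustrative assumptions; this is not how this repository's `config_loader` or `.env` handling actually works.

```python
# settings.py -- minimal sketch: deploy-specific values come from the
# environment, never from constants checked into the repository.
# Variable names below are illustrative, not this project's real settings.
import os


class Settings:
    def __init__(self) -> None:
        # Required secret: fail fast at startup if it is missing.
        self.openai_api_key = os.environ["OPENAI_API_KEY"]
        # Optional values with safe local defaults.
        self.s3_bucket = os.environ.get("S3_BUCKET_NAME", "")
        self.database_url = os.environ.get("DATABASE_URL", "sqlite:///local.db")


settings = Settings()
print(f"bucket={settings.s3_bucket!r}")
```

Each deployment then supplies its own values, for example from a local `.env` file during development or from the orchestrator's secret mechanism in production, without any code change.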
23 | -------------------------------------------------------------------------------- /rag_assistant/pages/2_Load_Document.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import streamlit as st 4 | import os 5 | import io 6 | 7 | from llama_index.core import SimpleDirectoryReader 8 | from llama_index.core.schema import Document as LIDocument 9 | 10 | from utils.auth import check_password 11 | from utils.constants import DocumentType, SupportedFileType, Metadata, CollectionType 12 | from utils.config_loader import load_config 13 | from utils.utilsdoc import load_doc, load_store 14 | from utils.utilsrag_li import build_summary_index 15 | 16 | from utils.utilsvision import load_image 17 | from utils.utilsfile import put_file 18 | 19 | config = load_config() 20 | 21 | app_name = config['DEFAULT']['APP_NAME'] 22 | collection_name = config['VECTORDB']['collection_name'] 23 | upload_directory = config['FILE_MANAGEMENT']['UPLOAD_DIRECTORY'] 24 | 25 | st.set_page_config(page_title=f"""📄 {app_name} 🤗""", page_icon="📄") 26 | 27 | 28 | def main(): 29 | st.title(f"""Chargement des Connaissances 📄""") 30 | 31 | # with st.form("Upload File"): 32 | topic_name = st.text_input("Thème du document (ex: API, Cloud, Data, Architecture, Sécurité, ...)") 33 | 34 | file_type = st.radio("Type de document", [e.value for e in DocumentType], index=None) 35 | 36 | pdfs = st.file_uploader("Document(s) à transmettre", type=[e.value for e in SupportedFileType], 37 | accept_multiple_files=True) 38 | 39 | disabled = True 40 | if (file_type is not None) and (topic_name is not None) and (pdfs is not None) and (len(pdfs)): 41 | disabled = False 42 | 43 | 44 | with st.container(): 45 | st.subheader("Traitement des images") 46 | analyse_images = st.checkbox("Analyser les images") 47 | image_only = st.checkbox("Traiter uniquement les images (test mode)", disabled=(not analyse_images)) 48 | restart_image_analysis = st.checkbox("Relancer l'analyse d'image (test mode)", disabled=(not analyse_images)) 49 | 50 | with st.container(): 51 | st.subheader("Autres options") 52 | generate_summary = st.checkbox("Générer le sommaire", disabled=True) 53 | upload_only = st.checkbox("Enregistrement des documents uniquement") 54 | 55 | if st.button("Transmettre", disabled=disabled): 56 | 57 | 58 | upload_files(analyse_images, file_type, generate_summary, image_only, pdfs, restart_image_analysis, topic_name, upload_only) 59 | 60 | 61 | def upload_files(analyse_images, file_type, generate_summary, image_only, pdfs, restart_image_analysis, topic_name, upload_only): 62 | file_paths = [] 63 | if not os.path.exists(upload_directory): 64 | os.makedirs(upload_directory) 65 | for pdf in pdfs: 66 | file_path = os.path.join(upload_directory, pdf.name) 67 | with open(file_path, 'wb') as f: 68 | f.write(pdf.read()) 69 | file_paths.append(file_path) 70 | put_file(io.BytesIO(pdf.getvalue()), pdf.name, CollectionType.DOCUMENTS.value) 71 | metadata = {Metadata.DOCUMENT_TYPE.value: file_type, Metadata.TOPIC.value: topic_name} 72 | docs = [] 73 | if not image_only: 74 | docs += load_doc(pdfs, metadata) 75 | if analyse_images: 76 | image_docs = load_image(pdfs, metadata, restart_image_analysis) 77 | docs += image_docs 78 | if not upload_only: 79 | load_store(docs, collection_name=collection_name) 80 | if generate_summary: 81 | docs_li = docs_prepare( 82 | #input_files=file_paths, 83 | input_dir=upload_directory 84 | ) 85 | summary_index = build_summary_index(docs_li) 86 | 87 | 88 | def 
docs_prepare(input_files: Optional[list[str]] = None, input_dir: Optional[str] = None) -> list[LIDocument]: 89 | documents = SimpleDirectoryReader( 90 | input_files=input_files, 91 | input_dir=input_dir, 92 | required_exts=["."+e.value for e in SupportedFileType] 93 | ).load_data() 94 | return documents 95 | 96 | 97 | if __name__ == "__main__": 98 | if not check_password(): 99 | # Do not continue if check_password is not True. 100 | st.stop() 101 | main() 102 | -------------------------------------------------------------------------------- /data/sources/md/12factor/en/dev-prod-parity.md: -------------------------------------------------------------------------------- 1 | ## X. Dev/prod parity 2 | ### Keep development, staging, and production as similar as possible 3 | 4 | Historically, there have been substantial gaps between development (a developer making live edits to a local [deploy](./codebase) of the app) and production (a running deploy of the app accessed by end users). These gaps manifest in three areas: 5 | 6 | * **The time gap**: A developer may work on code that takes days, weeks, or even months to go into production. 7 | * **The personnel gap**: Developers write code, ops engineers deploy it. 8 | * **The tools gap**: Developers may be using a stack like Nginx, SQLite, and OS X, while the production deploy uses Apache, MySQL, and Linux. 9 | 10 | **The twelve-factor app is designed for [continuous deployment](http://avc.com/2011/02/continuous-deployment/) by keeping the gap between development and production small.** Looking at the three gaps described above: 11 | 12 | * Make the time gap small: a developer may write code and have it deployed hours or even just minutes later. 13 | * Make the personnel gap small: developers who wrote code are closely involved in deploying it and watching its behavior in production. 14 | * Make the tools gap small: keep development and production as similar as possible. 15 | 16 | Summarizing the above into a table: 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 |
Traditional appTwelve-factor app
Time between deploysWeeksHours
Code authors vs code deployersDifferent peopleSame people
Dev vs production environmentsDivergentAs similar as possible
40 | 41 | [Backing services](./backing-services), such as the app's database, queueing system, or cache, is one area where dev/prod parity is important. Many languages offer libraries which simplify access to the backing service, including *adapters* to different types of services. Some examples are in the table below.

| Type | Language | Library | Adapters |
| --- | --- | --- | --- |
| Database | Ruby/Rails | ActiveRecord | MySQL, PostgreSQL, SQLite |
| Queue | Python/Django | Celery | RabbitMQ, Beanstalkd, Redis |
| Cache | Ruby/Rails | ActiveSupport::Cache | Memory, filesystem, Memcached |
69 | 70 | Developers sometimes find great appeal in using a lightweight backing service in their local environments, while a more serious and robust backing service will be used in production. For example, using SQLite locally and PostgreSQL in production; or local process memory for caching in development and Memcached in production. 71 | 72 | **The twelve-factor developer resists the urge to use different backing services between development and production**, even when adapters theoretically abstract away any differences in backing services. Differences between backing services mean that tiny incompatibilities crop up, causing code that worked and passed tests in development or staging to fail in production. These types of errors create friction that disincentivizes continuous deployment. The cost of this friction and the subsequent dampening of continuous deployment is extremely high when considered in aggregate over the lifetime of an application. 73 | 74 | Lightweight local services are less compelling than they once were. Modern backing services such as Memcached, PostgreSQL, and RabbitMQ are not difficult to install and run thanks to modern packaging systems, such as [Homebrew](http://mxcl.github.com/homebrew/) and [apt-get](https://help.ubuntu.com/community/AptGet/Howto). Alternatively, declarative provisioning tools such as [Chef](http://www.opscode.com/chef/) and [Puppet](http://docs.puppetlabs.com/) combined with light-weight virtual environments such as [Docker](https://www.docker.com/) and [Vagrant](http://vagrantup.com/) allow developers to run local environments which closely approximate production environments. The cost of installing and using these systems is low compared to the benefit of dev/prod parity and continuous deployment. 75 | 76 | Adapters to different backing services are still useful, because they make porting to new backing services relatively painless. But all deploys of the app (developer environments, staging, production) should be using the same type and version of each of the backing services. 
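The parity point can be shown with a short, hedged sketch: when the backing service is resolved from configuration, every deploy runs the same service type and version and only the connection string differs. `DATABASE_URL` is an assumed variable name and SQLAlchemy an assumed dependency; this is not code from this repository.

```python
# db.py -- sketch: one code path for every deploy; only the URL differs.
# Dev and prod both target PostgreSQL (e.g. a local Docker container vs a
# managed instance), rather than falling back to SQLite locally, which is
# exactly the divergence dev/prod parity warns against.
import os

from sqlalchemy import create_engine, text  # assumed dependency

engine = create_engine(
    os.environ.get("DATABASE_URL", "postgresql+psycopg2://app:app@localhost:5432/app")
)

with engine.connect() as conn:
    print(conn.execute(text("select version()")).scalar_one())
```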
77 | -------------------------------------------------------------------------------- /terraform/aws/ecs.tf: -------------------------------------------------------------------------------- 1 | resource "aws_ecs_cluster" "ai_assistant_cluster" { 2 | name = "ai_assistant" 3 | configuration { 4 | execute_command_configuration { 5 | kms_key_id = aws_kms_key.key.arn 6 | logging = "OVERRIDE" 7 | 8 | log_configuration { 9 | cloud_watch_encryption_enabled = true 10 | cloud_watch_log_group_name = aws_cloudwatch_log_group.ai_assistant-cloudwatch-log.name 11 | } 12 | } 13 | } 14 | } 15 | 16 | resource "aws_ecs_cluster_capacity_providers" "cluster" { 17 | cluster_name = aws_ecs_cluster.ai_assistant_cluster.name 18 | 19 | capacity_providers = ["FARGATE", "FARGATE_SPOT"] 20 | 21 | default_capacity_provider_strategy { 22 | capacity_provider = "FARGATE_SPOT" 23 | } 24 | } 25 | 26 | resource "aws_cloudwatch_log_group" "ai_assistant-cloudwatch-log" { 27 | name = "/ecs/ai_assistant-taskdef-iac-https" 28 | } 29 | 30 | resource "aws_ecs_service" "ai_assistant_service" { 31 | name = "ai_assistant-service-iac-https" 32 | cluster = aws_ecs_cluster.ai_assistant_cluster.id 33 | task_definition = aws_ecs_task_definition.ai_assistant_task_definition.arn 34 | force_new_deployment = true 35 | capacity_provider_strategy { 36 | capacity_provider = "FARGATE_SPOT" 37 | base = 1 38 | weight = 1 39 | } 40 | network_configuration { 41 | subnets = [data.aws_subnet.ai_assistant_subnet_1.id, data.aws_subnet.ai_assistant_subnet_2.id, data.aws_subnet.ai_assistant_subnet_3.id] 42 | security_groups = [aws_security_group.ai_assistant_security_group.id] 43 | assign_public_ip = true 44 | } 45 | deployment_circuit_breaker { 46 | enable = true 47 | rollback = true 48 | } 49 | desired_count = 1 50 | 51 | load_balancer { 52 | target_group_arn = aws_lb_target_group.ai_assistant_target_group_https.arn 53 | container_name = "ai_assistant_https" 54 | container_port = 80 55 | } 56 | 57 | depends_on = [aws_lb_listener.application_lb_listener] 58 | } 59 | 60 | resource "aws_ecs_task_definition" "ai_assistant_task_definition" { 61 | family = "ai_assistant-taskdef-iac-https" 62 | network_mode = "awsvpc" 63 | requires_compatibilities = ["FARGATE"] 64 | 65 | cpu = "512" 66 | memory = "1024" 67 | volume { 68 | name = "efs-volume" 69 | efs_volume_configuration { 70 | file_system_id = aws_efs_file_system.ai_assistant_efs_file_system.id 71 | root_directory = "/" 72 | transit_encryption = "ENABLED" 73 | } 74 | } 75 | volume { 76 | name = "configuration-secret" 77 | efs_volume_configuration { 78 | file_system_id = aws_efs_file_system.ai_assistant_efs_file_system.id 79 | root_directory = "/" 80 | transit_encryption = "ENABLED" 81 | } 82 | } 83 | execution_role_arn = aws_iam_role.ai_assistant_ecs_execution_role.arn 84 | task_role_arn = aws_iam_role.ai_assistant_ecs_execution_role.arn 85 | 86 | container_definitions = jsonencode([{ 87 | name = "ai_assistant_https" 88 | image = var.ecr_image_url 89 | cpu = 512 90 | memory = 1024 91 | runtime_platform = { 92 | "cpuArchitecture" : "X86_64", 93 | "operatingSystemFamily" : "LINUX" 94 | } 95 | mountPoints = [{ 96 | sourceVolume = "efs-volume" 97 | containerPath = "/app/data/chroma" 98 | readOnly = false 99 | }, 100 | { 101 | sourceVolume = "configuration-secret" 102 | containerPath = "/app/.streamlit" 103 | readOnly = false 104 | }] 105 | memoryReservation = 1024 106 | portMappings = [{ 107 | name = "ai_assistant-80-tcp" 108 | containerPort = 80 109 | hostPort = 80 110 | appProtocol = "http" 111 | }] 112 | 
logConfiguration = { 113 | logDriver = "awslogs" 114 | options = { 115 | awslogs-create-group = "true" 116 | awslogs-group = "/ecs/ai_assistant-taskdef-iac" 117 | awslogs-region = "eu-west-1" 118 | awslogs-stream-prefix = "ecs" 119 | }, 120 | } 121 | essential = true 122 | environment = [ 123 | { 124 | "name" : "LANGCHAIN_TRACING_V2", 125 | "value": "${var.langchain_tracing_v2_bool}" 126 | } 127 | ] 128 | secrets = [ 129 | { 130 | "name" : "OPENAI_API_KEY", 131 | "valueFrom" : "${data.aws_secretsmanager_secret.secret.arn}:${var.openai_key_name}::" 132 | }, 133 | { 134 | "name" : "MISTRAL_API_KEY", 135 | 136 | "valueFrom" : "${data.aws_secretsmanager_secret.secret.arn}:${var.mistral_key_name}::" 137 | }, 138 | { 139 | "name" : "HF_TOKEN", 140 | 141 | "valueFrom" : "${data.aws_secretsmanager_secret.secret.arn}:${var.hf_token_name}::" 142 | }, 143 | { 144 | "name" : "LANGCHAIN_API_KEY", 145 | 146 | "valueFrom" : "${data.aws_secretsmanager_secret.secret.arn}:${var.langchain_key_name}::" 147 | } 148 | ] 149 | }]) 150 | } 151 | -------------------------------------------------------------------------------- /data/sources/md/12factor/fr/dev-prod-parity.md: -------------------------------------------------------------------------------- 1 | ## X. Parité dev/prod 2 | ### Gardez le développement, la validation et la production aussi proches que possible 3 | 4 | Historiquement, il y a eu un fossé conséquent entre le développement (un développeur qui fait des modifications sur un [déploiement](./codebase) local de l'application) et la production (un déploiement de l'application accessible aux utilisateurs finaux). Ce fossé se manifeste de trois manières : 5 | 6 | * **Le fossé temporel** : un développeur peut travailler sur du code qui peut prendre des jours, des semaines ou des mois avant d'aller en production 7 | * **Le fossé des personnes** : les développeurs écrivent le code, et d'autres personnes le déploient. 8 | * **Le fossé des outils** : les développeurs peuvent utiliser une pile comme Nginx, SQLite, et OS X, alors que le déploiement de production utilise Apache, MySQL, et Linux. 9 | 10 | **Les applications 12 facteurs sont conçues pour le [déploiement continu (en)](http://avc.com/2011/02/continuous-deployment/) en gardant un fossé étroit entre le développement et la production.** Si l'on regarde les trois fossés décrits plus haut : 11 | 12 | * Réduire le fossé temporel : un développeur peut écrire du code et le déployer quelques heures ou même juste quelques minutes plus tard. 13 | * Réduire le fossé des personnes : les personnes qui écrivent le code sont impliquées dans son déploiement et pour surveiller son comportement en production. 14 | * Réduire le fossé des outils : réduire, autant que possible, les différences entre le développement et la production. 15 | 16 | Si l'on résume cela en un tableau : 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 |
| | Application traditionnelle | Application 12 facteurs |
| --- | --- | --- |
| Temps entre les déploiements | Semaines | Heures |
| Auteurs du code et ceux qui le déploient | Des personnes différentes | Les mêmes personnes |
| L'environnement de développement et celui de production | Divergents | Aussi similaires que possible |
40 | 41 | [Les services externes](./backing-services), tels que la base de données, la file de messages, ou le cache sont des éléments importants de la parité développement/production. La plupart des langages fournissent des bibliothèques qui simplifient l'accès à ces services externes, en fournissant des adaptateurs pour différents types de services. Voici quelques exemples dans le tableau ci-dessous.

| Type | Langage | Librairie | Adaptateurs |
| --- | --- | --- | --- |
| Base de données | Ruby/Rails | ActiveRecord | MySQL, PostgreSQL, SQLite |
| File de messages | Python/Django | Celery | RabbitMQ, Beanstalkd, Redis |
| Cache | Ruby/Rails | ActiveSupport::Cache | Mémoire, système de fichiers, Memcached |
69 | 70 | Les développeurs trouvent parfois agréable d'utiliser des services externes légers dans leur environnement local, alors qu'un service externe plus sérieux et robuste est utilisé en production. Par exemple, utiliser SQLite en local, et PostgreSQL en production; ou bien, durant le développement, mettre les données en cache dans la mémoire des processus locaux, et utiliser Memcached en production. 71 | 72 | **Les développeurs des applications 12 facteurs résistent au besoin d'utiliser des services externes différents entre le développement local et la production**, même lorsque les adaptateurs permettent d'abstraire en théorie beaucoup de différences entre les services externes. Les différences entre les services externes signifient que de petites incompatibilités surviennent, ce qui va faire que du code qui fonctionnait et qui passait les tests durant le développement ou la validation ne fonctionnera pas en production. Ce type d'erreurs crée de la friction en défaveur du déploiement continu. Le coût de cette friction et son impact négatif sur le déploiement continu est extrêmement élevé lorsqu'il est cumulé sur toute la vie de l'application. 73 | 74 | Les services locaux légers sont moins attirants aujourd'hui qu'ils ne l'étaient autrefois. Les services externes modernes tels que Memcached, PostgreSQL, et RabbitMQ ne sont pas difficiles à installer et à faire fonctionner grâce aux systèmes de paquets modernes comme [Homebrew](http://mxcl.github.com/homebrew/) et [apt-get](https://help.ubuntu.com/community/AptGet/Howto). Autre possibilité, des outils de provisionnement comme [Chef](http://www.opscode.com/chef/) et [Puppet](http://docs.puppetlabs.com/), combinés à des environnements virtuels légers comme [Docker](https://www.docker.com/) et [Vagrant](http://vagrantup.com/) permettent aux développeurs de faire fonctionner des environnements locaux qui reproduisent de très près les environnements de production. Le coût d'installation et d'utilisation de ces systèmes est faible comparé aux bénéfices d'une bonne parité développement/production et du déploiement continu. 75 | 76 | Les adaptateurs à ces différents systèmes externes sont malgré tout utiles, car ils rendent le portage vers de nouveaux services externes relativement indolores. Mais tous les déploiements de l'application (environnement de développement, validation, production) devraient utiliser le même type et la même version de chacun de ces services externes. 
77 | -------------------------------------------------------------------------------- /tests/rag_bedrock/test_rag_langchain.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import pytest 4 | 5 | 6 | from rag_bedrock.base import LangchainTestRAGHelper 7 | 8 | 9 | @pytest.mark.usefixtures("trulens_prepare", 10 | "bedrock_prepare", 11 | "documents_prepare", 12 | "llm_prepare", 13 | "embeddings_prepare", 14 | "eval_questions_prepare", 15 | "trulens_context_prepare", 16 | "provider_prepare", 17 | "rag_prepare", 18 | "feedbacks_prepare") 19 | class TestRAGLangChainClaude3SonnetTitanEmbedV1(LangchainTestRAGHelper): 20 | 21 | @property 22 | def test_name(self): 23 | return "Langchain_Claude_3_Sonnet_Titan_Embed_V1" 24 | 25 | @property 26 | def model_id(self): 27 | return "anthropic.claude-3-sonnet-20240229-v1:0" 28 | 29 | @property 30 | def embedding_model_id(self): 31 | return "amazon.titan-embed-text-v1" 32 | 33 | 34 | @pytest.mark.usefixtures("trulens_prepare", 35 | "bedrock_prepare", 36 | "documents_prepare", 37 | "llm_prepare", 38 | "embeddings_prepare", 39 | "eval_questions_prepare", 40 | "trulens_context_prepare", 41 | "provider_prepare", 42 | "rag_prepare", 43 | "feedbacks_prepare") 44 | class TestRAGLangChainClaude3SonnetTitanEmbedV2(LangchainTestRAGHelper): 45 | 46 | @property 47 | def test_name(self): 48 | return "Langchain_Claude_3_Sonnet_Titan_Embed_V2" 49 | 50 | @property 51 | def model_id(self): 52 | return "anthropic.claude-3-sonnet-20240229-v1:0" 53 | 54 | @property 55 | def embedding_model_id(self): 56 | return "amazon.titan-embed-text-v2:0" 57 | 58 | 59 | @pytest.mark.usefixtures("trulens_prepare", 60 | "bedrock_prepare", 61 | "documents_prepare", 62 | "llm_prepare", 63 | "embeddings_prepare", 64 | "trulens_context_prepare", 65 | "provider_prepare", 66 | "eval_questions_prepare", 67 | "rag_prepare", 68 | "feedbacks_prepare") 69 | class TestRAGLangChainMistralLargeTitanEmbedV1(LangchainTestRAGHelper): 70 | 71 | @property 72 | def test_name(self): 73 | return "Langchain_Mistral_Large_Titan_Embed_V1" 74 | 75 | @property 76 | def model_id(self): 77 | return "mistral.mistral-large-2402-v1:0" 78 | 79 | @property 80 | def embedding_model_id(self): 81 | return "amazon.titan-embed-text-v1" 82 | 83 | 84 | @pytest.mark.usefixtures("trulens_prepare", 85 | "bedrock_prepare", 86 | "documents_prepare", 87 | "llm_prepare", 88 | "embeddings_prepare", 89 | "trulens_context_prepare", 90 | "provider_prepare", 91 | "eval_questions_prepare", 92 | "rag_prepare", 93 | "feedbacks_prepare") 94 | class TestRAGLangChainMistralLargeTitanEmbedV2(LangchainTestRAGHelper): 95 | 96 | @property 97 | def test_name(self): 98 | return "Langchain_Mistral_Large_Titan_Embed_V2" 99 | 100 | @property 101 | def model_id(self): 102 | return "mistral.mistral-large-2402-v1:0" 103 | 104 | @property 105 | def embedding_model_id(self): 106 | return "amazon.titan-embed-text-v2:0" 107 | 108 | 109 | @pytest.mark.usefixtures("trulens_prepare", 110 | "bedrock_prepare", 111 | "documents_prepare", 112 | "llm_prepare", 113 | "embeddings_prepare", 114 | "trulens_context_prepare", 115 | "provider_prepare", 116 | "eval_questions_prepare", 117 | "rag_prepare", 118 | "feedbacks_prepare") 119 | class TestRAGLangChainMistralLargeTitanEmbedMultiModal(LangchainTestRAGHelper): 120 | 121 | @property 122 | def test_name(self): 123 | return "Langchain_Mistral_Large_Titan_Multimodal" 124 | 125 | @property 126 | def model_id(self): 127 | return "mistral.mistral-large-2402-v1:0" 128 | 
129 | @property 130 | def embedding_model_id(self): 131 | return "amazon.titan-embed-image-v1" 132 | 133 | 134 | @pytest.mark.usefixtures("trulens_prepare", 135 | "bedrock_prepare", 136 | "documents_prepare", 137 | "llm_prepare", 138 | "embeddings_prepare", 139 | "trulens_context_prepare", 140 | "provider_prepare", 141 | "eval_questions_prepare", 142 | "rag_prepare", 143 | "feedbacks_prepare") 144 | class TestRAGLangChainMistralLargeCohereEmbedMultiLingual(LangchainTestRAGHelper): 145 | 146 | @property 147 | def test_name(self): 148 | return "Langchain_Mistral_Large_Cohere_Embed" 149 | 150 | @property 151 | def model_id(self): 152 | return "mistral.mistral-large-2402-v1:0" 153 | 154 | @property 155 | def embedding_model_id(self): 156 | return "cohere.embed-multilingual-v3" 157 | -------------------------------------------------------------------------------- /rag_assistant/shared/rag_prompts.py: -------------------------------------------------------------------------------- 1 | __template__ = """Answer the following questions as best you can. You have access to the following tools: 2 | 3 | {tools} 4 | 5 | Use the following format: 6 | 7 | Question: the input question you must answer 8 | Thought: you should always think about what to do 9 | Action: the action to take, should be one of [{tool_names}] 10 | Action Input: the input to the action 11 | Observation: the result of the action 12 | ... (this Thought/Action/Action Input/Observation can repeat N times) 13 | Thought: I now know the final answer 14 | Final Answer: the final answer to the original input question 15 | 16 | Only use information provided in the context. 17 | Check your output and make sure it conforms! 18 | DO NOT output an action and a final answer at the same time. 19 | NEVER output a final answer if you are still expecting to receive the response of a tool. 20 | 21 | Begin!""" 22 | 23 | __structured_chat_agent__ = '''Respond to the human as helpfully and accurately as possible. 24 | You have access to the following tools: 25 | 26 | {tools} 27 | 28 | Use a json blob to specify a tool by providing an action key (tool name) and an action_input key (tool input). 29 | 30 | Valid "action" values: "Final Answer" or {tool_names} 31 | 32 | Provide only ONE action per $JSON_BLOB, as shown: 33 | 34 | ``` 35 | {{ 36 | "action": $TOOL_NAME, 37 | "action_input": $INPUT 38 | }} 39 | ``` 40 | 41 | Follow this format: 42 | 43 | Question: input question to answer 44 | Thought: consider previous and subsequent steps 45 | Action: 46 | ``` 47 | $JSON_BLOB 48 | ``` 49 | Observation: action result 50 | ... (repeat Thought/Action/Observation N times) 51 | Thought: I know what to respond 52 | Action: 53 | ``` 54 | {{ 55 | "action": "Final Answer", 56 | "action_input": "Final response to human" 57 | }} 58 | 59 | Reminder to ALWAYS respond with a valid json blob of a single action. 60 | Do not respond directly to question. Only use information provided in the context. 61 | Use tools to retrieve relevant information. 62 | DO NOT output an action and a final answer at the same time. 63 | Format is Action:```$JSON_BLOB``` then Observation 64 | 65 | Begin! ''' 66 | 67 | 68 | __template2__ = """You are an assistant designed to guide users through a structured risk assessment questionnaire for cloud deployment. 
69 | The questionnaire is designed to cover various pillars essential for cloud architecture, 70 | including security, compliance, availability, access methods, data storage, processing, performance efficiency, 71 | cost optimization, and operational excellence. 72 | 73 | For each question, you are to follow the "Chain of Thought" process. This means that for each user's response, you will: 74 | 75 | - Acknowledge the response, 76 | - Reflect on the implications of the choice, 77 | - Identify any risks associated with the selected option, 78 | - Suggest best practices and architecture patterns that align with the user’s selection, 79 | - Guide them to the next relevant question based on their previous answers. 80 | 81 | Your objective is to ensure that by the end of the questionnaire, the user has a clear understanding of the appropriate architecture and services needed for a secure, efficient, and compliant cloud deployment. Remember to provide answers in a simple, interactive, and concise manner. 82 | 83 | Process: 84 | 85 | 1. Begin by introducing the purpose of the assessment and ask the first question regarding data security and compliance. 86 | 2. Based on the response, discuss the chosen level of data security, note any specific risks or requirements, 87 | and recommend corresponding cloud services or architectural patterns. 88 | 3. Proceed to the next question on application availability. Once the user responds, 89 | reflect on the suitability of their choice for their application's criticality and suggest availability configurations. 90 | 4. For questions on access methods and data storage, 91 | provide insights on securing application access points or optimizing data storage solutions. 92 | 5. When discussing performance efficiency, 93 | highlight the trade-offs between performance and cost, and advise on scaling strategies. 94 | 6. In the cost optimization section, 95 | engage in a brief discussion on budgetary constraints and recommend cost-effective cloud resource management. 96 | 7. Conclude with operational excellence, 97 | focusing on automation and monitoring, 98 | and propose solutions for continuous integration and deployment. 99 | 8. After the final question, 100 | summarize the user's choices and their implications for cloud architecture. 101 | 9. Offer a brief closing statement that reassures the user of the assistance provided 102 | and the readiness of their cloud deployment strategy. 103 | 104 | Keep the interactions focused on architectural decisions without diverting to other unrelated topics. 105 | You are not to perform tasks outside the scope of the questionnaire, 106 | such as executing code or accessing external databases. 107 | Your guidance should be solely based on the information provided by the user in the context of the questionnaire. 108 | Always answer in French. 
109 | {context} 110 | Question: {question} 111 | Helpful Answer:""" 112 | 113 | 114 | human = '''{input} 115 | 116 | {agent_scratchpad}''' 117 | -------------------------------------------------------------------------------- /rag_assistant/pages/3_RAG_Admin.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | 3 | import json 4 | 5 | from utils.auth import check_password 6 | from langchain_community.vectorstores import OpenSearchVectorSearch 7 | 8 | from utils.constants import DocumentType, ChunkType, Metadata, CollectionType 9 | from utils.utilsdoc import get_store, empty_store, extract_unique_name, get_collection_count, get_metadatas, delete_documents_by_type_and_name 10 | from utils.utilsfile import list_files, delete_file 11 | from utils.config_loader import load_config 12 | 13 | 14 | config = load_config() 15 | app_name = config['DEFAULT']['APP_NAME'] 16 | collection_name = config['VECTORDB']['collection_name'] 17 | 18 | st.set_page_config(page_title=f"""📄 {app_name} 🤗""", page_icon="📄") 19 | 20 | 21 | def main(): 22 | st.title(f"""Gestion des connaissances 📄""") 23 | 24 | # collection_name = st.selectbox("Collection", ["Default", "RAG"]) 25 | 26 | count = get_collection_count(collection_name) 27 | if count > 0: 28 | st.write(f"Il y a **{count}** morceaux (chunks) dans la collection '**{collection_name}**'.") 29 | else: 30 | st.write("La collection est vide.") 31 | st.page_link("pages/2_Load_Document.py", label="Charger les connaissances") 32 | 33 | st.subheader("Fichier(s) chargé(s)") 34 | 35 | unique_filenames = extract_unique_name(collection_name, Metadata.FILENAME.value) 36 | 37 | for name in unique_filenames: 38 | st.markdown(f"""- {name}""") 39 | 40 | st.subheader("Sujet(s) disponible(s):") 41 | unique_topic_names = extract_unique_name(collection_name, Metadata.TOPIC.value) 42 | for name in unique_topic_names: 43 | st.markdown(f"""- {name}""") 44 | 45 | # st.subheader("Document Type") 46 | # unique_document_types = extract_unique_name(collection_name, 'document_type') 47 | # for name in unique_document_types: 48 | # st.markdown(f"""- {name}""") 49 | 50 | with st.form("search"): 51 | st.subheader("Chercher dans la Base de Connaissance:") 52 | search = st.text_input("Texte (*)") 53 | 54 | topic_name = st.selectbox("Sujet", unique_topic_names, index=None) 55 | filename = st.selectbox("Nom du Fichier", unique_filenames, index=None) 56 | document_type = st.selectbox("Type de Document", [e.value for e in DocumentType], index=None) 57 | chunk_type = st.selectbox("Type de Morceau", [e.value for e in ChunkType], index=None) 58 | #document_type = st.selectbox("Document Type", unique_document_types, index=None) 59 | 60 | filters = [] 61 | if filename: 62 | filters.append({Metadata.FILENAME.value: filename}) 63 | if document_type: 64 | filters.append({Metadata.DOCUMENT_TYPE.value: document_type}) 65 | if topic_name: 66 | filters.append({Metadata.TOPIC.value: topic_name}) 67 | if document_type: 68 | filters.append({Metadata.DOCUMENT_TYPE.value: document_type}) 69 | if chunk_type: 70 | filters.append({Metadata.CHUNK_TYPE.value: chunk_type}) 71 | if st.form_submit_button("Recherche"): 72 | # add check for empty string as it is not supported by bedrock (or anthropic?) 
73 | if search != "": 74 | if len(filters) > 1: 75 | where = {"$and": filters} 76 | elif len(filters) == 1: 77 | where = filters[0] 78 | else: 79 | where = {} 80 | store = get_store() 81 | if isinstance(store, OpenSearchVectorSearch): 82 | result_filters = [] 83 | for os_filter in filters: 84 | for key in os_filter.keys(): 85 | result_filters.append({"match": {f"metadata.{key}": os_filter[key]}}) 86 | result = store.similarity_search(search, k=5, boolean_filter=result_filters) 87 | else: 88 | result = store.similarity_search(search, k=5, filter=where) 89 | st.write(result) 90 | else: 91 | st.write("Veuillez entrer un texte.") 92 | 93 | st.subheader("Administration des Données") 94 | 95 | col1, col2 = st.columns(2) 96 | with col1: 97 | file_name_to_delete = st.selectbox("Choisir un fichier", unique_filenames, index=None) 98 | if st.button("Supprimer les données du fichier"): 99 | delete_documents_by_type_and_name(collection_name=collection_name, type=Metadata.FILENAME.value, name=file_name_to_delete) 100 | delete_file(file_name_to_delete, CollectionType.DOCUMENTS.value) 101 | 102 | chunk_type_to_delete = st.selectbox("Choisir un type de morceau (chunk)", [e.value for e in ChunkType], index=None) 103 | if st.button("Supprimer les données de ce type"): 104 | delete_documents_by_type_and_name(collection_name=collection_name, type=Metadata.CHUNK_TYPE.value, 105 | name=chunk_type_to_delete) 106 | 107 | with col2: 108 | topic_name_to_delete = st.selectbox("Choisir un sujet", unique_topic_names, index=None) 109 | if st.button("Supprimer les données de ce sujet"): 110 | delete_documents_by_type_and_name(collection_name=collection_name, type=Metadata.TOPIC.value, name=topic_name_to_delete) 111 | 112 | if st.button("Supprimer la collection"): 113 | empty_store(collection_name=collection_name) 114 | 115 | with st.expander("Voir toutes les meta-données", expanded=False): 116 | st.subheader("Méta-données") 117 | metadatas = get_metadatas(collection_name=collection_name) 118 | st.code(json.dumps(metadatas, indent=4, sort_keys=True), language="json") 119 | 120 | 121 | if __name__ == "__main__": 122 | if not check_password(): 123 | # Do not continue if check_password is not True. 
124 | st.stop() 125 | main() 126 | -------------------------------------------------------------------------------- /tests/utils/test_utilsfile.py: -------------------------------------------------------------------------------- 1 | """Test the utilsfile file.""" 2 | 3 | import unittest 4 | import os 5 | #from pytest import fixture 6 | from unittest.mock import patch 7 | from rag_assistant.utils.utilsfile import list_files, _list_files_locally, _list_files_from_s3 8 | 9 | 10 | class TestListFiles(unittest.TestCase): 11 | """Test the list_files function.""" 12 | 13 | @patch('rag_assistant.utils.utilsfile.config.get') 14 | @patch('rag_assistant.utils.utilsfile._list_files_locally') 15 | def test_list_files_locally(self, mock_list_files_locally, mock_config_get): 16 | """Test list_files with LOCAL configuration.""" 17 | mock_config_get.return_value = 'LOCAL' 18 | mock_list_files_locally.return_value = ['file1.txt', 'file2.txt'] 19 | result = list_files('my_collection') 20 | self.assertEqual(result, ['file1.txt', 'file2.txt']) 21 | mock_list_files_locally.assert_called_once_with(file_collection='my_collection') 22 | 23 | @patch('rag_assistant.utils.utilsfile.config.get') 24 | @patch('rag_assistant.utils.utilsfile._list_files_from_s3') 25 | def test_list_files_s3(self, mock_list_files_from_s3, mock_config_get): 26 | """Test list_files with S3 configuration.""" 27 | mock_config_get.return_value = 'S3' 28 | mock_list_files_from_s3.return_value = ['file1.txt', 'file2.txt'] 29 | result = list_files('my_collection') 30 | self.assertEqual(result, ['file1.txt', 'file2.txt']) 31 | mock_list_files_from_s3.assert_called_once_with(file_collection='my_collection') 32 | 33 | @patch('rag_assistant.utils.utilsfile.config.get') 34 | def test_list_files_none(self, mock_config_get): 35 | """Test list_files with NONE configuration.""" 36 | mock_config_get.return_value = 'NONE' 37 | result = list_files('my_collection') 38 | self.assertIsNone(result) 39 | 40 | @patch('rag_assistant.utils.utilsfile.config.get') 41 | def test_list_files_not_implemented(self, mock_config_get): 42 | """Test list_files with an unknown configuration.""" 43 | mock_config_get.return_value = 'UNKNOWN' 44 | with self.assertRaises(NotImplementedError): 45 | list_files('my_collection') 46 | 47 | @patch('rag_assistant.utils.utilsfile.config.get') 48 | @patch('os.path.exists') 49 | @patch('os.listdir') 50 | def test__list_files_locally(self, mock_listdir, mock_path_exists, mock_config_get): 51 | """Test _list_files_locally function.""" 52 | mock_listdir.return_value = ['file1.txt', 'file2.txt', 'file3.jpg'] 53 | mock_path_exists.return_value = True 54 | mock_config_get.return_value = 'data' 55 | result = _list_files_locally('my_local_collection') 56 | self.assertEqual(result, ['file1.txt', 'file2.txt', 'file3.jpg']) 57 | mock_listdir.assert_called_once_with(os.path.join('data', 'my_local_collection')) 58 | mock_path_exists.assert_called_once_with(os.path.join('data', 'my_local_collection')) 59 | 60 | @patch('rag_assistant.utils.utilsfile.config.get') 61 | @patch('os.path.exists') 62 | @patch('os.listdir') 63 | def test__list_files_locally_empty(self, mock_listdir, mock_path_exists, mock_config_get): 64 | """Test _list_files_locally function with an empty directory.""" 65 | mock_listdir.return_value = [] 66 | mock_path_exists.return_value = True 67 | mock_config_get.return_value = 'data' 68 | result = _list_files_locally('empty_collection') 69 | self.assertEqual(result, []) 70 | mock_listdir.assert_called_once_with(os.path.join('data', 
'empty_collection')) 71 | 72 | @patch('rag_assistant.utils.utilsfile.boto3.client') 73 | @patch('rag_assistant.utils.utilsfile.config.get') 74 | def test__list_files_from_s3(self, mock_config_get, mock_boto3_client): 75 | """Test _list_files_from_s3 function.""" 76 | mock_config_get.return_value = 'my_test_bucket' 77 | mock_s3_client = mock_boto3_client.return_value 78 | mock_s3_client.list_objects_v2.return_value = { 79 | 'Contents': [ 80 | {'Key': 'file_collection/file1.txt'}, 81 | {'Key': 'file_collection/file2.txt'} 82 | ] 83 | } 84 | result = _list_files_from_s3('file_collection') 85 | self.assertEqual(result, ['file1.txt', 'file2.txt']) 86 | mock_boto3_client.assert_called_once_with('s3') 87 | mock_s3_client.list_objects_v2.assert_called_once_with(Bucket='my_test_bucket', 88 | Prefix='file_collection') 89 | 90 | @patch('rag_assistant.utils.utilsfile.boto3.client') 91 | @patch('rag_assistant.utils.utilsfile.config.get') 92 | def test__list_files_from_s3_empty(self, mock_config_get, mock_boto3_client): 93 | """Test _list_files_from_s3 function with an empty collection.""" 94 | mock_config_get.return_value = 'my_test_bucket' 95 | mock_s3_client = mock_boto3_client.return_value 96 | mock_s3_client.list_objects_v2.return_value = {} 97 | result = _list_files_from_s3('empty_collection') 98 | self.assertEqual(result, []) 99 | mock_boto3_client.assert_called_once_with('s3') 100 | mock_s3_client.list_objects_v2.assert_called_once_with(Bucket='my_test_bucket', 101 | Prefix='empty_collection') 102 | 103 | @patch('rag_assistant.utils.utilsfile.boto3.client') 104 | @patch('rag_assistant.utils.utilsfile.config.get') 105 | def test__list_files_from_s3_exception(self, mock_config_get, mock_boto3_client): 106 | """Test _list_files_from_s3 function with an exception.""" 107 | mock_config_get.return_value = 'my_test_bucket' 108 | mock_s3_client = mock_boto3_client.return_value 109 | mock_s3_client.list_objects_v2.side_effect = Exception('S3 access error') 110 | with self.assertRaises(Exception) as context: 111 | _list_files_from_s3('invalid_collection') 112 | self.assertTrue('S3 access error' in str(context.exception)) 113 | -------------------------------------------------------------------------------- /rag_assistant/utils/utilsfile.py: -------------------------------------------------------------------------------- 1 | """Utils for storing files""" 2 | import io 3 | import os 4 | import logging 5 | import boto3 6 | 7 | from streamlit.runtime.uploaded_file_manager import UploadedFile 8 | 9 | from .config_loader import load_config 10 | from .constants import StorageType 11 | 12 | config = load_config() 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | def put_file(file: io.BytesIO, filename:str, file_collection: str = '') -> None: 17 | """Persist file to selected storage interface""" 18 | storage_interface = config.get('DOCUMENTS_STORAGE', 'INTERFACE') 19 | 20 | if storage_interface == StorageType.LOCAL.value: 21 | _persist_file_locally(file, filename=filename, file_collection=file_collection) 22 | elif storage_interface == StorageType.S3.value: 23 | _persist_file_to_s3(file, filename=filename, file_collection=file_collection) 24 | elif storage_interface == StorageType.NONE.value: 25 | pass 26 | else: 27 | raise NotImplementedError(f"{storage_interface} not implemented yet for storage.") 28 | 29 | 30 | def get_file(filename: str, file_collection: str=''): 31 | """Get file from selected storage interface""" 32 | storage_interface = config.get('DOCUMENTS_STORAGE', 'INTERFACE') 33 | 34 | if 
storage_interface == StorageType.LOCAL.value: 35 | return _get_file_locally(filename=filename, file_collection=file_collection) 36 | 37 | if storage_interface == StorageType.S3.value: 38 | return _get_file_from_s3(filename=filename, file_collection=file_collection) 39 | 40 | if storage_interface == StorageType.NONE.value: 41 | return None 42 | 43 | raise NotImplementedError(f"{storage_interface} not implemented yet for storage.") 44 | 45 | 46 | def delete_file(filename: str, file_collection: str=''): 47 | """Delete file from selected storage interface""" 48 | storage_interface = config.get('DOCUMENTS_STORAGE', 'INTERFACE') 49 | 50 | if storage_interface == StorageType.LOCAL.value: 51 | return _delete_file_locally(filename=filename, file_collection=file_collection) 52 | 53 | if storage_interface == StorageType.S3.value: 54 | return _delete_file_from_s3(filename=filename, file_collection=file_collection) 55 | 56 | if storage_interface == StorageType.NONE.value: 57 | return None 58 | 59 | raise NotImplementedError(f"{storage_interface} not implemented yet for storage.") 60 | 61 | 62 | def list_files(file_collection: str=''): 63 | """List files from selected storage interface""" 64 | storage_interface = config.get('DOCUMENTS_STORAGE', 'INTERFACE') 65 | 66 | if storage_interface == StorageType.LOCAL.value: 67 | return _list_files_locally(file_collection=file_collection) 68 | 69 | if storage_interface == StorageType.S3.value: 70 | return _list_files_from_s3(file_collection=file_collection) 71 | 72 | if storage_interface == StorageType.NONE.value: 73 | return None 74 | 75 | raise NotImplementedError(f"{storage_interface} not implemented yet for storage.") 76 | 77 | 78 | def _persist_file_to_s3(file: io.BytesIO, filename: str, file_collection: str) -> None: 79 | """Persist file to S3 storage""" 80 | logger.info("On persiste un document : %s sur S3", filename) 81 | s3_bucket = config.get('DOCUMENTS_STORAGE', 'S3_BUCKET_NAME') 82 | 83 | file_key = f"{file_collection}/{filename}" 84 | 85 | s3_client = boto3.client('s3') 86 | 87 | s3_client.upload_fileobj(file, s3_bucket, file_key) 88 | 89 | 90 | def _persist_file_locally(file: io.BytesIO, filename:str, file_collection: str) -> None: 91 | """Persist file to local storage""" 92 | logger.info("On persiste un document : %s localement", filename) 93 | documents_path = config.get('DOCUMENTS_STORAGE', 'DOCUMENTS_PATH') 94 | 95 | file_path = os.path.join(documents_path, file_collection) 96 | 97 | if not os.path.exists(file_path): 98 | os.makedirs(file_path) 99 | 100 | file_path = os.path.join(file_path, filename) 101 | 102 | with open(file_path, 'wb') as f: 103 | f.write(file.getbuffer()) 104 | 105 | 106 | def _get_file_locally(filename: str, file_collection: str): 107 | """Get file from local storage""" 108 | documents_path = config.get('DOCUMENTS_STORAGE', 'DOCUMENTS_PATH') 109 | 110 | file_path = os.path.join(documents_path, file_collection, filename) 111 | 112 | if os.path.exists(file_path): 113 | return open(file_path, 'rb').read() 114 | 115 | return None 116 | 117 | 118 | def _get_file_from_s3(filename: str, file_collection: str): 119 | """Get file from S3 storage""" 120 | s3_bucket = config.get('DOCUMENTS_STORAGE', 'S3_BUCKET_NAME') 121 | 122 | file_key = f"{file_collection}/{filename}" 123 | 124 | s3_client = boto3.client('s3') 125 | 126 | response = s3_client.get_object(Bucket=s3_bucket, Key=file_key) 127 | return response['Body'].read() 128 | 129 | 130 | def _delete_file_locally(filename: str, file_collection: str): 131 | """Delete file from local 
storage""" 132 | documents_path = config.get('DOCUMENTS_STORAGE', 'DOCUMENTS_PATH') 133 | 134 | file_path = os.path.join(documents_path, file_collection, filename) 135 | 136 | if os.path.exists(file_path): 137 | os.remove(file_path) 138 | 139 | 140 | def _delete_file_from_s3(filename: str, file_collection: str): 141 | """Delete file from S3 storage""" 142 | s3_bucket = config.get('DOCUMENTS_STORAGE', 'S3_BUCKET_NAME') 143 | 144 | file_key = f"{file_collection}/{filename}" 145 | 146 | s3_client = boto3.client('s3') 147 | 148 | s3_client.delete_object(Bucket=s3_bucket, Key=file_key) 149 | 150 | 151 | def _list_files_locally(file_collection: str): 152 | """List files from local storage""" 153 | documents_path = config.get('DOCUMENTS_STORAGE', 'DOCUMENTS_PATH') 154 | 155 | file_path = os.path.join(documents_path, file_collection) 156 | 157 | if os.path.exists(file_path): 158 | return os.listdir(file_path) 159 | 160 | return [] 161 | 162 | 163 | def _list_files_from_s3(file_collection: str): 164 | """List files from S3 storage""" 165 | s3_bucket = config.get('DOCUMENTS_STORAGE', 'S3_BUCKET_NAME') 166 | 167 | s3_client = boto3.client('s3') 168 | 169 | response = s3_client.list_objects_v2(Bucket=s3_bucket, Prefix=file_collection) 170 | 171 | if 'Contents' in response: 172 | return [obj['Key'].split('/')[-1] for obj in response['Contents']] 173 | 174 | return [] 175 | -------------------------------------------------------------------------------- /rag_assistant/utils/utilsvision.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import hashlib 3 | import imghdr 4 | import json 5 | import os 6 | import io 7 | import shutil 8 | from typing import Optional, Union 9 | 10 | import boto3 11 | from langchain_core.documents import Document 12 | from pypdf import PdfReader 13 | from streamlit.runtime.uploaded_file_manager import UploadedFile 14 | 15 | from utils.constants import ChunkType, Metadata, CollectionType 16 | from utils.config_loader import load_config 17 | from utils.utilsdoc import clean_text 18 | from utils.utilsfile import put_file 19 | 20 | config = load_config() 21 | 22 | aws_profile_name = os.getenv("profile_name") 23 | bedrock_region_name = config["BEDROCK"]["AWS_REGION_NAME"] 24 | #bedrock_embeddings_model = config["BEDROCK"]["EMBEDDINGS_MODEL"] 25 | bedrock_endpoint_url = config["BEDROCK"]["BEDROCK_ENDPOINT_URL"] 26 | vision_model = config["VISION"]["VISION_MODEL"] 27 | 28 | boto3.setup_default_session(profile_name=os.getenv("profile_name")) 29 | bedrock = boto3.client("bedrock-runtime", bedrock_region_name, endpoint_url=bedrock_endpoint_url) 30 | 31 | 32 | 33 | extract_image_output_dir = config['VISION']['IMAGE_OUTPUT_DIR'] 34 | 35 | def image_to_text(encoded_image, media_type) -> Optional[str]: 36 | system_prompt = """Describe every detail you can about this image, 37 | be extremely thorough and detail even the most minute aspects of the image. 38 | Start your description by providing an image title followed by a short overall summary. 39 | If the image is a table, output the content of the table in a structured format. 
40 | """ 41 | 42 | prompt = { 43 | "anthropic_version": "bedrock-2023-05-31", 44 | "max_tokens": 1000, 45 | "temperature": 0, 46 | "system": system_prompt, 47 | "messages": [ 48 | { 49 | "role": "user", 50 | "content": [ 51 | { 52 | "type": "image", 53 | "source": { 54 | "type": "base64", 55 | "data": encoded_image, 56 | "media_type": media_type 57 | } 58 | }, 59 | { 60 | "type": "text", 61 | "text": system_prompt 62 | } 63 | ] 64 | } 65 | ] 66 | } 67 | 68 | json_prompt = json.dumps(prompt) 69 | try: 70 | response = bedrock.invoke_model(body=json_prompt, modelId=vision_model, 71 | accept="application/json", contentType="application/json") 72 | response_body = json.loads(response.get('body').read()) 73 | output = response_body['content'][0]['text'] 74 | return output 75 | 76 | # Catch all other (unexpected) exceptions 77 | except Exception as e: 78 | print(f"An unexpected error occurred: {e}") 79 | return None 80 | 81 | 82 | def generate_unique_id(fname): 83 | # Generate MD5 hash of the filename 84 | hash_object = hashlib.md5(fname.name.encode()) 85 | hex_dig = hash_object.hexdigest() 86 | return hex_dig 87 | 88 | 89 | def load_image(pdfs: Union[list[UploadedFile], None, UploadedFile], metadata = None, restart_image_analysis:bool = False, ) -> Optional[list[Document]]: 90 | if pdfs is not None: 91 | docs = [] 92 | if metadata is None: 93 | metadata = {} 94 | metadata.update({Metadata.CHUNK_TYPE.value: ChunkType.IMAGE.value}) 95 | for pdf in pdfs: 96 | if pdf.type == "application/pdf": 97 | # Generate a unique identifier for each document 98 | tmp_id_based_on_file_upload = generate_unique_id(pdf) 99 | # Construct a save directory and create it 100 | save_dir = f"{extract_image_output_dir}/{tmp_id_based_on_file_upload}" 101 | if restart_image_analysis: 102 | # Before processing is done, remove the directory and its contents 103 | shutil.rmtree(save_dir) 104 | 105 | reader = PdfReader(pdf) 106 | 107 | os.makedirs(save_dir, exist_ok=True) 108 | 109 | for i, page in enumerate(reader.pages, start=1): 110 | for image in page.images: 111 | 112 | save_path = f"{save_dir}/{image.name}" 113 | json_path = f"{save_dir}/{image.name}.json" 114 | 115 | if os.path.exists(json_path): 116 | # skip the image if it is already processed 117 | with open(json_path, "r") as file: # Open the document file 118 | doc_data = json.load(file) # Load the data from the document 119 | # Create a new Document instance using the loaded data 120 | doc = Document(page_content=doc_data['page_content'], metadata=doc_data['metadata']) 121 | docs.append(doc) # Add the document to the docs list 122 | continue 123 | 124 | with open(save_path, "wb") as fp: 125 | fp.write(image.data) 126 | 127 | # Determine image type 128 | image_type = imghdr.what(save_path) 129 | media_type = f"image/{image_type}" 130 | 131 | image_content = encode_image(save_path) 132 | image_description = image_to_text(image_content, media_type) 133 | if image_description is not None: 134 | page_metadata = {'page': i, 'filename': pdf.name, 'media_type': media_type} 135 | page_metadata.update(metadata) 136 | doc = Document(page_content=clean_text(image_description), metadata=page_metadata) 137 | docs.append(doc) 138 | 139 | with open(json_path, "w") as file: 140 | json.dump(doc.__dict__, file) 141 | 142 | else: 143 | print(f"Failed to extract text from image {image.name}.") 144 | 145 | put_file(io.BytesIO(image.data), image.name, CollectionType.IMAGES.value) 146 | return docs 147 | else: 148 | return None 149 | 150 | 151 | def encode_image(image_path): 152 | 
"""Function to encode images""" 153 | with open(image_path, "rb") as image_file: 154 | return base64.b64encode(image_file.read()).decode('utf-8') 155 | -------------------------------------------------------------------------------- /rag_assistant/shared/llm_facade.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | from utils.config_loader import load_config 4 | 5 | config = load_config() 6 | 7 | 8 | # La génération par LLM va etre fait au moment de la création du summary index. 9 | # cela prend trop de temps ici. 10 | # l'objectif est de mutualiser la fonctionnalités conversations starters pour les deux chats 11 | # llm = load_model(streaming=True) 12 | # 13 | # context = summary_query_engine.query("Make a complete summary of knowledge available" 14 | # " on following topics {topics}.") 15 | # 16 | # ### Answer question ### 17 | # cs_system_prompt = """You are a helpful solution architect and software engineer assistant. 18 | # Your users are asking questions on specific topics.\ 19 | # Suggest exactly 6 questions related to the provided context to help them find the information they need. \ 20 | # Suggest only short questions without compound sentences. \ 21 | # Question must be self-explanatory and topic related. 22 | # Suggest a variety of questions that cover different aspects of the context. \ 23 | # Use the summary of knowledge to generate the question on topics. \ 24 | # Make sure they are complete questions, and that they are related to the topics. 25 | # Output one question per line. Do not number the questions. Do not group question by topics. 26 | # DO NOT make a summary or an introduction of your result. Output ONLY the generated questions. 27 | # DO NOT output chapter per topic. Avoid blank line. 28 | # Avoid duplicate question. Generate question in French. 29 | # Questions: """ 30 | # 31 | # # Examples: 32 | # # What information needs to be provided during IHM launch? 33 | # # How is the data transferred to the service call? 34 | # # What functions are involved in API Management? 35 | # # What does the Exposure function in API Management entail? 
36 | # 37 | # cs_prompt = ChatPromptTemplate.from_messages( 38 | # [ 39 | # ("system", cs_system_prompt), 40 | # ("human", "{topics}" 41 | # "{summary}"), 42 | # ] 43 | # ) 44 | # output_parser = StrOutputParser() 45 | # model = load_model(streaming=False) 46 | # 47 | # chain = cs_prompt | model | output_parser 48 | # response = chain.invoke({"topics": topics, "summary": context}) 49 | 50 | def get_conversation_starters(topics: list[str], count:int = 4): 51 | 52 | response = "" 53 | 54 | response_list = [line for line in response.split("\n") if line.strip() != ''] 55 | if len(response_list) > count: 56 | response_list = random.sample(response_list, count) 57 | 58 | elif len(response_list) < count: 59 | diff = count - len(response_list) 60 | 61 | suggested_questions = suggested_questions_examples 62 | 63 | # check if 'API' is in topics 64 | if 'API' in topics: 65 | suggested_questions.extend(suggested_questions_examples_api) 66 | 67 | # check if 'IHM' is in topics 68 | if 'IHM' in topics: 69 | suggested_questions.extend(suggested_questions_examples_ihm) 70 | 71 | all_questions = list(suggested_questions) 72 | 73 | selected_questions = set(response_list) 74 | 75 | while len(selected_questions) < count: 76 | question = random.choice(suggested_questions) 77 | selected_questions.add(question) 78 | 79 | response_list = list(selected_questions) 80 | # for _ in range(min(count, len(all_questions))): 81 | # question = random.choice(all_questions) 82 | # all_questions.remove(question) 83 | # response_list.append(question) 84 | 85 | #additional_questions = random.sample(suggested_questions, diff) 86 | #response_list.extend(additional_questions) 87 | 88 | return response_list 89 | 90 | 91 | suggested_questions_examples = [ 92 | "Comment sécuriser les données sensibles ?", 93 | "Comment assurer l'efficacité des performances ?", 94 | "En quoi consiste l'Analyse de risque MESARI ?", 95 | "A quoi sert le Cross Origin Resource Sharing ?", 96 | "Quels sont les principes de la Content Security Policy ?", 97 | "Comment garantir la sécurité des échanges entre applications ?", 98 | "Quelles sont les bonnes pratiques pour assurer la fiabilité des ressources Web ?", 99 | "Pourquoi suivre les spécifications associées est-il important ?" 
100 | ] 101 | # API 102 | suggested_questions_examples_api = [ 103 | "Quels sont les mécanismes d'authentification API ?", 104 | "Quelles sont les principales fonctionnalités du portail fournisseur ?", 105 | "Que comprend la fonction d'exposition dans la gestion des API ?", 106 | "Quelle est la différence entre SOAP et REST ?", 107 | "Que signifie l'acronyme API ?", 108 | "Quels formats de données sont couramment utilisés dans les APIs ?", 109 | "Comment tester et déboguer une API ?", 110 | "Quels sont les avantages d'utiliser une API ?", 111 | "Que signifie REST et quels en sont les principes clés ?", 112 | "Comment gérer les versions dans une API ?", 113 | "Quels outils permettent de documenter une API ?", 114 | "Comment implémenter une pagination dans une API ?", 115 | "Qu'est-ce qu'une architecture d'API ?", 116 | ] 117 | suggested_questions_examples_ihm = [ 118 | # IHM 119 | "Quelles informations doivent être fournies lors du lancement de l'IHM?", 120 | "Quels sont les principes de base d'une bonne conception d'interface utilisateur ?", 121 | "Comment rendre une interface utilisateur accessible aux personnes handicapées ?", 122 | "Quels sont les différents types de composants d'interface utilisateur ?", 123 | "Comment concevoir une expérience utilisateur cohérente sur différents appareils ?", 124 | "Quels sont les avantages du design 'mobile first' ?", 125 | "Comment effectuer des tests d'utilisabilité pour une interface ?", 126 | "Que signifie 'responsive design' pour une interface web ?", 127 | "Quels frameworks facilitent le développement d'interfaces utilisateur modernes ?", 128 | "Comment optimiser les performances d'une interface utilisateur ?", 129 | "Quelle est l'importance des conventions de conception dans une interface ?", 130 | ] 131 | # AUTRES QUESTIONS API 132 | # "Comment structurer une API RESTful ?", 133 | # "Quels sont les bons usages des méthodes HTTP ?", 134 | # "Comment définir des URIs pour les ressources ?", 135 | # "Qu'est-ce que HATEOAS et comment l'implémenter ?", 136 | # "Comment paginer et filtrer des collections de ressources ?", 137 | # "Quels mécanismes utiliser pour l'authentification API ?", 138 | # "Comment gérer les versions d'une API ?", 139 | # "Quelle est la stratégie de contrôle d'accès recommandée ?", 140 | # "Comment documenter une API efficacement ?", 141 | # "Comment implémenter le throttling pour une API ?", 142 | # "Quels sont les principes de conception d'une IHM intuitive ?", 143 | # "Comment assurer la résilience d'une API ?", 144 | # "Quels sont les formats standards pour les données API ?", 145 | # "Comment surveiller la performance d'une API ?", 146 | # "Quels sont les aspects de sécurité à considérer pour une API ?", 147 | # "Comment gérer la rétrocompatibilité des API ?", 148 | # "Quels sont les avantages du caching pour une API ?", 149 | # "Comment assurer la haute disponibilité d'une API ?", 150 | # "Quels outils utiliser pour le monitoring d'une API ?", 151 | # "Comment prévenir les injections dans une API ?" 
152 | 153 | 154 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # applied-ai-rag-assistant 2 | Advanced RAG Assistant with Streamlit, LangChain, LlamaIndex and ChromaDB 3 | 4 | Initially forked from https://github.com/langchain-ai/streamlit-agent/ `chat_with_documents.py` 5 | 6 | Apps feature LangChain 🤝 Streamlit integrations such as the 7 | [Callback integration](https://python.langchain.com/docs/modules/callbacks/integrations/streamlit) and 8 | [StreamlitChatMessageHistory](https://python.langchain.com/docs/integrations/memory/streamlit_chat_message_history). 9 | 10 | We have since added Mistral La Plateforme, Bedrock, LlamaIndex and LangChain agents for advanced RAG, as well as vision-model support for RAG with Anthropic Claude. 11 | 12 | ## Setup 13 | 14 | This project uses [Poetry](https://python-poetry.org/) for dependency management. 15 | 16 | ```shell 17 | # Create Python environment 18 | $ poetry install 19 | 20 | # Install git pre-commit hooks 21 | $ poetry shell 22 | $ pre-commit install 23 | ``` 24 | 25 | ### Note on package dependencies 26 | For now, we do not pin package versions in Poetry and try to upgrade as fast as we can. :) 27 | Because we rely on many new and fast-moving "GenAI" components whose interfaces are not yet stable, 28 | the application and tests tend to break often, especially since these packages are not tested against each other's releases. 29 | 30 | Main packages are: 31 | - LangChain (LLM orchestration and agents) 32 | - LlamaIndex (RAG) 33 | - Streamlit (UX) 34 | - TruLens (Testing) 35 | - Chroma (Vector Store) 36 | - OpenAI (LLM) 37 | - MistralAI (LLM) 38 | - boto3 (for Bedrock and AWS integration) 39 | 40 | ## Running 41 | 42 | ### Environment variables 43 | The project expects some environment variables to be set up in order to run. 44 | Some are mandatory and some are only needed if you want to run on a specific platform. 45 | 46 | The project currently supports the following platforms: OPENAI, AZURE, MISTRAL, BEDROCK (AWS). 47 | 48 | We recommend adding the variables to a .env file located outside the project directory to avoid any accidental commit. 49 | Your home directory is fine. 50 | 51 | Here are the variables: 52 | 53 | ```shell 54 | OPENAI_API_KEY= 55 | MISTRAL_API_KEY= 56 | AZURE_OPENAI_API_KEY= 57 | HF_TOKEN= 58 | LANGCHAIN_TRACING_V2= 59 | LANGCHAIN_API_KEY= 60 | ``` 61 | 62 | ### MISTRAL PLATFORM 63 | If you want to use Mistral La Plateforme, you need a MISTRAL_API_KEY and an HF_TOKEN. 64 | HF_TOKEN is required to download the embeddings from Hugging Face. 65 | The download happens automatically, but you must have the HF_TOKEN and have been granted access on the model page on Hugging Face. 66 | https://huggingface.co/mistralai/Mixtral-8x7B-v0.1 67 | 68 | 69 | ### LANGSMITH and LLM OBSERVABILITY 70 | We use LangSmith for LLM observability. 71 | LangSmith requires LANGCHAIN_TRACING_V2 and a LANGCHAIN_API_KEY. 72 | 73 | You can disable tracing with 'LANGCHAIN_TRACING_V2=false'. 74 | Oddly, 'LANGCHAIN_API_KEY' is still required even if you set 'LANGCHAIN_TRACING_V2' to false. 75 | In that case you can put anything in it; the variable only needs to exist. 76 | 77 | LangSmith is free for personal use with a quota of 5k traces per month. 78 | It is very useful, so we recommend it.
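For illustration, a minimal `.env` sketch for the LangSmith variables could look like the following (the key value is a placeholder, not a real key):

```shell
# Set to false to disable tracing; LANGCHAIN_API_KEY must still be defined (any value works)
LANGCHAIN_TRACING_V2=true
LANGCHAIN_API_KEY=<your-langsmith-api-key>
```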
79 | 80 | https://smith.langchain.com/ 81 | 82 | ### AWS BEDROCK 83 | If you want to use Bedrock (AWS), you can define your credentials in the $HOME/.aws/credentials file. 84 | We use eu-west-3 and eu-central-1 for Anthropic Claude, Mistral Large and Titan embeddings within Bedrock. 85 | Adapt this to your own needs. Beware that models are not consistently deployed across AWS regions. 86 | 87 | 88 | ### MODEL VISION 89 | We are starting to add vision model support to the assistant. 90 | For now, only Claude 3 vision through AWS Bedrock is supported. 91 | 92 | 93 | ## Config 94 | Most parameters like model name, region, etc. can be modified in conf/config.ini for all model providers. 95 | 96 | 97 | ## Testing 98 | We use pytest and TruLens to evaluate the assistant (RAG Triad). 99 | 100 | For RAG testing, we use OpenAI as the provider for the TruLens feedback functions, so you need at least an OpenAI API key to make it work. 101 | You can adapt it for your own purposes. 102 | 103 | Tests in the tests/utils/ directory use Mistral Large through 'La Plateforme', so you'll need a MISTRAL_API_KEY. 104 | Tests in tests/rag/ use Bedrock (AWS) and OpenAI GPT, so you'll need an OPENAI_API_KEY and AWS credentials. 105 | 106 | 107 | ```shell 108 | # Run the Streamlit app 109 | $ streamlit run rag_assistant/streamlit_app.py 110 | ``` 111 | 112 | # Running with Docker (OLD) 113 | 114 | This project includes a `Dockerfile` to run the app in a Docker container. The Docker image is optimised for size and build time using cache techniques. 115 | 116 | To build the image with `DOCKER_BUILDKIT`, use the command below: 117 | 118 | ```DOCKER_BUILDKIT=1 docker build --target=runtime . -t applied-ai-rag-assistant:latest``` 119 | 120 | 1. Run the docker container directly 121 | 122 | ``docker run -d --name applied-ai-rag-assistant -p 8051:8051 applied-ai-rag-assistant:latest `` 123 | 124 | 2. Run the docker container using docker-compose (Recommended) 125 | 126 | Edit the command in `docker-compose.yml` to target the Streamlit app 127 | 128 | ``docker-compose up`` 129 | 130 | ## Run the app with Docker 131 | 132 | Build the image: 133 | ```sh 134 | docker build -t ai_assistant . 135 | ``` 136 | 137 | Then run a container from the image we just created: 138 | ```sh 139 | docker run -p 80:80 -e OPENAI_API_KEY="secret_value" ai_assistant 140 | ``` 141 | Replace secret_value with your OpenAI API key. 142 | 143 | The application should run on http://localhost:80/ 144 | 145 | ## Run the app on AWS 146 | 147 | Install the AWS CLI: https://docs.aws.amazon.com/fr_fr/cli/latest/userguide/getting-started-install.html 148 | Install Docker: https://docs.docker.com/engine/install/ 149 | 150 | Build and push the Docker image to the AWS Elastic Container Registry (ECR) with the AWS CLI: 151 | ```sh 152 | docker build -t ai_assistant . 153 | aws configure set aws_access_key_id "access-key" 154 | aws configure set aws_secret_access_key "secret-access-key" 155 | aws ecr get-login-password --region eu-west-1 | docker login --username AWS --password-stdin 441525731509.dkr.ecr.eu-west-1.amazonaws.com 156 | docker tag ai_assistant:latest 441525731509.dkr.ecr.eu-west-1.amazonaws.com/ai_assistant:latest 157 | docker push 441525731509.dkr.ecr.eu-west-1.amazonaws.com/ai_assistant:latest 158 | ``` 159 | Replace access-key and secret-access-key with valid AWS credentials that will be used to push to the ECR. The AWS user must have the correct rights to push images to the ECR.
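If you want to confirm the push succeeded before deploying, a quick check with the AWS CLI is sketched below (assuming the repository is named `ai_assistant`, as in the tag above):

```sh
# List the images currently stored in the ai_assistant ECR repository
aws ecr describe-images --repository-name ai_assistant --region eu-west-1
```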
160 | 161 | Once the image pushed on the ECR, go to the terraform directory with (make sure to meet the basic requirements in AWS so that the terraform files works (see AWS resources requirements)) : 162 | ```sh 163 | cd terraform 164 | ``` 165 | and run: 166 | ```sh 167 | export AWS_ACCESS_KEY="access-key" 168 | export AWS_SECRET_ACCESS_KEY="secret-access-key" 169 | terraform init 170 | terraform plan 171 | terraform apply 172 | ``` 173 | Terraform is needed : https://developer.hashicorp.com/terraform/tutorials/aws-get-started/install-cli 174 | Replace access-key and secret-access-key with valid AWS credentials that will be used to create the resources. The account must have all the necessary rights to create and access the resources needed. 175 | 176 | You will find the ECS cluster here : https://eu-west-1.console.aws.amazon.com/ecs/v2/clusters?region=eu-west-1 177 | 178 | To redeploy the service with the latest version of the application, go to : https://eu-west-1.console.aws.amazon.com/ecs/v2/clusters/ai_assistant/services?region=eu-west-1. 179 | Select your running service in the services list 180 | Click on Update. 181 | Check Force new Deployment, and click on Update. 182 | The latest version of the image will be deployed with the new service. -------------------------------------------------------------------------------- /tests/utils/test_utilsrag_li.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import os 3 | from dotenv import load_dotenv, find_dotenv 4 | 5 | from llama_index.core import SimpleDirectoryReader, Settings 6 | from llama_index.embeddings.mistralai import MistralAIEmbedding 7 | from llama_index.llms.mistralai import MistralAI 8 | 9 | import rag_assistant.utils.utilsrag_li 10 | from rag_assistant.utils.utilsrag_li import create_automerging_engine, create_sentence_window_engine, create_subquery_engine, \ 11 | create_direct_query_engine, create_li_agent 12 | 13 | import shutil 14 | 15 | 16 | import numpy as np 17 | 18 | import nest_asyncio 19 | 20 | from trulens_eval import ( 21 | Feedback, 22 | TruLlama, 23 | OpenAI, 24 | Tru, Select 25 | ) 26 | from trulens_eval.app import App 27 | 28 | load_dotenv(find_dotenv()) 29 | 30 | # Set OpenAI API key from Streamlit secrets 31 | openai_api_key = os.getenv('OPENAI_API_KEY') 32 | 33 | nest_asyncio.apply() 34 | 35 | provider = OpenAI() 36 | 37 | def get_openai_api_key(): 38 | _ = load_dotenv(find_dotenv()) 39 | 40 | return os.getenv("OPENAI_API_KEY") 41 | 42 | 43 | def get_hf_api_key(): 44 | _ = load_dotenv(find_dotenv()) 45 | 46 | return os.getenv("HUGGINGFACE_API_KEY") 47 | 48 | 49 | def get_trulens_feedbacks(query_engine): 50 | 51 | context = App.select_context(query_engine) 52 | 53 | qa_relevance = ( 54 | Feedback(provider.relevance_with_cot_reasons, name="Answer Relevance") 55 | .on_input_output() 56 | ) 57 | 58 | qs_relevance = ( 59 | Feedback(provider.context_relevance_with_cot_reasons, name="Context Relevance") 60 | .on_input() 61 | .on(context) 62 | .aggregate(np.mean) 63 | ) 64 | 65 | groundedness = ( 66 | Feedback(provider.groundedness_measure_with_cot_reasons, name = "Groundedness") 67 | .on(context.collect()) 68 | .on_output() 69 | ) 70 | 71 | feedbacks = [qa_relevance, qs_relevance, groundedness] 72 | return feedbacks 73 | 74 | 75 | def get_prebuilt_trulens_recorder(query_engine, app_id, feedbacks): 76 | tru_recorder = TruLlama( 77 | query_engine, 78 | app_id=app_id, 79 | feedbacks=feedbacks 80 | ) 81 | return tru_recorder 82 | 83 | 84 | 
@pytest.fixture(scope="module") 85 | def temp_dir(request): 86 | # TODO define a test specific dir... 87 | # Setup: Create a temporary directory for the test module 88 | dir_name = rag_assistant.utils.utilsrag_li.llama_index_root_dir 89 | os.makedirs(dir_name, exist_ok=True) 90 | shutil.rmtree(dir_name) 91 | # Yield the directory name to the tests 92 | yield dir_name 93 | 94 | # Teardown: Remove the temporary directory after tests are done 95 | if os.path.isdir(dir_name): # Check if the directory exists before removing it 96 | #shutil.rmtree(dir_name) # TODO commenting this while fix above is not done 97 | pass 98 | 99 | 100 | def llm_prepare(): 101 | llm = MistralAI(model="mistral-large-latest") 102 | 103 | Settings.llm = llm 104 | 105 | return llm 106 | 107 | 108 | def embeddings_prepare(): 109 | embed_model = MistralAIEmbedding() 110 | 111 | Settings.embed_model = embed_model 112 | 113 | return embed_model 114 | 115 | 116 | @pytest.fixture 117 | def docs_prepare(): 118 | documents = SimpleDirectoryReader( 119 | input_files=["tests/utils/eBook-How-to-Build-a-Career-in-AI.pdf"] 120 | ).load_data() 121 | return documents 122 | 123 | 124 | @pytest.fixture 125 | def eval_questions_prepare(): 126 | eval_questions = [] 127 | with open('tests/utils/eval_questions.txt', 'r') as file: 128 | for line in file: 129 | # Remove newline character and convert to integer 130 | item = line.strip() 131 | print(item) 132 | eval_questions.append(item) 133 | return eval_questions 134 | 135 | 136 | def test_automerging_agent(temp_dir, 137 | docs_prepare, eval_questions_prepare, trulens_prepare): 138 | 139 | llm = llm_prepare() 140 | 141 | query_engine = create_automerging_engine(docs_prepare) 142 | 143 | feedbacks = get_trulens_feedbacks(query_engine) 144 | 145 | tru_recorder = get_prebuilt_trulens_recorder(query_engine, 146 | app_id="Automerging Query Engine", 147 | feedbacks=feedbacks) 148 | 149 | with tru_recorder as recording: 150 | for question in eval_questions_prepare: 151 | response = query_engine.query(question) 152 | assert response is not None, "L'interprétation n'a pas retourné de résultat." 153 | 154 | agent = create_li_agent(name="test_automerging_agent", description="Test Automerging Agent", 155 | query_engine=query_engine, llm=llm) 156 | 157 | response = agent.query("How do I get started on a personal project in AI?") 158 | print(f"response: {str(response)}") 159 | assert response is not None, "L'interprétation n'a pas retourné de résultat." 160 | 161 | 162 | def test_sentence_window_agent(temp_dir, docs_prepare, eval_questions_prepare, trulens_prepare): 163 | 164 | llm = llm_prepare() 165 | 166 | query_engine = create_sentence_window_engine( 167 | docs_prepare, 168 | ) 169 | 170 | feedbacks = get_trulens_feedbacks(query_engine) 171 | 172 | tru_recorder = get_prebuilt_trulens_recorder(query_engine, 173 | app_id="Sentence Window Query Engine", 174 | feedbacks=feedbacks) 175 | 176 | with tru_recorder as recording: 177 | for question in eval_questions_prepare: 178 | response = query_engine.query(question) 179 | 180 | agent = create_li_agent(name="test_sentence_window_agent", description="Test Sentence Window Agent", 181 | query_engine=query_engine, llm=llm) 182 | 183 | response = agent.query("How do I get started on a personal project in AI?") 184 | assert response is not None, "L'interprétation n'a pas retourné de résultat." 
185 | 186 | 187 | def test_llamaindex_agent(temp_dir, docs_prepare, eval_questions_prepare, trulens_prepare): 188 | 189 | llm = llm_prepare() 190 | 191 | query_engine = create_direct_query_engine( 192 | docs_prepare, 193 | ) 194 | 195 | feedbacks = get_trulens_feedbacks(query_engine) 196 | 197 | tru_recorder = get_prebuilt_trulens_recorder(query_engine, 198 | app_id="Direct Query Engine", 199 | feedbacks=feedbacks) 200 | 201 | with tru_recorder as recording: 202 | for question in eval_questions_prepare: 203 | response = query_engine.query(question) 204 | 205 | agent = create_li_agent(name="test_direct_query_agent", description="Test Direct Query Agent", 206 | query_engine=query_engine, 207 | llm=llm) 208 | 209 | response = agent.query("How do I get started on a personal project in AI?") 210 | assert response is not None, "L'interprétation n'a pas retourné de résultat." 211 | 212 | 213 | def test_subquery_agent(temp_dir, docs_prepare, eval_questions_prepare, trulens_prepare): 214 | 215 | llm = llm_prepare() 216 | 217 | topics = ["AI", "Other"] 218 | query_engine = create_subquery_engine( 219 | topics, 220 | docs_prepare, 221 | ) 222 | 223 | feedbacks = get_trulens_feedbacks(query_engine) 224 | 225 | tru_recorder = get_prebuilt_trulens_recorder(query_engine, 226 | app_id="Sub Query Engine", 227 | feedbacks=feedbacks) 228 | 229 | with tru_recorder as recording: 230 | for question in eval_questions_prepare: 231 | response = query_engine.query(question) 232 | 233 | agent = create_li_agent(name="test_subquery_agent", description="Test Subquery Agent", 234 | query_engine=query_engine, 235 | llm=llm) 236 | 237 | response = agent.query("How do I get started on a personal project in AI?") 238 | assert response is not None, "L'interprétation n'a pas retourné de résultat." 
239 | 240 | -------------------------------------------------------------------------------- /rag_assistant/utils/utilsllm.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import openai 3 | import os 4 | 5 | from typing import Optional 6 | 7 | from langchain_aws.chat_models import ChatBedrock 8 | from langchain_core.embeddings import Embeddings 9 | from langchain_core.language_models import BaseChatModel 10 | from langchain_openai import ChatOpenAI, AzureChatOpenAI 11 | from langchain_openai.embeddings import OpenAIEmbeddings, AzureOpenAIEmbeddings 12 | from langchain_mistralai import ChatMistralAI, MistralAIEmbeddings 13 | from langchain_aws.embeddings.bedrock import BedrockEmbeddings 14 | from llama_index.core.base.embeddings.base import BaseEmbedding 15 | from llama_index.core.llms import LLM 16 | 17 | from llama_index.embeddings.mistralai import MistralAIEmbedding as LIMistralAIEmbedding 18 | from llama_index.embeddings.openai import OpenAIEmbedding as LIOpenAIEmbedding 19 | from llama_index.embeddings.bedrock import BedrockEmbedding as LIBedrockEmbedding 20 | from llama_index.llms.mistralai import MistralAI as LIMistralAI 21 | from llama_index.llms.openai import OpenAI as LIOpenAI 22 | from llama_index.llms.bedrock import Bedrock as LIBedrock 23 | 24 | 25 | from dotenv import load_dotenv, find_dotenv 26 | 27 | from .config_loader import load_config 28 | 29 | config = load_config() 30 | 31 | # read local .env file 32 | _ = load_dotenv(find_dotenv()) 33 | 34 | openai.api_key = os.getenv('OPENAI_API_KEY') 35 | mistral_api_key = os.getenv('MISTRAL_API_KEY') 36 | aws_profile_name = os.getenv('profile_name') 37 | 38 | bedrock_region_name = config["BEDROCK"]["AWS_REGION_NAME"] 39 | bedrock_endpoint_url = config["BEDROCK"]["BEDROCK_ENDPOINT_URL"] 40 | 41 | # instantiating the Bedrock client, and passing in the CLI profile 42 | # TODO should be done lazyly only for bedrock 43 | bedrock = boto3.client('bedrock-runtime', bedrock_region_name, 44 | endpoint_url=bedrock_endpoint_url) 45 | 46 | model_kwargs = { 47 | #"maxTokenCount": 4096, 48 | #"stopSequences": [], 49 | "temperature": 0, 50 | #"topP": 1, 51 | } 52 | 53 | 54 | def get_model_provider(model_name:str = None) -> Optional[str]: 55 | provider = None 56 | if model_name is None: 57 | provider = config['MODEL_PROVIDER']['MODEL_PROVIDER'] 58 | elif model_name.startswith("gpt"): 59 | provider = "OPENAI" 60 | elif model_name.startswith("mistral-"): 61 | provider = "MISTRAL" 62 | elif model_name.startswith("mistral.mi"): 63 | provider = "BEDROCK" 64 | elif model_name.startswith("anthropic"): 65 | provider = "BEDROCK" 66 | 67 | return provider 68 | 69 | 70 | def get_model_name(provider: str, model_name: str = None) -> Optional[str]: 71 | 72 | if provider is None: 73 | provider = config['MODEL_PROVIDER']['MODEL_PROVIDER'] 74 | 75 | if provider == "AZURE": 76 | model_name = config['AZURE']['AZURE_OPENAI_API_VERSION'] 77 | elif provider == "OPENAI": 78 | if model_name is None: 79 | model_name = config['OPENAI']['OPENAI_MODEL_NAME'] 80 | elif provider == "MISTRAL": 81 | if model_name is None: 82 | model_name = config['MISTRAL']['CHAT_MODEL'] 83 | elif provider == "BEDROCK": 84 | if model_name is None: 85 | model_name = config['BEDROCK']['CHAT_MODEL'] 86 | 87 | return model_name 88 | 89 | 90 | def load_model(model_name: str = None, temperature: float = 0, streaming:bool = False) -> BaseChatModel: 91 | 92 | provider = get_model_provider(model_name) 93 | model_name = 
get_model_name(model_name=model_name, provider=provider) 94 | if provider == "AZURE": 95 | llm = AzureChatOpenAI( 96 | openai_api_version=config['AZURE']['AZURE_OPENAI_API_VERSION'], 97 | azure_endpoint=config['AZURE']['AZURE_OPENAI_ENDPOINT'], 98 | azure_deployment=model_name, 99 | api_key=os.environ["AZURE_OPENAI_API_KEY"] 100 | ) 101 | elif provider == "OPENAI": 102 | llm = ChatOpenAI(model_name=model_name, temperature=temperature, streaming=streaming) 103 | elif provider == "MISTRAL": 104 | llm = ChatMistralAI(mistral_api_key=mistral_api_key, model=model_name, temperature=temperature) 105 | elif provider == "BEDROCK": 106 | # ChatBedrock --> must be adapted for system prompt get error "first message must use the "user" role" 107 | # temperature not supported 108 | llm = ChatBedrock( 109 | client=bedrock, 110 | model_id=model_name, 111 | streaming=streaming 112 | ) 113 | 114 | else: 115 | raise NotImplementedError(f"Model {provider} unknown.") 116 | 117 | return llm 118 | 119 | 120 | def load_llamaindex_model(model_name: str = None, temperature: float = 0) -> LLM: 121 | 122 | provider = get_model_provider(model_name) 123 | model_name = get_model_name(provider=provider, model_name=model_name) 124 | 125 | if provider == "AZURE": 126 | raise NotImplementedError(f"Model {provider} unsupported for LlamaIndex.") 127 | elif provider == "OPENAI": 128 | llm = LIOpenAI(model=model_name, temperature=temperature) 129 | elif provider == "MISTRAL": 130 | llm = LIMistralAI(api_key=mistral_api_key, model=model_name, temperature=temperature) 131 | elif provider == "BEDROCK": 132 | # ChatBedrock --> must be adapted for system prompt get error "first message must use the "user" role" 133 | llm = LIBedrock( 134 | client=bedrock, 135 | model=model_name, 136 | temperature=temperature 137 | ) 138 | else: 139 | raise NotImplementedError(f"Model {model_name} unknown.") 140 | 141 | return llm 142 | 143 | 144 | def get_embeddings_model_name(provider: str, embeddings_model_name: str = None) -> Optional[str]: 145 | 146 | if provider is None: 147 | provider = config['MODEL_PROVIDER']['MODEL_PROVIDER'] 148 | 149 | if provider == "AZURE": 150 | embeddings_model_name = config['AZURE']['AZURE_OPENAI_EMBEDDING_DEPLOYMENT'] 151 | elif provider == "OPENAI": 152 | if embeddings_model_name is None: 153 | embeddings_model_name = config['OPENAI']['EMBEDDINGS_MODEL'] 154 | elif provider == "MISTRAL": 155 | if embeddings_model_name is None: 156 | embeddings_model_name = config['MISTRAL']['EMBEDDINGS_MODEL'] 157 | elif provider == "BEDROCK": 158 | if embeddings_model_name is None: 159 | embeddings_model_name = config["BEDROCK"]["EMBEDDINGS_MODEL"] 160 | 161 | return embeddings_model_name 162 | 163 | 164 | 165 | def load_embeddings(model_name: str = None) -> Embeddings: 166 | 167 | provider = get_model_provider(model_name) 168 | embeddings_model = get_embeddings_model_name(provider=provider, embeddings_model_name=model_name) 169 | 170 | if provider == "AZURE": 171 | embeddings = AzureOpenAIEmbeddings( 172 | azure_deployment=embeddings_model, 173 | azure_endpoint=config['AZURE']['AZURE_OPENAI_ENDPOINT'], 174 | openai_api_version=config['AZURE']["AZURE_OPENAI_API_VERSION"], 175 | api_key=os.environ["AZURE_OPENAI_API_KEY"] 176 | ) 177 | elif provider == "OPENAI": 178 | embeddings = OpenAIEmbeddings(model_name=embeddings_model) 179 | elif provider == "MISTRAL": 180 | embeddings = MistralAIEmbeddings(model_name=embeddings_model) 181 | elif provider == "BEDROCK": 182 | embeddings = BedrockEmbeddings( 183 | client=bedrock, 184 | 
region_name=bedrock_region_name, 185 | model_id=embeddings_model) 186 | else: 187 | raise NotImplementedError(f"Model {model_name} unknown.") 188 | 189 | return embeddings 190 | 191 | 192 | def load_llamaindex_embeddings(model_name: str = None) -> BaseEmbedding: 193 | 194 | provider = get_model_provider(model_name) 195 | embeddings_model = get_embeddings_model_name(provider=provider, embeddings_model_name=model_name) 196 | 197 | if provider == "AZURE": 198 | raise NotImplementedError(f"Embeddings {provider} unsupported for LlamaIndex.") 199 | elif provider == "OPENAI": 200 | embeddings = LIOpenAIEmbedding(model=embeddings_model) 201 | elif provider == "MISTRAL": 202 | embeddings = LIMistralAIEmbedding(model_name=embeddings_model) 203 | elif provider == "BEDROCK": 204 | embeddings = LIBedrockEmbedding( 205 | region_name=bedrock_region_name, 206 | model_name=embeddings_model, 207 | client=bedrock) 208 | else: 209 | raise NotImplementedError(f"Model {model_name} unknown.") 210 | 211 | return embeddings 212 | 213 | 214 | -------------------------------------------------------------------------------- /rag_assistant/utils/utilsrag_lc.py: -------------------------------------------------------------------------------- 1 | from langchain.chains.retrieval_qa.base import RetrievalQA 2 | from langchain_core.language_models import BaseChatModel 3 | from langchain_core.vectorstores import VectorStore 4 | from pydantic import BaseModel, Field 5 | 6 | from langchain.docstore.document import Document 7 | 8 | from langchain.schema.prompt_template import format_document 9 | 10 | from langchain.chains.combine_documents import collapse_docs, split_list_of_docs 11 | from functools import partial 12 | from operator import itemgetter 13 | 14 | from langchain.callbacks.manager import trace_as_chain_group 15 | 16 | from langchain.utils.openai_functions import convert_pydantic_to_openai_function 17 | from langchain.output_parsers.openai_functions import PydanticOutputFunctionsParser 18 | 19 | from langchain_openai import ChatOpenAI 20 | from langchain.prompts import PromptTemplate 21 | from langchain.schema import StrOutputParser 22 | from langchain.schema.runnable import RunnableParallel, RunnablePassthrough 23 | 24 | from .config_loader import load_config 25 | 26 | config = load_config() 27 | 28 | # "local:BAAI/bge-small-en-v1.5" 29 | 30 | 31 | # https://python.langchain.com/docs/use_cases/question_answering/ 32 | # https://python.langchain.com/docs/modules/chains/document/stuff 33 | # https://python.langchain.com/docs/modules/chains/document/map_reduce 34 | # https://python.langchain.com/docs/modules/chains/document/refine 35 | # https://python.langchain.com/docs/modules/chains/document/map_rerank 36 | def invoke(question: str, template: str, llm: BaseChatModel, chain_type: str, vectorstore: VectorStore, 37 | search_type: str, k: int, verbose: bool): 38 | 39 | retriever = vectorstore.as_retriever(search_type=search_type, search_kwargs={'k': k}) 40 | docs = retriever.invoke(question) 41 | output = None 42 | 43 | if verbose: 44 | print(docs) 45 | 46 | document_prompt = PromptTemplate.from_template("{page_content}") 47 | partial_format_document = partial(format_document, prompt=document_prompt) 48 | # temporary to replace with incoming question 49 | 50 | map_prompt = PromptTemplate.from_template( 51 | "Answer the user question using the context." 
52 | "\n\nContext:\n\n{context}\n\nQuestion: {question}" 53 | ) 54 | 55 | rag_prompt_custom = PromptTemplate.from_template(template) 56 | 57 | def format_docs(docs): 58 | return "\n\n".join(doc.page_content for doc in docs) 59 | 60 | if chain_type == "stuff": 61 | print("stuff chain") 62 | 63 | rag_chain = ( 64 | { 65 | "context": lambda x: "\n\n".join( 66 | format_document(doc, document_prompt) for doc in x["docs"] 67 | ), 68 | "question": itemgetter("question"), 69 | } 70 | | map_prompt 71 | | llm 72 | | StrOutputParser() 73 | ) 74 | output = rag_chain.invoke({"docs": docs, "question": question}) 75 | 76 | elif chain_type == "map_reduce": 77 | 78 | print("map_reduce chain") 79 | 80 | # PromptTemplate.from_template("Summarize this content:\n\n{context}") 81 | first_prompt = PromptTemplate.from_template( 82 | "Answer the user question using the context." 83 | "\n\nContext:\n\n{context}\n\nQuestion: " + question 84 | ) 85 | # first_prompt = first_prompt.format_prompt(question=question) 86 | 87 | # The chain we'll apply to each individual document. 88 | map_chain = ( 89 | {"context": partial_format_document} 90 | | first_prompt 91 | | llm 92 | | StrOutputParser() 93 | ) 94 | 95 | # A wrapper chain to keep the original Document metadata 96 | map_as_doc_chain = ( 97 | RunnableParallel({"doc": RunnablePassthrough(), "context": map_chain}) 98 | | ( 99 | lambda x: Document(page_content=x["context"], metadata=x["doc"].metadata) 100 | ) 101 | ).with_config(run_name="Summarize (return doc)") 102 | 103 | # The chain we'll repeatedly apply to collapse subsets of the documents 104 | # into a consolidate document until the total token size of our 105 | # documents is below some max size. 106 | collapse_chain = ( 107 | {"context": format_docs} 108 | | PromptTemplate.from_template("Collapse this content:\n\n{context}") 109 | | llm 110 | | StrOutputParser() 111 | ) 112 | 113 | def get_num_tokens(docs): 114 | return llm.get_num_tokens(format_docs(docs)) 115 | 116 | def collapse( 117 | docs, 118 | config, 119 | token_max=4000, 120 | ): 121 | collapse_ct = 1 122 | while get_num_tokens(docs) > token_max: 123 | config["run_name"] = f"Collapse {collapse_ct}" 124 | invoke = partial(collapse_chain.invoke, config=config) 125 | split_docs = split_list_of_docs(docs, get_num_tokens, token_max) 126 | docs = [collapse_docs(_docs, invoke) for _docs in split_docs] 127 | collapse_ct += 1 128 | return docs 129 | 130 | # The chain we'll use to combine our individual document summaries 131 | # (or summaries over subset of documents if we had to collapse the map results) 132 | # into a final summary. 133 | reduce_chain = ( 134 | {"context": format_docs} 135 | | PromptTemplate.from_template("Combine these answers:\n\n{context}") 136 | | llm 137 | | StrOutputParser() 138 | ).with_config(run_name="Reduce") 139 | 140 | # The final full chain 141 | rag_chain = (map_as_doc_chain.map() | collapse | reduce_chain).with_config( 142 | run_name="Map reduce" 143 | ) 144 | 145 | output = rag_chain.invoke(docs, config={"max_concurrency": 5}) 146 | 147 | elif chain_type == "map_rerank": 148 | print("map_reduce chain") 149 | 150 | # Chain to apply to each individual document. Chain 151 | # provides an answer to the question based on the document 152 | # and scores it's confidence in the answer. 153 | class AnswerAndScore(BaseModel): 154 | """Return the answer to the question and a relevance score.""" 155 | 156 | answer: str = Field( 157 | description="The answer to the question, which is based ONLY on the provided context." 
158 | ) 159 | score: float = Field( 160 | description="A 0.0-1.0 relevance score, where 1.0 indicates the provided context answers the question completely and 0.0 indicates the provided context does not answer the question at all." 161 | ) 162 | 163 | function = convert_pydantic_to_openai_function(AnswerAndScore) 164 | map_chain = ( 165 | map_prompt 166 | | ChatOpenAI().bind( 167 | temperature=0, functions=[function], function_call={"name": "AnswerAndScore"} 168 | ) 169 | | PydanticOutputFunctionsParser(pydantic_schema=AnswerAndScore) 170 | ).with_config(run_name="Map") 171 | 172 | # Final chain, which after answer and scoring based on 173 | # each doc return the answer with the highest score. 174 | 175 | def top_answer(scored_answers): 176 | return max(scored_answers, key=lambda x: x.score).answer 177 | 178 | # document_prompt = PromptTemplate.from_template("{page_content}") 179 | rag_chain = ( 180 | ( 181 | lambda x: [ 182 | { 183 | "context": format_document(doc, document_prompt), 184 | "question": question, # x["question"] 185 | } 186 | for doc in x["docs"] 187 | ] 188 | ) 189 | | map_chain.map() 190 | | top_answer 191 | ).with_config(run_name="Map rerank") 192 | 193 | output = rag_chain.invoke({"docs": docs, "question": question}) 194 | 195 | elif chain_type == "refine": 196 | # first_prompt = PromptTemplate.from_template("Summarize this content:\n\n{context}") 197 | first_prompt = PromptTemplate.from_template( 198 | "Answer the user question using the context." 199 | "\n\nContext:\n\n{context}\n\nQuestion: " + question 200 | ) 201 | document_prompt = PromptTemplate.from_template("{page_content}") 202 | partial_format_doc = partial(format_document, prompt=document_prompt) 203 | summary_chain = {"context": partial_format_doc} | first_prompt | llm | StrOutputParser() 204 | refine_prompt = PromptTemplate.from_template( 205 | "Answer the user question." 206 | "\n\nHere's your first summary: {prev_response}. 
" 207 | "\n\nNow add to it based on the following context: {context}\n\nQuestion: " + question 208 | ) 209 | refine_chain = ( 210 | { 211 | "prev_response": itemgetter("prev_response"), 212 | "context": lambda x: partial_format_doc(x["doc"]), 213 | } 214 | | refine_prompt 215 | | llm 216 | | StrOutputParser() 217 | ) 218 | 219 | def refine_loop(docs): 220 | with trace_as_chain_group("refine loop", inputs={"input": docs}) as manager: 221 | summary = summary_chain.invoke( 222 | docs[0], config={"callbacks": manager, "run_name": "initial summary"} 223 | ) 224 | for i, doc in enumerate(docs[1:]): 225 | summary = refine_chain.invoke( 226 | {"prev_response": summary, "doc": doc}, 227 | config={"callbacks": manager, "run_name": f"refine {i}"}, 228 | ) 229 | manager.on_chain_end({"output": summary}) 230 | return summary 231 | 232 | output = refine_loop(docs) 233 | return output 234 | 235 | 236 | def agent_lc_factory(chain_type, llm, search_kwargs, search_type, vectorstore): 237 | 238 | retriever = vectorstore.as_retriever(search_type=search_type, search_kwargs=search_kwargs) 239 | retrieval_qa_chain = RetrievalQA.from_chain_type( 240 | llm=llm, chain_type=chain_type, retriever=retriever 241 | ) 242 | return retrieval_qa_chain 243 | -------------------------------------------------------------------------------- /rag_assistant/legacy_ux/1_RAG_agent_with_LC.py: -------------------------------------------------------------------------------- 1 | from json import JSONDecodeError 2 | from typing import Union 3 | 4 | import chromadb 5 | import streamlit as st 6 | from langchain.agents import AgentExecutor, create_structured_chat_agent 7 | from langchain_community.vectorstores.chroma import Chroma 8 | from langchain_core.documents import Document 9 | from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder 10 | from langchain_core.tools import Tool, ToolException 11 | from langsmith import traceable 12 | from streamlit.runtime.uploaded_file_manager import UploadedFile 13 | 14 | from utils.config_loader import load_config 15 | from utils.utilsdoc import load_doc 16 | 17 | from utils.utilsrag_lc import agent_lc_factory 18 | 19 | from utils.utilsllm import load_model, load_embeddings 20 | 21 | from dotenv import load_dotenv, find_dotenv 22 | 23 | from langchain_core.runnables.history import RunnableWithMessageHistory 24 | from langchain_community.chat_message_histories import ( 25 | StreamlitChatMessageHistory, 26 | ) 27 | 28 | from langchain_core.tracers.context import tracing_v2_enabled 29 | 30 | # EXTERNALISATION OF PROMPTS TO HAVE THEIR OWN VERSIONING 31 | from shared.rag_prompts import __structured_chat_agent__, human 32 | 33 | load_dotenv(find_dotenv()) 34 | 35 | config = load_config() 36 | 37 | app_name = config['DEFAULT']['APP_NAME'] 38 | LLM_MODEL = config['MODEL_PROVIDER']['MODEL_PROVIDER'] 39 | 40 | topics = ["Cloud", "Security", "GenAI", "Application", "Architecture", "AWS", "Other"] 41 | 42 | model_to_index = { 43 | "OPENAI": 0, 44 | "MISTRAL": 1, 45 | "BEDROCK": 2 46 | } 47 | 48 | 49 | def load_sidebar(): 50 | with st.sidebar: 51 | st.header("Parameters") 52 | st.sidebar.subheader("LangChain model provider") 53 | st.sidebar.checkbox("OpenAI", LLM_MODEL == "OPENAI", disabled=True) 54 | st.sidebar.checkbox("Mistral", LLM_MODEL == "MISTRAL", disabled=True) 55 | st.sidebar.checkbox("Bedrock", LLM_MODEL == "BEDROCK", disabled=True) 56 | 57 | 58 | def _load_doc(pdfs: Union[list[UploadedFile], None, UploadedFile]) -> list[Document]: 59 | # loader = 
PyPDFDirectoryLoader("data/sources/pdf/") 60 | # all_docs = loader.load() 61 | all_docs = load_doc(pdfs) 62 | return all_docs 63 | 64 | 65 | def configure_agent(all_docs: list[Document], model_name, chain_type, search_type="similarity", search_kwargs=None): 66 | 67 | embeddings_rag = load_embeddings(model_name) 68 | llm_rag = load_model(model_name, temperature=0.1) 69 | 70 | chroma_client = chromadb.EphemeralClient() 71 | 72 | vectorstore = Chroma.from_documents( 73 | documents=all_docs, 74 | embedding=embeddings_rag, 75 | client=chroma_client, 76 | collection_name="RAG_LC_Agent" 77 | ) 78 | 79 | # vectorstore = get_store() # embeddings_rag, collection_name="RAG_LC_Agent") 80 | retrieval_qa_chain = agent_lc_factory(chain_type, llm_rag, search_kwargs, 81 | search_type, vectorstore) 82 | 83 | def _handle_error(error: ToolException) -> str: 84 | if error == JSONDecodeError: 85 | return "Reformat in JSON and try again" 86 | elif error.args[0].startswith("Too many arguments to single-input tool"): 87 | return "Format in a SINGLE STRING. DO NOT USE MULTI-ARGUMENTS INPUT." 88 | return ( 89 | "The following errors occurred during tool execution:" 90 | + error.args[0] 91 | + "Please try another tool.") 92 | 93 | lc_tools = [ 94 | Tool( 95 | name=f"Knowledge Agent (LC)", 96 | func=retrieval_qa_chain, 97 | description=f"""Useful when you need to answer questions on {topics}. " 98 | "DO NOT USE MULTI-ARGUMENTS INPUT.""", 99 | handle_tool_error=_handle_error, 100 | ), 101 | ] 102 | ## START LANGCHAIN 103 | # MODEL FOR LANGCHAIN IS DEFINE GLOBALLY IN CONF/CONFIG.INI 104 | # defaulting to "gpt-4-turbo" because it is the only one resilient 105 | llm_agent = load_model("gpt-4o") 106 | 107 | prompt = ChatPromptTemplate.from_messages( 108 | [ 109 | ("system", __structured_chat_agent__), 110 | MessagesPlaceholder("rag_chat_history", optional=True), 111 | ("human", human), 112 | ] 113 | ) 114 | 115 | # create_react_agent 116 | agent = create_structured_chat_agent( 117 | llm=llm_agent, 118 | tools=lc_tools, 119 | prompt=prompt 120 | ) 121 | 122 | # 123 | # TODO 124 | # sometimes received "Parsing LLM output produced both a final answer and a parse-able action" with mistral 125 | # add a handle_parsing_errors, reduce the case but still appears time to time. 126 | agent_executor = AgentExecutor( 127 | agent=agent, 128 | tools=lc_tools, 129 | handle_parsing_errors="Check your output and make sure it conforms to required format!" 
130 | "Format is Action:```$JSON_BLOB``` then Observation" 131 | " Do not output an action and a final answer at the same time.") 132 | ## END LANGCHAIN 133 | return agent_executor 134 | 135 | 136 | @traceable(run_type="chain", project_name="RAG Assistant", tags=["LangChain", "RAG", "Agent"]) 137 | def call_chain(chain_with_history, prompt): 138 | config = {"configurable": {"session_id": "any"}} 139 | response = chain_with_history.invoke( 140 | input={ 141 | "input": prompt 142 | }, 143 | config=config 144 | ) 145 | answer = f"🦜: {response['output']}" 146 | st.write(answer) 147 | 148 | 149 | def main(): 150 | 151 | st.title("Question Answering Assistant (RAG)") 152 | 153 | load_sidebar() 154 | 155 | model_index = model_to_index[LLM_MODEL] 156 | agent_model = st.sidebar.radio("RAG Agent LLM Provider", ["OPENAI", "MISTRAL", "BEDROCK"], index=model_index) 157 | 158 | st.sidebar.subheader("RAG Agent Model") 159 | model_name_gpt = st.sidebar.radio("OpenAI Model", ["gpt-3.5-turbo", "gpt-4-turbo", "gpt-4o"], 160 | captions=["GPT 3.5 Turbo", "GPT 4 Turbo", "GPT 4 Omni"], 161 | index=0, disabled=agent_model != "OPENAI") 162 | 163 | model_name_mistral = st.sidebar.radio("Mistral Model", ["mistral-small-latest", "mistral-medium-latest", "mistral-large-latest"], 164 | captions=["Mistral 7b", "Mixtral", "Mistral Large"], 165 | index=2, disabled=agent_model != "MISTRAL") 166 | 167 | model_name_bedrock = st.sidebar.radio("Bedrock Model", ["mistral.mistral-large-2402-v1:0", 168 | "anthropic.claude-3-sonnet-20240229-v1:0"], 169 | captions=["Mistral Large", 170 | "Claude 3 Sonnet"], 171 | index=0, disabled=agent_model != "BEDROCK") 172 | 173 | model_name = None 174 | if agent_model == "MISTRAL": 175 | model_name = model_name_mistral 176 | elif agent_model == "OPENAI": 177 | model_name = model_name_gpt 178 | elif agent_model == "BEDROCK": 179 | model_name = model_name_bedrock 180 | 181 | chain_type = st.sidebar.radio("Chain type (LangChain)", 182 | ["stuff", "map_reduce", "refine", "map_rerank"]) 183 | 184 | st.sidebar.subheader("Search params (LangChain)") 185 | k = st.sidebar.slider('Number of relevant chunks', 2, 10, 4, 1) 186 | 187 | search_type = st.sidebar.radio("Search Type", ["similarity", "mmr", 188 | "similarity_score_threshold"]) 189 | 190 | pdfs = st.file_uploader("Document(s) à transmettre", type=['pdf', 'txt', 'md'], accept_multiple_files=True) 191 | 192 | disabled = True 193 | 194 | docs = [] 195 | # if st.button("Transmettre", disabled=disabled): 196 | # calling an internal function for adapting LC or LI Document 197 | docs = _load_doc(pdfs) 198 | 199 | if (docs is not None) and (len(docs)): 200 | disabled = False 201 | 202 | if not disabled: 203 | 204 | history = StreamlitChatMessageHistory(key="rag_chat_history") 205 | if len(history.messages) == 0: 206 | history.add_ai_message("What do you want to know?") 207 | 208 | view_messages = st.expander("View the message contents in session state") 209 | 210 | st.header("RAG agent with LangChain") 211 | agent = configure_agent(docs, model_name, chain_type, search_type, {"k": k}) 212 | 213 | chain_with_history = RunnableWithMessageHistory( 214 | agent, 215 | lambda session_id: history, 216 | input_messages_key="input", 217 | history_messages_key="rag_chat_history", 218 | ) 219 | 220 | # Display chat messages from history on app rerun 221 | for message in history.messages: 222 | with st.chat_message(message.type): 223 | st.markdown(message.content) 224 | 225 | # Accept user input 226 | if prompt := st.chat_input(): 227 | # Add user message to chat 
history 228 | # Note: new messages are saved to history automatically by Langchain during run 229 | # st.session_state.messages.append({"role": "user", "content": prompt}) 230 | # Display user message in chat message container 231 | with st.chat_message("user"): 232 | st.markdown(prompt) 233 | 234 | # Display assistant response in chat message container 235 | with st.chat_message("assistant"): 236 | # Display assistant response in chat message container 237 | # with tracing_v2_enabled(project_name="Applied AI RAG Assistant", 238 | # tags=["LangChain", "Agent"]): 239 | call_chain(chain_with_history, prompt) 240 | 241 | # Draw the messages at the end, so newly generated ones show up immediately 242 | with view_messages: 243 | """ 244 | Message History initialized with: 245 | ```python 246 | msgs = StreamlitChatMessageHistory(key="rag_chat_history") 247 | ``` 248 | 249 | Contents of `st.session_state.rag_chat_history`: 250 | """ 251 | view_messages.json(st.session_state.rag_chat_history) 252 | 253 | 254 | if __name__ == "__main__": 255 | main() 256 | --------------------------------------------------------------------------------