├── .env.template
├── .github
│   ├── badges
│   │   └── coverage.json
│   ├── compose.test.yml
│   ├── config.test.yml
│   └── workflows
│       ├── build_and_deploy.yml
│       ├── build_and_tag.yml
│       ├── codeql_scan.yml
│       ├── run_tests.yml
│       └── secrets_scan.yml
├── .gitignore
├── .pre-commit-config.yaml
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── app
│   ├── .coveragerc
│   ├── Dockerfile
│   ├── alembic.ini
│   ├── alembic
│   │   ├── env.py
│   │   ├── script.py.mako
│   │   └── versions
│   │       ├── 2025_03_11_1552-9a9c82ec2470_create_usages_table.py
│   │       ├── 2025_04_02_1603-e78eaed1bcb2_add_auth.py
│   │       ├── 2025_04_04_1641-8c5ae2a3d4d0_remove_delete_cascade_usage_token_id.py
│   │       ├── 2025_04_15_1513-a4ac45e7c990_remove_unique_name_on_token_and_.py
│   │       ├── 2025_04_17_1201-896cbf4c2cbb_add_new_columns_to_usages_table.py
│   │       ├── 2025_05_12_1613-5553fa60acfa_remove_default_role_attribut.py
│   │       └── 2025_05_28_1541-752279f74929_add_cost_and_budget_colonne.py
│   ├── clients
│   │   ├── __init__.py
│   │   ├── database
│   │   │   ├── __init__.py
│   │   │   └── _qdrantclient.py
│   │   ├── mcp
│   │   │   ├── __init__.py
│   │   │   └── _secretshellbridgeclient.py
│   │   ├── model
│   │   │   ├── __init__.py
│   │   │   ├── _albertmodelclient.py
│   │   │   ├── _basemodelclient.py
│   │   │   ├── _openaimodelclient.py
│   │   │   ├── _teimodelclient.py
│   │   │   └── _vllmmodelclient.py
│   │   ├── parser
│   │   │   ├── __init__.py
│   │   │   ├── _baseparserclient.py
│   │   │   └── _markerparserclient.py
│   │   └── web_search
│   │       ├── __init__.py
│   │       ├── _basewebsearchclient.py
│   │       ├── _bravewebsearchclient.py
│   │       └── _duckduckgowebsearchclient.py
│   ├── endpoints
│   │   ├── __init__.py
│   │   ├── audio.py
│   │   ├── auth.py
│   │   ├── chat.py
│   │   ├── chunks.py
│   │   ├── collections.py
│   │   ├── completions.py
│   │   ├── documents.py
│   │   ├── embeddings.py
│   │   ├── files.py
│   │   ├── mcp.py
│   │   ├── models.py
│   │   ├── ocr.py
│   │   ├── parse.py
│   │   ├── rerank.py
│   │   └── search.py
│   ├── helpers
│   │   ├── __init__.py
│   │   ├── _accesscontroller.py
│   │   ├── _documentmanager.py
│   │   ├── _identityaccessmanager.py
│   │   ├── _limiter.py
│   │   ├── _parsermanager.py
│   │   ├── _streamingresponsewithstatuscode.py
│   │   ├── _usagetokenizer.py
│   │   ├── _websearchmanager.py
│   │   ├── agents
│   │   │   ├── __init__.py
│   │   │   └── _agentsmanager.py
│   │   ├── core.py
│   │   ├── data
│   │   │   ├── __init__.py
│   │   │   └── chunkers
│   │   │       ├── __init__.py
│   │   │       ├── _basesplitter.py
│   │   │       ├── _nochunker.py
│   │   │       └── _recursivecharactertextsplitter.py
│   │   ├── documents
│   │   │   └── __init__.py
│   │   └── models
│   │       ├── __init__.py
│   │       ├── _modelregistry.py
│   │       └── routers
│   │           ├── __init__.py
│   │           ├── _basemodelrouter.py
│   │           ├── _immediatemodelrouter.py
│   │           ├── _modelrouter.py
│   │           └── strategies
│   │               ├── __init__.py
│   │               ├── _baserountingstrategy.py
│   │               ├── _roundrobinroutingstrategy.py
│   │               └── _shuffleroutingstrategy.py
│   ├── main.py
│   ├── schemas
│   │   ├── __init__.py
│   │   ├── audio.py
│   │   ├── auth.py
│   │   ├── chat.py
│   │   ├── chunks.py
│   │   ├── collections.py
│   │   ├── completions.py
│   │   ├── core
│   │   │   ├── __init__.py
│   │   │   ├── auth.py
│   │   │   ├── context.py
│   │   │   ├── documents.py
│   │   │   ├── models.py
│   │   │   ├── settings.py
│   │   │   └── usage.py
│   │   ├── documents.py
│   │   ├── embeddings.py
│   │   ├── files.py
│   │   ├── mcp.py
│   │   ├── models.py
│   │   ├── ocr.py
│   │   ├── parse.py
│   │   ├── rerank.py
│   │   ├── search.py
│   │   └── usage.py
│   ├── sql
│   │   ├── models.py
│   │   └── session.py
│   ├── tests
│   │   ├── integ
│   │   │   ├── assets
│   │   │   │   ├── audio.mp3
│   │   │   │   ├── audio.wav
│   │   │   │   ├── html.html
│   │   │   │   ├── json.json
│   │   │   │   ├── json_wrong_format.json
│   │   │   │   ├── markdown.md
│   │   │   │   ├── pdf.pdf
│   │   │   │   └── pdf_too_large.pdf
│   │   │   ├── conftest.py
│   │   │   ├── fixtures
│   │   │   │   └── fixtures.py
│   │   │   ├── snapshots
│   │   │   │   ├── test_audio
│   │   │   │   │   ├── test_audio_transcriptions_invalid_model
│   │   │   │   │   │   └── audio_transcriptions_invalid_model
│   │   │   │   │   ├── test_audio_transcriptions_mp3
│   │   │   │   │   │   └── audio_transcriptions_mp3
│   │   │   │   │   ├── test_audio_transcriptions_text_output
│   │   │   │   │   │   └── audio_transcriptions_text_output
│   │   │   │   │   └── test_audio_transcriptions_wav
│   │   │   │   │       └── audio_transcriptions_wav
│   │   │   │   └── test_ocr
│   │   │   │       ├── test_ocr_custom_dpi
│   │   │   │       │   └── ocr_custom_dpi
│   │   │   │       ├── test_ocr_invalid_file_type
│   │   │   │       │   └── ocr_invalid_file_type
│   │   │   │       ├── test_ocr_pdf_successful
│   │   │   │       │   └── ocr_pdf_successful
│   │   │   │       ├── test_ocr_too_large_file
│   │   │   │       │   └── ocr_too_large_file
│   │   │   │       └── test_ocr_without_authentication
│   │   │   │           └── ocr_without_authentication
│   │   │   ├── test_audio.py
│   │   │   ├── test_auth.py
│   │   │   ├── test_chat.py
│   │   │   ├── test_chunks.py
│   │   │   ├── test_collections.py
│   │   │   ├── test_documents.py
│   │   │   ├── test_embeddings.py
│   │   │   ├── test_files.py
│   │   │   ├── test_identityaccessmanager.py
│   │   │   ├── test_mcp.py
│   │   │   ├── test_models.py
│   │   │   ├── test_multiagents.py
│   │   │   ├── test_ocr.py
│   │   │   ├── test_parsing.py
│   │   │   ├── test_rerank.py
│   │   │   ├── test_router.py
│   │   │   ├── test_search.py
│   │   │   ├── test_sql_models.py
│   │   │   ├── test_usage_decorator.py
│   │   │   ├── test_usagesmiddleware.py
│   │   │   ├── test_websearchmanager.py
│   │   │   └── utils
│   │   │       └── snapshot_assertions.py
│   │   └── unit
│   │       └── test_mcp_usecase.py
│   └── utils
│       ├── __init__.py
│       ├── carbon.py
│       ├── context.py
│       ├── depends.py
│       ├── exceptions.py
│       ├── hooks_decorator.py
│       ├── lifespan.py
│       ├── logging.py
│       ├── multiagents.py
│       ├── settings.py
│       └── variables.py
├── compose.dev.yml
├── compose.prod.yml
├── compose.yml
├── config.example.yml
├── docs
│   ├── architecture.md
│   ├── assets
│   │   ├── albert_api.excalidraw
│   │   ├── collections_001.png
│   │   ├── collections_002.png
│   │   ├── collections_003.png
│   │   ├── collections_004.png
│   │   ├── collections_005.png
│   │   ├── deployment_001.png
│   │   ├── iam_001.png
│   │   └── logo.png
│   ├── budget.md
│   ├── deployment.md
│   ├── iam.md
│   ├── models.md
│   ├── routing.md
│   ├── search.md
│   └── tutorials
│       ├── RAG_with_parse.ipynb
│       ├── audio_transcriptions.ipynb
│       ├── chat_completions.ipynb
│       ├── import_knowledge_database.ipynb
│       ├── models.ipynb
│       ├── pdf_ocr.ipynb
│       └── retrieval_augmented_generation.ipynb
├── mcp
│   ├── config.json
│   └── data_gouv_fr_mcp_server
│       ├── README.md
│       ├── infra
│       │   └── clients
│       │       └── http_client.py
│       ├── main.py
│       └── pyproject.toml
├── pyproject.toml
├── scripts
│   ├── postgres_entrypoint.sh
│   ├── startup_api.sh
│   └── startup_ui.sh
└── ui
    ├── Dockerfile
    ├── alembic.ini
    ├── alembic
    │   ├── env.py
    │   ├── script.py.mako
    │   └── versions
    │       ├── 2025_03_25_1347-647433280fa7_init_database.py
    │       ├── 2025_04_07_1410-3ad8934ab327_remove_expires_at_column.py
    │       └── 2025_04_18_1923-c0bfeeca22a9_add_unique_api_token_id.py
    ├── backend
    │   ├── __init__.py
    │   ├── account.py
    │   ├── admin.py
    │   ├── chat.py
    │   ├── common.py
    │   ├── documents.py
    │   ├── login.py
    │   ├── sql
    │   │   ├── __init__.py
    │   │   ├── models.py
    │   │   └── session.py
    │   └── summarize.py
    ├── frontend
    │   ├── account.py
    │   ├── admin.py
    │   ├── chat.py
    │   ├── documents.py
    │   ├── header.py
    │   ├── summarize.py
    │   ├── transcription.py
    │   └── utils.py
    ├── main.py
    ├── settings.py
    └── variables.py

/.env.template:
--------------------------------------------------------------------------------
CONFIG_FILE=config.yml
OPENAI_API_KEY=

# To run locally
#POSTGRES_HOST=localhost
#REDIS_HOST=localhost
#QDRANT_HOST=localhost

# To run integration tests
#ALBERT_API_KEY=
#BRAVE_API_KEY=
--------------------------------------------------------------------------------
/.github/badges/coverage.json:
--------------------------------------------------------------------------------
{"schemaVersion":1,"label":"coverage","message":"87.38%","color":"green"}
--------------------------------------------------------------------------------
/.github/compose.test.yml:
--------------------------------------------------------------------------------
name: albert-test
services:
  api:
    build:
      context: ..
      dockerfile: app/Dockerfile
    platform: linux/amd64
    restart: always
    environment:
      - COVERAGE_RCFILE=./app/.coveragerc
      - BRAVE_API_KEY=${BRAVE_API_KEY}
      - ALBERT_API_KEY=${ALBERT_API_KEY}
      - POSTGRES_HOST=postgres
      - REDIS_HOST=redis
      - QDRANT_HOST=qdrant
      - POSTGRES_PORT=5432
      - REDIS_PORT=6379
      - QDRANT_PORT=6333
      - QDRANT_GRPC_PORT=6334
    ports:
      - 8000:8000
    volumes:
      - ./config.test.yml:/config.yml:ro
    depends_on:
      redis:
        condition: service_healthy
      postgres:
        condition: service_healthy
      qdrant:
        condition: service_healthy

  postgres:
    extends:
      file: ../compose.yml
      service: postgres
    ports: !override
      - 8432:5432

  qdrant:
    extends:
      file: ../compose.yml
      service: qdrant
    ports: !override
      - 8333:6333
      - 8334:6334

  redis:
    extends:
      file: ../compose.yml
      service: redis
    ports: !override
      - 8335:6379

  mcp-bridge:
    extends:
      file: ../compose.yml
      service: mcp-bridge
    ports: !override
      - "9875:8000"

volumes:
  postgres:
  redis:
  qdrant:
--------------------------------------------------------------------------------
/.github/workflows/build_and_deploy.yml:
--------------------------------------------------------------------------------
name: Build and deploy when pushing on main

on:
  push:
    branches:
      - main

jobs:
  build-and-push:
    name: Build and push from ${{ github.ref_name }}/${{ github.sha }}
    runs-on: ubuntu-latest
    env:
      APP_IMAGE_NAME: ghcr.io/${{ github.repository }}/app
      UI_IMAGE_NAME: ghcr.io/${{ github.repository }}/ui
      IMAGE_TAG: ${{ github.sha }}
    outputs:
      commit_title: ${{ steps.get_head_commit_title.outputs.title }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - id: get_head_commit_title
        run: echo "title=$(git log --format=%B -n 1 HEAD | head -n 1)" >> $GITHUB_OUTPUT

      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Build and push app
        uses: docker/build-push-action@v6
        with:
          context: .
          file: ./app/Dockerfile
          push: true
          tags: ${{ env.APP_IMAGE_NAME }}:${{ env.IMAGE_TAG }},${{ env.APP_IMAGE_NAME }}:latest
          cache-from: type=gha
          cache-to: type=gha,mode=max

      - name: Build and push ui
        uses: docker/build-push-action@v6
        with:
          context: .
          file: ./ui/Dockerfile
          push: true
          tags: ${{ env.UI_IMAGE_NAME }}:${{ env.IMAGE_TAG }},${{ env.UI_IMAGE_NAME }}:latest
          cache-from: type=gha
          cache-to: type=gha,mode=max

  deploy-dev:
    name: Deploy from ${{ github.ref_name }}/${{ github.sha }}
    runs-on: ubuntu-latest
    needs: build-and-push
    steps:
      - name: Trigger dev deployment
        run: |
          RESPONSE="$(curl --request POST \
            --form token=${{ secrets.GITLAB_CI_TOKEN }} \
            --form ref=main \
            --form 'variables[pipeline_name]=${{ github.event.repository.name }} - ${{ needs.build-and-push.outputs.commit_title }}' \
            --form 'variables[docker_image_tag]=latest' \
            --form 'variables[application_to_deploy]=${{ github.event.repository.name }}' \
            --form 'variables[deployment_environment]=dev' \
            'https://gitlab.com/api/v4/projects/58117805/trigger/pipeline')"

          if echo "$RESPONSE" | grep -q '"status":"created"'; then
            echo $RESPONSE
          else
            echo $RESPONSE
            exit 1
          fi
--------------------------------------------------------------------------------
/.github/workflows/build_and_tag.yml:
--------------------------------------------------------------------------------
name: Build and create release image tag

on:
  release:
    types: [published, edited]

jobs:
  build-and-push:
    name: Build and push from ${{ github.ref_name }}/${{ github.event.release.tag_name }}
    runs-on: ubuntu-latest
    env:
      APP_IMAGE_NAME: ghcr.io/${{ github.repository }}/app
      UI_IMAGE_NAME: ghcr.io/${{ github.repository }}/ui
      IMAGE_TAG: ${{ github.event.release.tag_name }}
    outputs:
      commit_title: ${{ steps.get_head_commit_title.outputs.title }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - id: get_head_commit_title
        run: echo "title=$(git log --format=%B -n 1 HEAD | head -n 1)" >> $GITHUB_OUTPUT

      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Build and push app
        uses: docker/build-push-action@v6
        with:
          context: .
          file: ./app/Dockerfile
          push: true
          tags: ${{ env.APP_IMAGE_NAME }}:${{ env.IMAGE_TAG }}
          cache-from: type=gha
          cache-to: type=gha,mode=max

      - name: Build and push ui
        uses: docker/build-push-action@v6
        with:
          context: .
          file: ./ui/Dockerfile
          push: true
          tags: ${{ env.UI_IMAGE_NAME }}:${{ env.IMAGE_TAG }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
--------------------------------------------------------------------------------
/.github/workflows/codeql_scan.yml:
--------------------------------------------------------------------------------
name: CodeQL scanning

on:
  pull_request:
    branches:
      - main
  push:
    branches:
      - main

jobs:
  analyze:
    name: CodeQL Analysis
    runs-on: ubuntu-latest
    permissions:
      security-events: write
      actions: read
      contents: read

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Initialize CodeQL
        uses: github/codeql-action/init@v3
        with:
          languages: python
          queries: security-and-quality

      - name: Perform CodeQL Analysis
        uses: github/codeql-action/analyze@v3
        with:
          category: "/language:python"
--------------------------------------------------------------------------------
/.github/workflows/run_tests.yml:
--------------------------------------------------------------------------------
name: Integration tests

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
  workflow_call: # Add this to make the workflow reusable

jobs:
  test:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3

      - name: Set up Docker Compose
        run: |
          docker compose --file ./.github/compose.test.yml up --detach
        env:
          BRAVE_API_KEY: ${{ secrets.BRAVE_API_KEY }}
          ALBERT_API_KEY: ${{ secrets.ALBERT_API_KEY }}

      - name: Wait for API to start
        run: |
          echo $(ls -la)
          for i in {1..30}; do
            curl -s http://localhost:8000/health -H "Authorization: Bearer changeme" > /dev/null && echo "API is ready" && break || echo "Waiting for API..." && sleep 2;
          done
          echo $(docker logs albert-test-api-1)

      - name: Wait for PostgreSQL
        run: |
          for i in {1..30}; do
            nc -z localhost 8432 && echo "PostgreSQL is ready" && break || echo "Waiting for PostgreSQL..." && sleep 2;
          done

      - name: Run tests
        run: |
          docker exec albert-test-api-1 pytest app/tests --cov=./app --cov-report=xml

      - name: Create coverage badge
        run: |
          mkdir -p .github/badges
          # Extract coverage percentage from coverage.xml
          COVERAGE=$(docker exec albert-test-api-1 python -c "import xml.etree.ElementTree as ET; print(ET.parse('app/coverage.xml').getroot().get('line-rate'))")
          COVERAGE_PCT=$(printf "%.2f" $(echo "${COVERAGE} * 100" | bc))
          echo "{\"schemaVersion\":1,\"label\":\"coverage\",\"message\":\"${COVERAGE_PCT}%\",\"color\":\"$(if (( $(echo "${COVERAGE_PCT} >= 80" | bc -l) )); then echo "green"; elif (( $(echo "${COVERAGE_PCT} >= 70" | bc -l) )); then echo "yellow"; else echo "red"; fi)\"}" > .github/badges/coverage.json

      - name: Commit coverage badge
        uses: stefanzweifel/git-auto-commit-action@v4
        with:
          commit_message: Update coverage badge
          file_pattern: .github/badges/coverage.json

      - name: Tear down Docker Compose
        if: always()
        run: |
          docker compose --file ./.github/compose.test.yml down
--------------------------------------------------------------------------------
/.github/workflows/secrets_scan.yml:
--------------------------------------------------------------------------------
name: Secrets scanning

on:
  push:
  pull_request:
  workflow_call:

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Secret Scanning with TruffleHog
        uses: trufflesecurity/trufflehog@main
        with:
          extra_args: |
            --results=verified,unknown
            --exclude-detectors=Postgres
      - name: Install git-secrets
        run: |
          git clone https://github.com/awslabs/git-secrets.git
          cd git-secrets
          sudo make install

      - name: Setup git-secrets
        run: |
          git secrets --install
          git secrets --add 'api.key.{1,5}[a-zA-Z0-9_-]{32,}'

      - name: Scan with git-secrets
        run: |
          git secrets --scan
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
repos:
  - repo: https://github.com/astral-sh/ruff-pre-commit
    # Ruff version.
    rev: v0.6.5
    hooks:
      # Run the linter.
      - id: ruff
        types_or: [ python, pyi ]
        args: [ --fix ]
      # Run the formatter.
      - id: ruff-format
        types_or: [ python, pyi ]
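The "Create coverage badge" step in run_tests.yml above regenerates .github/badges/coverage.json, which follows the shields.io endpoint-badge schema shown earlier. A minimal Python sketch of the same extraction and thresholding logic (green >= 80%, yellow >= 70%, red otherwise), assuming the app/coverage.xml path produced by the pytest step:

import json
import xml.etree.ElementTree as ET


def coverage_badge(coverage_xml: str = "app/coverage.xml") -> dict:
    # coverage.xml stores line coverage as a 0-1 ratio in the root "line-rate" attribute
    pct = float(ET.parse(coverage_xml).getroot().get("line-rate")) * 100
    # same thresholds as the shell step in run_tests.yml
    color = "green" if pct >= 80 else "yellow" if pct >= 70 else "red"
    return {"schemaVersion": 1, "label": "coverage", "message": f"{pct:.2f}%", "color": color}


if __name__ == "__main__":
    with open(".github/badges/coverage.json", "w") as f:
        json.dump(coverage_badge(), f, separators=(",", ":"))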
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2024 DINUM

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/app/.coveragerc:
--------------------------------------------------------------------------------
[run]
data_file = ./app/.coverage

[xml]
output = ./app/coverage.xml
--------------------------------------------------------------------------------
/app/Dockerfile:
--------------------------------------------------------------------------------
# First stage: build the application and its virtual environment with uv.
FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim AS builder
ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy

# Disable Python downloads, because we want to use the system interpreter
# across both images.
ENV UV_PYTHON_DOWNLOADS=0
# Install build dependencies
RUN apt-get update && apt-get install -y \
    libpq-dev \
    gcc \
    python3-dev \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /
# Copy project files
COPY ./app/ /app
RUN --mount=type=cache,target=/root/.cache/uv \
    uv venv
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
    uv pip install ".[app,test]"

# Final stage
FROM python:3.12-slim

RUN groupadd --gid 1100 albert && \
    useradd --home /app --gid 1100 --uid 1100 albert

# Only runtime dependencies
RUN apt-get update && apt-get install -y \
    libpq5 \
    poppler-utils \
    && rm -rf /var/lib/apt/lists/*

COPY scripts/startup_api.sh /startup.sh
RUN chown albert:albert /startup.sh
RUN chmod u+x /startup.sh

# Set a non-root user
USER albert
WORKDIR /

# Copy application from builder
COPY --from=builder --chown=albert:albert /app /app
COPY --from=builder --chown=albert:albert /.venv /.venv
ENV PATH="/.venv/bin:${PATH}"
ENV PYTHONPATH="/app:${PYTHONPATH}"

# Launch the application
CMD ["/startup.sh"]
--------------------------------------------------------------------------------
/app/alembic/env.py:
--------------------------------------------------------------------------------
from logging.config import fileConfig

from alembic import context
from sqlalchemy import engine_from_config, pool

from app.sql.models import Base
from app.utils.settings import settings

config = context.config
config.set_main_option(name="sqlalchemy.url", value=settings.databases.sql.args.get("url").replace("+asyncpg", "").replace("+aiosqlite", ""))

if config.config_file_name is not None:
    fileConfig(config.config_file_name)

target_metadata = Base.metadata


def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode.

    This configures the context with just a URL
    and not an Engine, though an Engine is acceptable
    here as well. By skipping the Engine creation
    we don't even need a DBAPI to be available.

    Calls to context.execute() here emit the given string to the
    script output.

    """
    url = config.get_main_option("sqlalchemy.url")
    context.configure(
        url=url,
        target_metadata=target_metadata,
        literal_binds=True,
        dialect_opts={"paramstyle": "named"},
    )

    with context.begin_transaction():
        context.run_migrations()


def run_migrations_online() -> None:
    """Run migrations in 'online' mode.

    In this scenario we need to create an Engine
    and associate a connection with the context.

    """
    connectable = engine_from_config(
        config.get_section(config.config_ini_section, {}),
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    with connectable.connect() as connection:
        context.configure(connection=connection, target_metadata=target_metadata)

        with context.begin_transaction():
            context.run_migrations()


if context.is_offline_mode():
    run_migrations_offline()
else:
    run_migrations_online()
--------------------------------------------------------------------------------
/app/alembic/script.py.mako:
--------------------------------------------------------------------------------
"""${message}

Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa
${imports if imports else ""}

# revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
down_revision: Union[str, None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}


def upgrade() -> None:
    """Upgrade schema."""
    ${upgrades if upgrades else "pass"}


def downgrade() -> None:
    """Downgrade schema."""
    ${downgrades if downgrades else "pass"}
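env.py strips the async driver suffixes ("+asyncpg", "+aiosqlite") from the configured database URL so migrations run over a synchronous engine, and script.py.mako is the template each file in versions/ below is rendered from. A minimal sketch of applying these migrations programmatically with Alembic's command API, assuming the working directory is the repository root and that env.py can load the app settings (e.g. CONFIG_FILE is set as in .env.template):

from alembic import command
from alembic.config import Config

# Load the project's Alembic configuration and upgrade to the latest revision,
# equivalent to running `alembic -c app/alembic.ini upgrade head`.
config = Config("app/alembic.ini")
command.upgrade(config, "head")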
--------------------------------------------------------------------------------
/app/alembic/versions/2025_03_11_1552-9a9c82ec2470_create_usages_table.py:
--------------------------------------------------------------------------------
"""create usages table

Revision ID: 9a9c82ec2470
Revises:
Create Date: 2025-03-11 15:52:43.842572

"""

from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision: str = "9a9c82ec2470"
down_revision: Union[str, None] = None
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Upgrade schema."""
    op.create_table(
        "usage",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("datetime", sa.DateTime(), nullable=False),
        sa.Column("duration", sa.Integer(), nullable=True),
        sa.Column("user", sa.String(), nullable=True),
        sa.Column("endpoint", sa.String(), nullable=False),
        sa.Column(
            "method", sa.Enum("CONNECT", "DELETE", "GET", "HEAD", "OPTIONS", "PATCH", "POST", "PUT", "TRACE", name="httpmethod"), nullable=True
        ),
        sa.Column("model", sa.String(), nullable=True),
        sa.Column("prompt_tokens", sa.Integer(), nullable=True),
        sa.Column("completion_tokens", sa.Float(), nullable=True),
        sa.Column("total_tokens", sa.Integer(), nullable=True),
        sa.Column("status", sa.Integer(), nullable=True),
        sa.PrimaryKeyConstraint("id"),
    )


def downgrade() -> None:
    """Downgrade schema."""
    op.drop_table("usage")
--------------------------------------------------------------------------------
/app/alembic/versions/2025_04_04_1641-8c5ae2a3d4d0_remove_delete_cascade_usage_token_id.py:
--------------------------------------------------------------------------------
"""remove delete cascade usage token id

Revision ID: 8c5ae2a3d4d0
Revises: e78eaed1bcb2
Create Date: 2025-04-04 16:41:52.058857

"""

from typing import Sequence, Union

from alembic import op


# revision identifiers, used by Alembic.
revision: str = "8c5ae2a3d4d0"
down_revision: Union[str, None] = "e78eaed1bcb2"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Upgrade schema."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_constraint("usage_token_id_fkey", "usage", type_="foreignkey")
    op.create_foreign_key(None, "usage", "token", ["token_id"], ["id"], ondelete="SET NULL")
    # ### end Alembic commands ###


def downgrade() -> None:
    """Downgrade schema."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_constraint(None, "usage", type_="foreignkey")
    op.create_foreign_key("usage_token_id_fkey", "usage", "token", ["token_id"], ["id"], ondelete="CASCADE")
    # ### end Alembic commands ###
--------------------------------------------------------------------------------
/app/alembic/versions/2025_04_15_1513-a4ac45e7c990_remove_unique_name_on_token_and_.py:
--------------------------------------------------------------------------------
"""remove unique name on token and collection

Revision ID: a4ac45e7c990
Revises: 8c5ae2a3d4d0
Create Date: 2025-04-15 15:13:33.886841

"""

from typing import Sequence, Union

from alembic import op


# revision identifiers, used by Alembic.
revision: str = "a4ac45e7c990"
down_revision: Union[str, None] = "8c5ae2a3d4d0"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Upgrade schema."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_constraint("unique_collection_name_per_user", "collection", type_="unique")
    op.drop_constraint("unique_token_name_per_user", "token", type_="unique")
    # ### end Alembic commands ###


def downgrade() -> None:
    """Downgrade schema."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_unique_constraint("unique_token_name_per_user", "token", ["user_id", "name"])
    op.create_unique_constraint("unique_collection_name_per_user", "collection", ["user_id", "name"])
    # ### end Alembic commands ###
--------------------------------------------------------------------------------
/app/alembic/versions/2025_04_17_1201-896cbf4c2cbb_add_new_columns_to_usages_table.py:
--------------------------------------------------------------------------------
"""Add new columns to usages table

Revision ID: 896cbf4c2cbb
Revises: a4ac45e7c990
Create Date: 2025-04-17 12:01:15.970424

"""

from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision: str = "896cbf4c2cbb"
down_revision: Union[str, None] = "a4ac45e7c990"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Upgrade schema."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.add_column("usage", sa.Column("time_to_first_token", sa.Integer(), nullable=True))
    op.add_column("usage", sa.Column("request_model", sa.String(), nullable=True))
    # ### end Alembic commands ###


def downgrade() -> None:
    """Downgrade schema."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_column("usage", "request_model")
    op.drop_column("usage", "time_to_first_token")
    # ### end Alembic commands ###
--------------------------------------------------------------------------------
/app/alembic/versions/2025_05_12_1613-5553fa60acfa_remove_default_role_attribut.py:
--------------------------------------------------------------------------------
"""remove default role attribut

Revision ID: 5553fa60acfa
Revises: 896cbf4c2cbb
Create Date: 2025-05-12 16:13:49.420409

"""

from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision: str = "5553fa60acfa"
down_revision: Union[str, None] = "896cbf4c2cbb"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Upgrade schema."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_index("only_one_default_role", table_name="role", postgresql_where='"default"')
    op.drop_column("role", "default")
    # ### end Alembic commands ###


def downgrade() -> None:
    """Downgrade schema."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.add_column("role", sa.Column("default", sa.BOOLEAN(), autoincrement=False, nullable=False))
    op.create_index("only_one_default_role", "role", ["default"], unique=True, postgresql_where='"default"')
    # ### end Alembic commands ###
--------------------------------------------------------------------------------
/app/alembic/versions/2025_05_28_1541-752279f74929_add_cost_and_budget_colonne.py:
--------------------------------------------------------------------------------
"""add cost and budget colonne

Revision ID: 752279f74929
Revises: 5553fa60acfa
Create Date: 2025-05-28 15:41:27.302543

"""

from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision: str = "752279f74929"
down_revision: Union[str, None] = "5553fa60acfa"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Upgrade schema."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.add_column("usage", sa.Column("cost", sa.Float(), nullable=True))
    op.add_column("user", sa.Column("budget", sa.Float(), nullable=True))
    # ### end Alembic commands ###


def downgrade() -> None:
    """Downgrade schema."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_column("user", "budget")
    op.drop_column("usage", "cost")
    # ### end Alembic commands ###
--------------------------------------------------------------------------------
/app/clients/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/etalab-ia/albert-api/0446f06baa3d561bb1bbd34ab1dd9a0441dd9302/app/clients/__init__.py
--------------------------------------------------------------------------------
/app/clients/database/__init__.py:
--------------------------------------------------------------------------------
from ._qdrantclient import QdrantClient

__all__ = ["QdrantClient"]
--------------------------------------------------------------------------------
/app/clients/mcp/__init__.py:
--------------------------------------------------------------------------------
from ._secretshellbridgeclient import SecretShellMCPBridgeClient

__all__ = ["SecretShellMCPBridgeClient"]
--------------------------------------------------------------------------------
/app/clients/mcp/_secretshellbridgeclient.py:
--------------------------------------------------------------------------------
import json

import httpx
from fastapi import HTTPException


class SecretShellMCPBridgeClient:
    def __init__(self, mcp_bridge_url: str):
        self.url = mcp_bridge_url
        self.timeout = 10

    async def get_tool_list(self) -> dict:
        async with httpx.AsyncClient(timeout=self.timeout) as async_client:
            try:
                response = await async_client.request(method="GET", url=self.url + "/mcp/tools", headers={})
            except (httpx.TimeoutException, httpx.ReadTimeout, httpx.ConnectTimeout, httpx.WriteTimeout, httpx.PoolTimeout) as e:
                raise HTTPException(status_code=504, detail="Request timed out")
            except Exception as e:
                raise HTTPException(status_code=500, detail=type(e).__name__)
        return response.json()

    async def call_tool(self, tool_name: str, params: str):
        try:
            params = json.loads(params)
        except json.JSONDecodeError:
            print(f"failed to decode json for {tool_name}")
            return None
        async with httpx.AsyncClient(timeout=self.timeout) as async_client:
            try:
                response = await async_client.request(method="POST", json=params, url=self.url + f"/mcp/tools/{tool_name}/call", headers={})
            except (httpx.TimeoutException, httpx.ReadTimeout, httpx.ConnectTimeout, httpx.WriteTimeout, httpx.PoolTimeout) as e:
                raise HTTPException(status_code=504, detail="Request timed out")
            except Exception as e:
                raise HTTPException(status_code=500, detail=type(e).__name__)
        return response.json()
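A minimal usage sketch for this bridge client, assuming an MCP bridge is reachable on localhost:9875 (the host port mapped to the mcp-bridge service in compose.test.yml); the tool name and its parameters are illustrative:

import asyncio

from app.clients.mcp import SecretShellMCPBridgeClient


async def main():
    client = SecretShellMCPBridgeClient(mcp_bridge_url="http://localhost:9875")
    tools = await client.get_tool_list()  # GET /mcp/tools
    print(tools)
    # call_tool takes the parameters as a JSON string and decodes them itself;
    # it returns None if the string is not valid JSON
    result = await client.call_tool("some_tool", '{"query": "hello"}')
    print(result)


asyncio.run(main())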
--------------------------------------------------------------------------------
/app/clients/model/__init__.py:
--------------------------------------------------------------------------------
from ._basemodelclient import BaseModelClient
from ._openaimodelclient import OpenaiModelClient
from ._vllmmodelclient import VllmModelClient
from ._teimodelclient import TeiModelClient

__all__ = ["BaseModelClient", "OpenaiModelClient", "VllmModelClient", "TeiModelClient"]
--------------------------------------------------------------------------------
/app/clients/model/_albertmodelclient.py:
--------------------------------------------------------------------------------
from urllib.parse import urljoin

import requests

from app.schemas.core.settings import ModelClientCarbonFootprint
from app.schemas.models import ModelCosts
from app.utils.variables import (
    ENDPOINT__AUDIO_TRANSCRIPTIONS,
    ENDPOINT__CHAT_COMPLETIONS,
    ENDPOINT__COMPLETIONS,
    ENDPOINT__EMBEDDINGS,
    ENDPOINT__MODELS,
    ENDPOINT__OCR,
    ENDPOINT__RERANK,
)

from ._basemodelclient import BaseModelClient


class AlbertModelClient(BaseModelClient):
    ENDPOINT_TABLE = {
        ENDPOINT__AUDIO_TRANSCRIPTIONS: "/v1/audio/transcriptions",
        ENDPOINT__CHAT_COMPLETIONS: "/v1/chat/completions",
        ENDPOINT__COMPLETIONS: "/v1/completions",
        ENDPOINT__EMBEDDINGS: "/v1/embeddings",
        ENDPOINT__MODELS: "/v1/models",
        ENDPOINT__OCR: "/v1/chat/completions",
        ENDPOINT__RERANK: "/v1/rerank",
    }

    def __init__(
        self, model: str, costs: ModelCosts, carbon: ModelClientCarbonFootprint, api_url: str, api_key: str, timeout: int, *args, **kwargs
    ) -> None:
        """
        Initialize the Albert model client and check if the model is available.
        """
        super().__init__(model=model, costs=costs, carbon=carbon, api_url=api_url, api_key=api_key, timeout=timeout, *args, **kwargs)

        # check if model is available
        url = urljoin(base=str(self.api_url), url=self.ENDPOINT_TABLE[ENDPOINT__MODELS])
        headers = {"Authorization": f"Bearer {self.api_key}"} if self.api_key else None

        response = requests.get(url=url, headers=headers, timeout=self.timeout)
        assert response.status_code == 200, f"Failed to get models list ({response.status_code})."

        response = response.json()["data"]
        response = [model for model in response if model["id"] == self.model or self.model in model["aliases"]]
        assert len(response) == 1, "Failed to get models list (model not found)."

        # set attributes of the model
        response = response[0]
        self.max_context_length = response.get("max_context_length")

        # set vector size
        response = requests.post(
            url=urljoin(base=self.api_url, url=self.ENDPOINT_TABLE[ENDPOINT__EMBEDDINGS]),
            headers=headers,
            json={"model": self.model, "input": "hello world"},
            timeout=self.timeout,
        )
        if response.status_code == 200:
            self.vector_size = len(response.json()["data"][0]["embedding"])
        else:
            self.vector_size = None
--------------------------------------------------------------------------------
/app/clients/model/_openaimodelclient.py:
--------------------------------------------------------------------------------
from urllib.parse import urljoin

import requests

from app.schemas.core.settings import ModelClientCarbonFootprint
from app.schemas.models import ModelCosts
from app.utils.variables import (
    ENDPOINT__AUDIO_TRANSCRIPTIONS,
    ENDPOINT__CHAT_COMPLETIONS,
    ENDPOINT__COMPLETIONS,
    ENDPOINT__EMBEDDINGS,
    ENDPOINT__MODELS,
    ENDPOINT__OCR,
    ENDPOINT__RERANK,
)

from ._basemodelclient import BaseModelClient


class OpenaiModelClient(BaseModelClient):
    ENDPOINT_TABLE = {
        ENDPOINT__AUDIO_TRANSCRIPTIONS: "/v1/audio/transcriptions",
        ENDPOINT__CHAT_COMPLETIONS: "/v1/chat/completions",
        ENDPOINT__COMPLETIONS: "/v1/completions",
        ENDPOINT__EMBEDDINGS: "/v1/embeddings",
        ENDPOINT__MODELS: "/v1/models",
        ENDPOINT__OCR: "/v1/chat/completions",
        ENDPOINT__RERANK: None,
    }

    def __init__(
        self, model: str, costs: ModelCosts, carbon: ModelClientCarbonFootprint, api_url: str, api_key: str, timeout: int, *args, **kwargs
    ) -> None:
        """
        Initialize the OpenAI model client and check if the model is available.
        """
        super().__init__(model=model, costs=costs, carbon=carbon, api_url=api_url, api_key=api_key, timeout=timeout, *args, **kwargs)

        # check if model is available
        url = urljoin(base=str(self.api_url), url=self.ENDPOINT_TABLE[ENDPOINT__MODELS])
        headers = {"Authorization": f"Bearer {self.api_key}"} if self.api_key else None

        response = requests.get(url=url, headers=headers, timeout=self.timeout)
        assert response.status_code == 200, f"Failed to get models list ({response.status_code})."

        response = response.json()["data"]
        response = [model for model in response if model["id"] == self.model]
        assert len(response) == 1, "Failed to get models list (model not found)."

        # set attributes of the model
        response = response[0]
        self.max_context_length = response.get("max_context_length")

        # set vector size
        response = requests.post(
            url=urljoin(base=self.api_url, url=self.ENDPOINT_TABLE[ENDPOINT__EMBEDDINGS]),
            headers=headers,
            json={"model": self.model, "input": "hello world"},
            timeout=self.timeout,
        )
        if response.status_code == 200:
            self.vector_size = len(response.json()["data"][0]["embedding"])
        else:
            self.vector_size = None
--------------------------------------------------------------------------------
/app/clients/model/_vllmmodelclient.py:
--------------------------------------------------------------------------------
from urllib.parse import urljoin

import requests

from app.schemas.core.settings import ModelClientCarbonFootprint
from app.schemas.models import ModelCosts
from app.utils.variables import (
    ENDPOINT__AUDIO_TRANSCRIPTIONS,
    ENDPOINT__CHAT_COMPLETIONS,
    ENDPOINT__COMPLETIONS,
    ENDPOINT__EMBEDDINGS,
    ENDPOINT__MODELS,
    ENDPOINT__OCR,
    ENDPOINT__RERANK,
)

from ._basemodelclient import BaseModelClient


class VllmModelClient(BaseModelClient):
    ENDPOINT_TABLE = {
        ENDPOINT__AUDIO_TRANSCRIPTIONS: None,
        ENDPOINT__CHAT_COMPLETIONS: "/v1/chat/completions",
        ENDPOINT__COMPLETIONS: None,
        ENDPOINT__EMBEDDINGS: None,
        ENDPOINT__MODELS: "/v1/models",
        ENDPOINT__OCR: "/v1/chat/completions",
        ENDPOINT__RERANK: None,
    }

    def __init__(
        self, model: str, costs: ModelCosts, carbon: ModelClientCarbonFootprint, api_url: str, api_key: str, timeout: int, *args, **kwargs
    ) -> None:
        """
        Initialize the VLLM model client and check if the model is available.
        """
        super().__init__(model=model, costs=costs, carbon=carbon, api_url=api_url, api_key=api_key, timeout=timeout, *args, **kwargs)

        # check if model is available
        url = urljoin(base=str(self.api_url), url=self.ENDPOINT_TABLE[ENDPOINT__MODELS])
        headers = {"Authorization": f"Bearer {self.api_key}"} if self.api_key else None

        response = requests.get(url=url, headers=headers, timeout=self.timeout)
        assert response.status_code == 200, f"Failed to get models list ({response.status_code})."

        response = response.json()["data"]
        response = [model for model in response if model["id"] == self.model]
        assert len(response) == 1, "Failed to get models list (model not found)."

        # set attributes of the model
        response = response[0]
        self.max_context_length = response.get("max_model_len")

        # set vector size
        self.vector_size = None
--------------------------------------------------------------------------------
/app/clients/parser/__init__.py:
--------------------------------------------------------------------------------
from ._baseparserclient import BaseParserClient
from ._markerparserclient import MarkerParserClient

__all__ = ["BaseParserClient", "MarkerParserClient"]
--------------------------------------------------------------------------------
/app/clients/parser/_baseparserclient.py:
--------------------------------------------------------------------------------
from abc import ABC, abstractmethod
import importlib
from typing import Optional, Type

from fastapi import UploadFile

from app.schemas.core.settings import ParserType
from app.schemas.parse import Languages, ParsedDocument, ParsedDocumentOutputFormat


class BaseParserClient(ABC):
    SUPPORTED_FORMATS = []

    @staticmethod
    def import_module(type: ParserType) -> "Type[BaseParserClient]":
        """
        Import the module for the given parser type.
        """
        module = importlib.import_module(f"app.clients.parser._{type.value}parserclient")
        return getattr(module, f"{type.capitalize()}ParserClient")

    @abstractmethod
    def parse(
        self,
        file: UploadFile,
        output_format: Optional[ParsedDocumentOutputFormat] = None,
        force_ocr: bool = False,
        languages: Optional[Languages] = None,
        page_range: Optional[str] = None,
        paginate_output: Optional[bool] = None,
        use_llm: Optional[bool] = None,
    ) -> ParsedDocument:
        pass
--------------------------------------------------------------------------------
/app/clients/web_search/__init__.py:
--------------------------------------------------------------------------------
from ._basewebsearchclient import BaseWebSearchClient
from ._bravewebsearchclient import BraveWebSearchClient
from ._duckduckgowebsearchclient import DuckduckgoWebSearchClient

__all__ = ["BaseWebSearchClient", "BraveWebSearchClient", "DuckduckgoWebSearchClient"]
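Both BaseParserClient and BaseWebSearchClient (below) resolve their concrete subclass from an enum value by module-naming convention: the enum value is lowercased into the module name (_<type>parserclient / _<type>websearchclient) and capitalized into the class name. A sketch of that dispatch, assuming ParserType exposes a marker member, as _markerparserclient.py suggests:

from app.clients.parser import BaseParserClient
from app.schemas.core.settings import ParserType

# ParserType.marker -> module app.clients.parser._markerparserclient,
# class MarkerParserClient (the enum value is capitalized to build the class name)
parser_class = BaseParserClient.import_module(type=ParserType.marker)
print(parser_class.__name__)  # MarkerParserClient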
--------------------------------------------------------------------------------
/app/clients/web_search/_basewebsearchclient.py:
--------------------------------------------------------------------------------
from abc import ABC, abstractmethod
import importlib
from typing import List, Type

from app.schemas.core.settings import WebSearchType


class BaseWebSearchClient(ABC):
    @staticmethod
    def import_module(type: WebSearchType) -> "Type[BaseWebSearchClient]":
        """
        Import the module for the given web search type.
        """
        module = importlib.import_module(f"app.clients.web_search._{type.value}websearchclient")
        return getattr(module, f"{type.capitalize()}WebSearchClient")

    @abstractmethod
    async def search(self, query: str, n: int = 3) -> List[str]:
        """
        Get the URLs of the search results for a given query.

        Args:
            query (str): The query to search for.
            n (int): The number of results to return.

        Returns:
            List[str]: The URLs of the search results.
        """
        pass
--------------------------------------------------------------------------------
/app/clients/web_search/_bravewebsearchclient.py:
--------------------------------------------------------------------------------
import logging
from typing import List

import httpx

from app.clients.web_search._basewebsearchclient import BaseWebSearchClient

logger = logging.getLogger(__name__)


class BraveWebSearchClient(BaseWebSearchClient):
    URL = "https://api.search.brave.com/res/v1/web/search"
    DEFAULT_TIMEOUT = 5

    def __init__(self, api_key: str, user_agent: str, *args, **kwargs) -> None:
        self.api_key = api_key
        self.headers = {"Accept": "application/json", "X-Subscription-Token": self.api_key, "User-Agent": user_agent}

    async def search(self, query: str, n: int = 3) -> List[str]:
        params = {"q": query, "count": n, "country": "fr", "safesearch": "strict"}

        try:
            async with httpx.AsyncClient(timeout=self.DEFAULT_TIMEOUT) as client:
                response = await client.get(url=self.URL, headers=self.headers, params=params)
                results = response.json().get("web", {}).get("results", [])
        except Exception:
            logger.exception(msg="Brave Search API unreachable.")
            results = []

        return [result["url"].lower() for result in results]
--------------------------------------------------------------------------------
/app/clients/web_search/_duckduckgowebsearchclient.py:
--------------------------------------------------------------------------------
import logging
from typing import List

import httpx

from app.clients.web_search._basewebsearchclient import BaseWebSearchClient

logger = logging.getLogger(__name__)


class DuckduckgoWebSearchClient(BaseWebSearchClient):
    URL = "https://api.duckduckgo.com/"
    DEFAULT_TIMEOUT = 5

    def __init__(self, user_agent: str, *args, **kwargs) -> None:
        self.headers = {"User-Agent": user_agent}

    async def search(self, query: str, n: int = 3) -> List[str]:
        params = {
            "q": query,
            "format": "json",
            "kl": "fr-fr",
            "safe": 1,
        }

        try:
            async with httpx.AsyncClient(timeout=self.DEFAULT_TIMEOUT) as client:
                response = await client.get(url=self.URL, headers=self.headers, params=params, follow_redirects=True)
                results = response.json().get("Results", [])[:n]
        except Exception:
            logger.exception(msg="DuckDuckGo API unreachable.")
            results = []

        return [result["FirstURL"].lower() for result in results]
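Both concrete clients above share the same awaitable interface and degrade to an empty result list when their API is unreachable. A minimal usage sketch, assuming a valid Brave subscription token is available in the BRAVE_API_KEY environment variable (as in .env.template):

import asyncio
import os

from app.clients.web_search import BraveWebSearchClient


async def main():
    client = BraveWebSearchClient(api_key=os.environ["BRAVE_API_KEY"], user_agent="albert-api-example")
    urls = await client.search(query="service-public.fr carte d'identité", n=3)
    print(urls)  # up to 3 lower-cased result URLs, [] if the API is unreachable


asyncio.run(main())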
--------------------------------------------------------------------------------
/app/endpoints/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/etalab-ia/albert-api/0446f06baa3d561bb1bbd34ab1dd9a0441dd9302/app/endpoints/__init__.py
--------------------------------------------------------------------------------
/app/endpoints/audio.py:
--------------------------------------------------------------------------------
from typing import List, Literal

from fastapi import APIRouter, File, Form, Request, Security, UploadFile
from fastapi.responses import JSONResponse, PlainTextResponse

from app.helpers._accesscontroller import AccessController
from app.schemas.audio import AudioTranscription
from app.utils.context import global_context
from app.utils.variables import AUDIO_SUPPORTED_LANGUAGES_VALUES, ENDPOINT__AUDIO_TRANSCRIPTIONS

router = APIRouter()

AudioTranscriptionModel = Form(default=..., description="ID of the model to use. Call `/v1/models` endpoint to get the list of available models, only `automatic-speech-recognition` model type is supported.")  # fmt: off
AudioTranscriptionLanguage = Form(default="fr", description="The language of the input audio. Supplying the input language in ISO-639-1 (e.g. en) format will improve accuracy and latency.")  # fmt: off
AudioTranscriptionPrompt = Form(default=None, description="Not implemented.")  # fmt: off
AudioTranscriptionResponseFormat = Form(default="json", description="The format of the transcript output, in one of these formats: `json` or `text`.")  # fmt: off
AudioTranscriptionTemperature = Form(default=0, description="The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.")  # fmt: off
AudioTranscriptionTimestampGranularities = Form(default=["segment"], description="Not implemented.")  # fmt: off


@router.post(path=ENDPOINT__AUDIO_TRANSCRIPTIONS, dependencies=[Security(dependency=AccessController())], status_code=200, response_model=AudioTranscription)  # fmt: off
async def audio_transcriptions(
    request: Request,
    file: UploadFile = File(description="The audio file object (not file name) to transcribe, in one of these formats: mp3 or wav."),
    model: str = AudioTranscriptionModel,
    language: Literal[*AUDIO_SUPPORTED_LANGUAGES_VALUES] = AudioTranscriptionLanguage,
    prompt: str = AudioTranscriptionPrompt,
    response_format: Literal["json", "text"] = AudioTranscriptionResponseFormat,
    temperature: float = AudioTranscriptionTemperature,
    timestamp_granularities: List[str] = AudioTranscriptionTimestampGranularities,
) -> AudioTranscription:
    """
    Transcribes audio into the input language.
    """

    # @TODO: Implement prompt
    # @TODO: Implement timestamp_granularities
    # @TODO: Implement verbose response format

    file_content = await file.read()
    model = global_context.models(model=model)
    client = model.get_client(endpoint=ENDPOINT__AUDIO_TRANSCRIPTIONS)
    data = {
        "model": client.model,
        "language": language,
        "response_format": response_format,
        "temperature": temperature,
        "timestamp_granularities": timestamp_granularities,
    }
    response = await client.forward_request(method="POST", files={"file": (file.filename, file_content, file.content_type)}, data=data)

    if response_format == "text":
        return PlainTextResponse(content=response.text)

    return JSONResponse(content=AudioTranscription(**response.json()).model_dump(), status_code=response.status_code)
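A minimal client-side sketch for the transcription endpoint above, assuming the API runs locally on port 8000 with the test token used in run_tests.yml ("changeme"), that ENDPOINT__AUDIO_TRANSCRIPTIONS resolves to /v1/audio/transcriptions (as the upstream mapping in the model clients suggests), and that an audio model named "whisper" is configured (the model name is illustrative):

import httpx

# post one of the integration-test assets as multipart form data
with open("app/tests/integ/assets/audio.mp3", "rb") as f:
    response = httpx.post(
        "http://localhost:8000/v1/audio/transcriptions",
        headers={"Authorization": "Bearer changeme"},
        files={"file": ("audio.mp3", f, "audio/mpeg")},
        data={"model": "whisper", "language": "fr", "response_format": "json"},
    )
print(response.json())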
--------------------------------------------------------------------------------
/app/endpoints/chunks.py:
--------------------------------------------------------------------------------
from typing import Union
from uuid import UUID

from fastapi import APIRouter, Depends, Path, Query, Request, Security
from sqlalchemy.ext.asyncio import AsyncSession

from app.helpers._accesscontroller import AccessController
from app.schemas.chunks import Chunk, Chunks
from app.sql.session import get_db as get_session
from app.utils.context import global_context, request_context
from app.utils.exceptions import ChunkNotFoundException
from app.utils.variables import ENDPOINT__CHUNKS

router = APIRouter()


@router.get(path=ENDPOINT__CHUNKS + "/{document:path}/{chunk:path}", dependencies=[Security(dependency=AccessController())], status_code=200)
async def get_chunk(
    request: Request,
    document: int = Path(description="The document ID"),
    chunk: int = Path(description="The chunk ID"),
    session: AsyncSession = Depends(get_session),
) -> Chunk:
    """
    Get a chunk of a document.
    """
    if not global_context.documents:  # no vector store available
        raise ChunkNotFoundException()

    chunks = await global_context.documents.get_chunks(session=session, document_id=document, chunk_id=chunk, user_id=request_context.get().user_id)

    return chunks[0]


@router.get(path=ENDPOINT__CHUNKS + "/{document}", dependencies=[Security(dependency=AccessController())], status_code=200)
async def get_chunks(
    request: Request,
    document: int = Path(description="The document ID"),
    limit: int = Query(default=10, ge=1, le=100, description="The number of documents to return"),
    offset: Union[int, UUID] = Query(default=0, description="The offset of the first document to return"),
    session: AsyncSession = Depends(get_session),
) -> Chunks:
    """
    Get chunks of a document.
    """
    if not global_context.documents:  # no vector store available
        data = []
    else:
        data = await global_context.documents.get_chunks(
            session=session,
            document_id=document,
            limit=limit,
            offset=offset,
            user_id=request_context.get().user_id,
        )

    return Chunks(data=data)
--------------------------------------------------------------------------------
/app/endpoints/completions.py:
--------------------------------------------------------------------------------
from fastapi import APIRouter, Request, Security
from fastapi.responses import JSONResponse

from app.helpers._accesscontroller import AccessController
from app.schemas.completions import CompletionRequest, Completions
from app.utils.context import global_context
from app.utils.variables import ENDPOINT__COMPLETIONS

router = APIRouter()


@router.post(path=ENDPOINT__COMPLETIONS, dependencies=[Security(dependency=AccessController())], status_code=200, response_model=Completions)
async def completions(request: Request, body: CompletionRequest) -> JSONResponse:
    """
    Completion API similar to OpenAI's API.
    """

    model = global_context.models(model=body.model)
    client = model.get_client(endpoint=ENDPOINT__COMPLETIONS)
    response = await client.forward_request(method="POST", json=body.model_dump())

    return JSONResponse(content=Completions(**response.json()).model_dump(), status_code=response.status_code)
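Since the completions endpoint mirrors OpenAI's API, the official client library can target it directly. A sketch assuming a local deployment, the test token from run_tests.yml, that ENDPOINT__COMPLETIONS resolves to /v1/completions, and an illustrative model name:

from openai import OpenAI

# point the OpenAI SDK at the albert-api deployment instead of api.openai.com
client = OpenAI(base_url="http://localhost:8000/v1", api_key="changeme")
completion = client.completions.create(model="albert-small", prompt="La capitale de la France est", max_tokens=16)
print(completion.choices[0].text)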
16 | """ 17 | 18 | model = global_context.models(model=body.model) 19 | client = model.get_client(endpoint=ENDPOINT__EMBEDDINGS) 20 | response = await client.forward_request(method="POST", json=body.model_dump()) 21 | 22 | return JSONResponse(content=Embeddings(**response.json()).model_dump(), status_code=response.status_code) 23 | -------------------------------------------------------------------------------- /app/endpoints/mcp.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, Request, Security 2 | from fastapi.responses import JSONResponse 3 | 4 | from app.helpers._accesscontroller import AccessController 5 | from app.schemas.mcp import McpChatCompletionRequest, McpChatCompletion 6 | from app.utils.context import global_context 7 | from app.utils.variables import ENDPOINT__AGENTS_TOOLS, ENDPOINT__AGENTS_COMPLETIONS 8 | 9 | router = APIRouter() 10 | 11 | 12 | @router.post(path=ENDPOINT__AGENTS_COMPLETIONS, dependencies=[Security(dependency=AccessController())], response_model=McpChatCompletion) 13 | async def mcp_completion(request: Request, body: McpChatCompletionRequest) -> JSONResponse: 14 | agents_manager = global_context.mcp.agents_manager 15 | response = await agents_manager.get_completion(body) 16 | return JSONResponse(status_code=response.status_code, content=response.json()) 17 | 18 | 19 | @router.get(path=ENDPOINT__AGENTS_TOOLS, dependencies=[Security(dependency=AccessController())]) 20 | async def mcp_tool_list(): 21 | agents_manager = global_context.mcp.agents_manager 22 | response = await agents_manager.get_tools_from_bridge() 23 | return JSONResponse(status_code=200, content={"tools": response}) 24 | -------------------------------------------------------------------------------- /app/endpoints/models.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, Path, Request, Security 2 | from fastapi.responses import JSONResponse 3 | 4 | from app.helpers._accesscontroller import AccessController 5 | from app.schemas.models import Model, Models 6 | from app.utils.context import global_context 7 | from app.utils.variables import ENDPOINT__MODELS 8 | 9 | router = APIRouter() 10 | 11 | 12 | @router.get(path=ENDPOINT__MODELS + "/{model:path}", dependencies=[Security(dependency=AccessController())], status_code=200, response_model=Model) 13 | async def get_model(request: Request, model: str = Path(description="The name of the model to get.")) -> JSONResponse: 14 | """ 15 | Get a model by name and provide basic informations. 16 | """ 17 | 18 | model = global_context.models.list(model=model)[0] 19 | 20 | return JSONResponse(content=model.model_dump(), status_code=200) 21 | 22 | 23 | @router.get(path=ENDPOINT__MODELS, dependencies=[Security(dependency=AccessController())], status_code=200, response_model=Models) 24 | async def get_models(request: Request) -> JSONResponse: 25 | """ 26 | Lists the currently available models and provides basic informations. 
27 | """ 28 | 29 | data = global_context.models.list() 30 | 31 | return JSONResponse(content=Models(data=data).model_dump(), status_code=200) 32 | -------------------------------------------------------------------------------- /app/endpoints/ocr.py: -------------------------------------------------------------------------------- 1 | import base64 2 | 3 | from fastapi import APIRouter, Request, Security, UploadFile 4 | from fastapi.responses import JSONResponse 5 | import pymupdf 6 | 7 | from app.helpers._accesscontroller import AccessController 8 | from app.schemas.core.documents import FileType 9 | from app.schemas.ocr import DPIForm, ModelForm, PromptForm 10 | from app.schemas.parse import FileForm, ParsedDocument, ParsedDocumentMetadata, ParsedDocumentPage 11 | from app.schemas.usage import Usage 12 | from app.utils.context import global_context 13 | from app.utils.exceptions import FileSizeLimitExceededException 14 | from app.utils.variables import ENDPOINT__OCR 15 | 16 | router = APIRouter() 17 | 18 | 19 | @router.post(path=ENDPOINT__OCR, dependencies=[Security(dependency=AccessController())], status_code=200, response_model=ParsedDocument) 20 | async def ocr(request: Request, file: UploadFile = FileForm, model: str = ModelForm, dpi: int = DPIForm, prompt: str = PromptForm) -> JSONResponse: 21 | """ 22 | Extracts text from PDF files using OCR. 23 | """ 24 | # check if file is a pdf (raises UnsupportedFileTypeException if not a PDF) 25 | global_context.parser._detect_file_type(file=file, type=FileType.PDF) 26 | 27 | # check file size 28 | if file.size > FileSizeLimitExceededException.MAX_CONTENT_SIZE: 29 | raise FileSizeLimitExceededException() 30 | 31 | # get model client 32 | model = global_context.models(model=model) 33 | client = model.get_client(endpoint=ENDPOINT__OCR) 34 | 35 | file_content = await file.read() # open document 36 | pdf = pymupdf.open(stream=file_content, filetype="pdf") 37 | document = ParsedDocument(data=[], usage=Usage()) 38 | 39 | for i, page in enumerate(pdf): # iterate through the pages 40 | image = page.get_pixmap(dpi=dpi) # render page to an image 41 | img_byte_arr = image.tobytes("png") # convert pixmap to PNG bytes 42 | 43 | # forward request 44 | payload = { 45 | "model": model, 46 | "messages": [ 47 | { 48 | "role": "user", 49 | "content": [ 50 | {"type": "text", "text": prompt}, 51 | {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64.b64encode(img_byte_arr).decode("utf-8")}"}}, 52 | ], 53 | } 54 | ], 55 | "n": 1, 56 | "stream": False, 57 | } 58 | response = await client.forward_request(method="POST", json=payload) # error are automatically raised 59 | response = response.json() 60 | text = response.get("choices", [{}])[0].get("message", {}).get("content", "") 61 | 62 | # format response 63 | document.data.append( 64 | ParsedDocumentPage( 65 | content=text, 66 | images={}, 67 | metadata=ParsedDocumentMetadata(page=i, document_name=file.filename, **pdf.metadata), 68 | ) 69 | ) 70 | document.usage = Usage(**response.get("usage", {})) 71 | 72 | pdf.close() 73 | 74 | return JSONResponse(content=document.model_dump(), status_code=200) 75 | -------------------------------------------------------------------------------- /app/endpoints/parse.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from fastapi import APIRouter, File, Request, Security, UploadFile 4 | from fastapi.responses import JSONResponse 5 | 6 | from app.helpers._accesscontroller import 
7 | from app.schemas.parse import (
8 |     ForceOCRForm,
9 |     Languages,
10 |     LanguagesForm,
11 |     OutputFormatForm,
12 |     PageRangeForm,
13 |     PaginateOutputForm,
14 |     ParsedDocument,
15 |     ParsedDocumentOutputFormat,
16 |     UseLLMForm,
17 | )
18 | from app.utils.context import global_context
19 | from app.utils.exceptions import FileSizeLimitExceededException
20 | from app.utils.variables import ENDPOINT__PARSE
21 | 
22 | router = APIRouter()
23 | 
24 | 
25 | @router.post(path=ENDPOINT__PARSE, dependencies=[Security(dependency=AccessController())], status_code=200, response_model=ParsedDocument)
26 | async def parse(
27 |     request: Request,
28 |     file: UploadFile = File(...),
29 |     output_format: ParsedDocumentOutputFormat = OutputFormatForm,
30 |     force_ocr: bool = ForceOCRForm,
31 |     languages: Optional[Languages] = LanguagesForm,
32 |     page_range: str = PageRangeForm,
33 |     paginate_output: Optional[bool] = PaginateOutputForm,
34 |     use_llm: Optional[bool] = UseLLMForm,
35 | ) -> JSONResponse:
36 |     """
37 |     Parse a document.
38 |     """
39 | 
40 |     if file.size > FileSizeLimitExceededException.MAX_CONTENT_SIZE:
41 |         raise FileSizeLimitExceededException()
42 |     document = await global_context.parser.parse_file(
43 |         file=file,
44 |         output_format=output_format,
45 |         force_ocr=force_ocr,
46 |         languages=languages.value if languages else None,  # guard: an explicit null would otherwise crash on `.value`
47 |         page_range=page_range,
48 |         paginate_output=paginate_output,
49 |         use_llm=use_llm,
50 |     )
51 |     return JSONResponse(content=document.model_dump(), status_code=200)
52 | 
--------------------------------------------------------------------------------
/app/endpoints/rerank.py:
--------------------------------------------------------------------------------
1 | from fastapi import APIRouter, Request, Security
2 | from fastapi.responses import JSONResponse
3 | 
4 | from app.helpers._accesscontroller import AccessController
5 | from app.schemas.rerank import RerankRequest, Reranks
6 | from app.utils.context import global_context
7 | from app.utils.variables import ENDPOINT__RERANK
8 | 
9 | router = APIRouter()
10 | 
11 | 
12 | @router.post(path=ENDPOINT__RERANK, dependencies=[Security(dependency=AccessController())], status_code=200, response_model=Reranks)
13 | async def rerank(request: Request, body: RerankRequest) -> JSONResponse:
14 |     """
15 |     Creates an ordered array with each text assigned a relevance score, based on the query.
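
    A minimal request sketch (base URL, model name and token are placeholders;
    the endpoint path is an assumption based on `ENDPOINT__RERANK`):

        import httpx

        response = httpx.post(
            "http://localhost:8000/v1/rerank",
            json={
                "model": "my-reranker",  # a text-classification model
                "prompt": "What is the capital of France?",
                "input": ["Paris is the capital of France.", "Berlin is a city."],
            },
            headers={"Authorization": "Bearer <token>"},
        )
        best = max(response.json()["data"], key=lambda item: item["score"])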
16 | """ 17 | 18 | model = global_context.models(model=body.model) 19 | client = model.get_client(endpoint=ENDPOINT__RERANK) 20 | response = await client.forward_request(method="POST", json=body.model_dump()) 21 | 22 | return JSONResponse(content=Reranks(**response.json()).model_dump(), status_code=response.status_code) 23 | -------------------------------------------------------------------------------- /app/endpoints/search.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, Depends, Request, Security 2 | from fastapi.responses import JSONResponse 3 | from sqlalchemy.ext.asyncio import AsyncSession 4 | 5 | from app.helpers._accesscontroller import AccessController 6 | from app.schemas.search import Searches, SearchRequest 7 | from app.sql.session import get_db as get_session 8 | from app.utils.context import global_context, request_context 9 | from app.utils.exceptions import CollectionNotFoundException 10 | from app.utils.variables import ENDPOINT__SEARCH 11 | 12 | router = APIRouter() 13 | 14 | 15 | @router.post(path=ENDPOINT__SEARCH, dependencies=[Security(dependency=AccessController())], status_code=200, response_model=Searches) 16 | async def search(request: Request, body: SearchRequest, session: AsyncSession = Depends(get_session)) -> JSONResponse: 17 | """ 18 | Get relevant chunks from the collections and a query. 19 | """ 20 | 21 | if not global_context.documents: # no vector store available 22 | raise CollectionNotFoundException() 23 | 24 | data = await global_context.documents.search( 25 | session=session, 26 | collection_ids=body.collections, 27 | prompt=body.prompt, 28 | method=body.method, 29 | k=body.k, 30 | rff_k=body.rff_k, 31 | user_id=request_context.get().user_id, 32 | web_search=body.web_search, 33 | ) 34 | usage = request_context.get().usage 35 | content = Searches(data=data, usage=usage) 36 | 37 | return JSONResponse(content=content.model_dump(), status_code=200) 38 | -------------------------------------------------------------------------------- /app/helpers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/etalab-ia/albert-api/0446f06baa3d561bb1bbd34ab1dd9a0441dd9302/app/helpers/__init__.py -------------------------------------------------------------------------------- /app/helpers/_streamingresponsewithstatuscode.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import traceback 4 | from typing import AsyncIterator 5 | 6 | from fastapi.responses import StreamingResponse 7 | from starlette.types import Send 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | class StreamingResponseWithStatusCode(StreamingResponse): 13 | """ 14 | Variation of StreamingResponse that can dynamically decide the HTTP status code, 15 | based on the return value of the content iterator (parameter `content`). 16 | Expects the content to yield either just str content as per the original `StreamingResponse` 17 | or else tuples of (`content`: `str`, `status_code`: `int`). 
18 | """ 19 | 20 | body_iterator: AsyncIterator[str | bytes] 21 | response_started: bool = False 22 | 23 | async def stream_response(self, send: Send) -> None: 24 | more_body = True 25 | try: 26 | first_chunk = await self.body_iterator.__anext__() 27 | if isinstance(first_chunk, tuple): 28 | first_chunk_content, self.status_code = first_chunk 29 | else: 30 | first_chunk_content, self.status_code = first_chunk, 200 31 | 32 | if isinstance(first_chunk_content, str): 33 | first_chunk_content = first_chunk_content.encode(self.charset) 34 | 35 | await send({"type": "http.response.start", "status": self.status_code, "headers": self.raw_headers}) 36 | 37 | self.response_started = True 38 | await send({"type": "http.response.body", "body": first_chunk_content, "more_body": more_body}) 39 | 40 | async for chunk in self.body_iterator: 41 | if isinstance(chunk, tuple): 42 | content, status_code = chunk 43 | if status_code // 100 != 2: 44 | # an error occurred mid-stream 45 | if not isinstance(content, bytes): 46 | content = content.encode(self.charset) 47 | more_body = False 48 | await send({"type": "http.response.body", "body": content, "more_body": more_body}) 49 | return 50 | else: 51 | content = chunk 52 | 53 | if isinstance(content, str): 54 | content = content.encode(self.charset) 55 | more_body = True 56 | await send({"type": "http.response.body", "body": content, "more_body": more_body}) 57 | 58 | except Exception: 59 | logger.error(traceback.format_exc()) 60 | more_body = False 61 | error_resp = {"error": {"message": "Internal Server Error"}} 62 | error_event = f"event: error\ndata: {json.dumps(error_resp)}\n\n".encode(self.charset) 63 | if not self.response_started: 64 | await send({"type": "http.response.start", "status": 500, "headers": self.raw_headers}) 65 | await send({"type": "http.response.body", "body": error_event, "more_body": more_body}) 66 | if more_body: 67 | await send({"type": "http.response.body", "body": b"", "more_body": False}) 68 | -------------------------------------------------------------------------------- /app/helpers/_websearchmanager.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | import logging 3 | from typing import List 4 | from urllib.parse import urlparse 5 | 6 | from fastapi import UploadFile 7 | import requests 8 | 9 | from app.clients.web_search import BaseWebSearchClient as WebSearchClient 10 | from app.helpers.models.routers import ModelRouter 11 | from app.utils.variables import ENDPOINT__CHAT_COMPLETIONS 12 | 13 | from app.utils.settings import settings 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | class WebSearchManager: 19 | GET_WEB_QUERY_PROMPT = """Tu es un spécialiste pour transformer des demandes en requête google. Tu sais écrire les meilleurs types de recherches pour arriver aux meilleurs résultats. 20 | Voici la demande : {prompt} 21 | Réponds en donnant uniquement une requête google qui permettrait de trouver des informations pour répondre à la question. 22 | 23 | Exemples : 24 | - Question: Peut-on avoir des jours de congé pour un mariage ? 25 | Réponse: jour de congé mariage conditions 26 | 27 | - Question: Donnes-moi des informations sur Jules Verne. 28 | Réponse: Jules Verne 29 | 30 | - Question: Comment refaire une pièce d'identité ? 31 | Réponse: renouvellement pièce identité France 32 | 33 | Ne donnes pas d'explication, ne mets pas de guillemets, réponds uniquement avec la requête google qui renverra les meilleurs résultats pour la demande. 
Ne mets pas de mots qui ne servent à rien dans la requête Google. 34 | """ 35 | 36 | def __init__(self, web_search: WebSearchClient, model: ModelRouter) -> None: 37 | self.web_search = web_search 38 | self.model = model 39 | self.limited_domains = settings.web_search.limited_domains if settings.web_search else None 40 | self.user_agent = settings.web_search.user_agent if settings.web_search else None 41 | 42 | async def get_web_query(self, prompt: str) -> str: 43 | prompt = self.GET_WEB_QUERY_PROMPT.format(prompt=prompt) 44 | client = self.model.get_client(endpoint=ENDPOINT__CHAT_COMPLETIONS) 45 | response = await client.forward_request( 46 | method="POST", 47 | json={"messages": [{"role": "user", "content": prompt}], "model": self.model.id, "temperature": 0.2, "stream": False}, 48 | ) 49 | query = response.json()["choices"][0]["message"]["content"] 50 | 51 | return query 52 | 53 | async def get_results(self, query: str, n: int = 3) -> List[UploadFile]: 54 | urls = await self.web_search.search(query=query, n=n) 55 | results = [] 56 | for url in urls: 57 | # Parse the URL and extract the hostname 58 | parsed = urlparse(url) 59 | domain = parsed.hostname 60 | if not domain: 61 | # Skip invalid URLs 62 | continue 63 | 64 | # Check if the domain is authorized 65 | if self.limited_domains: 66 | # Allow exact match or subdomains of allowed domains 67 | if not any(domain == allowed or domain.endswith(f".{allowed}") for allowed in self.limited_domains): 68 | # Skip unauthorized domains 69 | continue 70 | 71 | # Fetch the content, skipping on network errors 72 | try: 73 | response = requests.get(url=url, headers={"User-Agent": self.user_agent}, timeout=5) 74 | except requests.RequestException: 75 | logger.exception("Error fetching URL: %s", url) 76 | continue 77 | 78 | if response.status_code != 200: 79 | continue 80 | 81 | file = BytesIO(response.text.encode("utf-8")) 82 | file = UploadFile(filename=f"{url}.html", file=file) 83 | results.append(file) 84 | 85 | return results 86 | -------------------------------------------------------------------------------- /app/helpers/agents/__init__.py: -------------------------------------------------------------------------------- 1 | from ._agentsmanager import AgentsManager 2 | 3 | __all__ = ["AgentsManager"] 4 | -------------------------------------------------------------------------------- /app/helpers/agents/_agentsmanager.py: -------------------------------------------------------------------------------- 1 | import json 2 | import httpx 3 | 4 | from app.clients.mcp import SecretShellMCPBridgeClient 5 | from app.helpers.models import ModelRegistry 6 | from app.utils.variables import ENDPOINT__CHAT_COMPLETIONS 7 | 8 | 9 | class AgentsManager: 10 | def __init__(self, mcp_bridge: SecretShellMCPBridgeClient, model_registry: ModelRegistry): 11 | self.model_registry = model_registry 12 | self.mcp_bridge = mcp_bridge 13 | 14 | async def get_completion(self, body): 15 | body = await self.set_tools_for_llm_request(body) 16 | http_llm_response = None 17 | number_of_iterations = 0 18 | max_iterations = 2 19 | while number_of_iterations < max_iterations: 20 | http_llm_response = await self.get_llm_http_response(body) 21 | llm_response = json.loads(http_llm_response.text) 22 | finish_reason = llm_response["choices"][0]["finish_reason"] 23 | number_of_iterations = number_of_iterations + 1 24 | if finish_reason in ["stop", "length"]: 25 | return http_llm_response 26 | elif finish_reason == "tool_calls": 27 | tool_config = 
llm_response["choices"][0]["message"]["tool_calls"][0]["function"] 28 | tool_name = tool_config["name"] 29 | tool_args = tool_config["arguments"] 30 | 31 | tool_call_result = await self.mcp_bridge.call_tool(tool_name, tool_args) 32 | body.messages.append({"role": "user", "content": tool_call_result["content"][0]["text"]}) 33 | last_llm_response = http_llm_response.json() 34 | last_llm_response["choices"][0]["finish_reason"] = "max_iterations" 35 | llm_response_with_new_finish_reason = httpx.Response( 36 | status_code=http_llm_response.status_code, 37 | content=json.dumps(last_llm_response), 38 | headers=http_llm_response.headers, 39 | request=http_llm_response.request, 40 | ) 41 | return llm_response_with_new_finish_reason 42 | 43 | async def get_llm_http_response(self, body): 44 | model = self.model_registry(model=body.model) 45 | client = model.get_client(endpoint=ENDPOINT__CHAT_COMPLETIONS) 46 | http_llm_response = await client.forward_request(method="POST", json=body.model_dump()) 47 | return http_llm_response 48 | 49 | async def set_tools_for_llm_request(self, body): 50 | if hasattr(body, "tools") and body.tools is not None: 51 | tools = await self.get_tools_from_bridge() 52 | available_tools = [ 53 | {"type": "function", "function": {"name": tool["name"], "description": tool["description"], "parameters": tool["inputSchema"]}} 54 | for tool in tools 55 | ] 56 | if "all" in body.tools: 57 | body.tools = available_tools 58 | else: 59 | available_tool_names = [tool["function"]["name"] for tool in available_tools] 60 | selected_available_tool_names = list(set(body.tools) & set(available_tool_names)) 61 | used_tools = [ 62 | available_tool 63 | for available_tool in available_tools 64 | if available_tool.get("function").get("name") in selected_available_tool_names 65 | ] 66 | body.tools = used_tools 67 | body.tool_choice = getattr(body, "tool_choice", "auto") 68 | return body 69 | 70 | async def get_tools_from_bridge(self): 71 | mcp_bridge_tools = await self.mcp_bridge.get_tool_list() 72 | all_tools = [section["tools"] for section in mcp_bridge_tools.values()] 73 | flat_tools = [tool for tools in all_tools for tool in tools] 74 | return flat_tools 75 | -------------------------------------------------------------------------------- /app/helpers/core.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/etalab-ia/albert-api/0446f06baa3d561bb1bbd34ab1dd9a0441dd9302/app/helpers/core.py -------------------------------------------------------------------------------- /app/helpers/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/etalab-ia/albert-api/0446f06baa3d561bb1bbd34ab1dd9a0441dd9302/app/helpers/data/__init__.py -------------------------------------------------------------------------------- /app/helpers/data/chunkers/__init__.py: -------------------------------------------------------------------------------- 1 | from ._basesplitter import BaseSplitter 2 | from ._nochunker import NoChunker 3 | from ._recursivecharactertextsplitter import RecursiveCharacterTextSplitter 4 | 5 | __all__ = ["BaseSplitter", "NoChunker", "RecursiveCharacterTextSplitter"] 6 | -------------------------------------------------------------------------------- /app/helpers/data/chunkers/_basesplitter.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import List, Optional 3 | 4 | 
from langchain_text_splitters import Language 5 | 6 | from app.schemas.chunks import Chunk 7 | from app.schemas.parse import ParsedDocument 8 | 9 | 10 | class BaseSplitter(ABC): 11 | def __init__(self, chunk_min_size: int = 0, metadata: Optional[dict] = None, language: Optional[Language] = None) -> None: 12 | self.chunk_min_size = chunk_min_size 13 | self.metadata = metadata or {} 14 | self.splitter = None # this will be set in the child class 15 | 16 | @abstractmethod 17 | def split_document(self, document: ParsedDocument) -> List[Chunk]: 18 | pass 19 | -------------------------------------------------------------------------------- /app/helpers/data/chunkers/_nochunker.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | from langchain_text_splitters import Language 4 | 5 | from app.schemas.chunks import Chunk 6 | from app.schemas.parse import ParsedDocument 7 | 8 | from ._basesplitter import BaseSplitter 9 | 10 | 11 | class NoChunker(BaseSplitter): 12 | def __init__(self, chunk_min_size: int = 0, metadata: Optional[dict] = None, language: Optional[Language] = None, *args, **kwargs) -> None: 13 | super().__init__(chunk_min_size=chunk_min_size, metadata=metadata, language=language) 14 | 15 | def split_document(self, document: ParsedDocument) -> List[Chunk]: 16 | chunks = list() 17 | i = 1 18 | 19 | for page in document.data: 20 | content = page.model_dump().get("content", "") 21 | if len(content) < self.chunk_min_size: 22 | continue 23 | chunks.append(Chunk(id=i, content=content, metadata=page.metadata.model_dump() | self.metadata)) 24 | i += 1 25 | 26 | return chunks 27 | -------------------------------------------------------------------------------- /app/helpers/data/chunkers/_recursivecharactertextsplitter.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | from langchain_text_splitters import Language 4 | from langchain_text_splitters import RecursiveCharacterTextSplitter as LangChainRecursiveCharacterTextSplitter 5 | 6 | from app.schemas.chunks import Chunk 7 | from app.schemas.parse import ParsedDocument 8 | 9 | from ._basesplitter import BaseSplitter 10 | 11 | 12 | class RecursiveCharacterTextSplitter(BaseSplitter): 13 | def __init__(self, chunk_min_size: int = 0, metadata: Optional[dict] = None, language: Optional[Language] = None, *args, **kwargs) -> None: 14 | super().__init__(chunk_min_size=chunk_min_size, metadata=metadata, language=language) 15 | if language: 16 | self.splitter = LangChainRecursiveCharacterTextSplitter.from_language(language=language, *args, **kwargs) 17 | else: 18 | self.splitter = LangChainRecursiveCharacterTextSplitter(*args, **kwargs) 19 | 20 | def split_document(self, document: ParsedDocument) -> List[Chunk]: 21 | chunks = list() 22 | i = 1 23 | 24 | for page in document.data: 25 | content = page.model_dump().get("content", "") 26 | content_chunks = self.splitter.split_text(content) 27 | for chunk in content_chunks: 28 | if len(chunk) < self.chunk_min_size: 29 | continue 30 | chunks.append(Chunk(id=i, content=chunk, metadata=page.metadata.model_dump() | self.metadata)) 31 | i += 1 32 | 33 | return chunks 34 | -------------------------------------------------------------------------------- /app/helpers/documents/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/etalab-ia/albert-api/0446f06baa3d561bb1bbd34ab1dd9a0441dd9302/app/helpers/documents/__init__.py -------------------------------------------------------------------------------- /app/helpers/models/__init__.py: -------------------------------------------------------------------------------- 1 | from ._modelregistry import ModelRegistry 2 | 3 | __all__ = ["ModelRegistry"] 4 | -------------------------------------------------------------------------------- /app/helpers/models/_modelregistry.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | from app.schemas.models import Model as ModelSchema 4 | from app.utils.exceptions import ModelNotFoundException 5 | 6 | from app.helpers.models.routers import ModelRouter 7 | 8 | 9 | class ModelRegistry: 10 | def __init__(self, routers: List[ModelRouter]) -> None: 11 | self.models = list() 12 | self.aliases = dict() 13 | 14 | for model in routers: 15 | if "id" not in model.__dict__: # no clients available 16 | continue 17 | 18 | self.__dict__[model.id] = model 19 | self.models.append(model.id) 20 | 21 | for alias in model.aliases: 22 | self.aliases[alias] = model.id 23 | 24 | def __call__(self, model: str) -> ModelRouter: 25 | model = self.aliases.get(model, model) 26 | 27 | if model in self.models: 28 | return self.__dict__[model] 29 | raise ModelNotFoundException() 30 | 31 | def list(self, model: Optional[str] = None) -> List[ModelSchema]: 32 | data = list() 33 | models = [model] if model else self.models 34 | for model in models: 35 | model = self.__call__(model=model) 36 | 37 | data.append( 38 | ModelSchema( 39 | id=model.id, 40 | type=model.type, 41 | max_context_length=model.max_context_length, 42 | owned_by=model.owned_by, 43 | created=model.created, 44 | aliases=model.aliases, 45 | costs=model.costs, 46 | ) 47 | ) 48 | 49 | return data 50 | -------------------------------------------------------------------------------- /app/helpers/models/routers/__init__.py: -------------------------------------------------------------------------------- 1 | from ._basemodelrouter import BaseModelRouter 2 | from ._immediatemodelrouter import ImmediateModelRouter 3 | from ._modelrouter import ModelRouter 4 | 5 | __all__ = ["BaseModelRouter", "ImmediateModelRouter", "ModelRouter"] 6 | -------------------------------------------------------------------------------- /app/helpers/models/routers/_basemodelrouter.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from itertools import cycle 3 | import time 4 | 5 | from app.clients.model import BaseModelClient as ModelClient 6 | from app.schemas.models import ModelCosts, ModelType 7 | 8 | 9 | class BaseModelRouter(ABC): 10 | def __init__( 11 | self, 12 | id: str, 13 | type: ModelType, 14 | owned_by: str, 15 | aliases: list[str], 16 | routing_strategy: str, 17 | clients: list[ModelClient], 18 | *args, 19 | **kwargs, 20 | ) -> None: 21 | vector_sizes, max_context_lengths, costs = list(), list(), list() 22 | 23 | for client in clients: 24 | vector_sizes.append(client.vector_size) 25 | max_context_lengths.append(client.max_context_length) 26 | costs.append(client.costs) 27 | 28 | # consistency checks 29 | assert len(set(vector_sizes)) < 2, "All embeddings models in the same model group must have the same vector size." 
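        # (illustration: two clients that both report a 1024-dim vector size pass this check;
        # 768 and 1024 together would fail, since mixed sizes cannot share one vector collection)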
30 | 
31 |         # if clients report different max_context_length values, expose the minimum for consistency of the /v1/models response
32 |         max_context_lengths = [value for value in max_context_lengths if value is not None]
33 |         max_context_length = min(max_context_lengths) if max_context_lengths else None
34 | 
35 |         # if clients report different costs, expose the maximum for consistency of the /v1/models response
36 |         prompt_tokens = max(client_costs.prompt_tokens for client_costs in costs)  # avoid shadowing the `costs` list with the loop variable
37 |         completion_tokens = max(client_costs.completion_tokens for client_costs in costs)
38 |         costs = ModelCosts(prompt_tokens=prompt_tokens, completion_tokens=completion_tokens)
39 | 
40 |         # set attributes of the model (returned by /v1/models endpoint)
41 |         self.id = id
42 |         self.type = type
43 |         self.owned_by = owned_by
44 |         self.created = round(time.time())
45 |         self.aliases = aliases
46 |         self.max_context_length = max_context_length
47 |         self.costs = costs
48 | 
49 |         self._vector_size = vector_sizes[0]
50 |         self._routing_strategy = routing_strategy
51 |         self._cycle = cycle(clients)
52 |         self._clients = clients
53 | 
54 |     @abstractmethod
55 |     def get_client(self, endpoint: str) -> ModelClient:
56 |         """
57 |         Get a client to handle the request.
58 | 
59 |         Args:
60 |             endpoint(str): The type of endpoint called
61 | 
62 |         Returns:
63 |             BaseModelClient: The available client
64 |         """
65 |         pass
66 | 
--------------------------------------------------------------------------------
/app/helpers/models/routers/_immediatemodelrouter.py:
--------------------------------------------------------------------------------
1 | from app.clients.model import BaseModelClient as ModelClient
2 | from app.helpers.models.routers.strategies import RoundRobinRoutingStrategy, ShuffleRoutingStrategy
3 | from app.schemas.core.models import RoutingStrategy
4 | from app.schemas.core.settings import Model as ModelClientSettings
5 | from app.schemas.models import ModelType
6 | from app.utils.exceptions import WrongModelTypeException
7 | from app.utils.variables import ENDPOINT__AUDIO_TRANSCRIPTIONS, ENDPOINT__CHAT_COMPLETIONS, ENDPOINT__EMBEDDINGS, ENDPOINT__OCR, ENDPOINT__RERANK
8 | 
9 | from ._basemodelrouter import BaseModelRouter
10 | 
11 | 
12 | class ImmediateModelRouter(BaseModelRouter):
13 |     ENDPOINT_MODEL_TYPE_TABLE = {
14 |         ENDPOINT__AUDIO_TRANSCRIPTIONS: [ModelType.AUTOMATIC_SPEECH_RECOGNITION],
15 |         ENDPOINT__CHAT_COMPLETIONS: [ModelType.TEXT_GENERATION, ModelType.IMAGE_TEXT_TO_TEXT],
16 |         ENDPOINT__EMBEDDINGS: [ModelType.TEXT_EMBEDDINGS_INFERENCE],
17 |         ENDPOINT__OCR: [ModelType.IMAGE_TEXT_TO_TEXT],
18 |         ENDPOINT__RERANK: [ModelType.TEXT_CLASSIFICATION],
19 |     }
20 | 
21 |     def __init__(
22 |         self,
23 |         id: str,
24 |         type: ModelType,
25 |         owned_by: str,
26 |         aliases: list[str],
27 |         routing_strategy: str,
28 |         clients: list[ModelClientSettings],
29 |         *args,
30 |         **kwargs,
31 |     ) -> None:
32 |         super().__init__(id, type, owned_by, aliases, routing_strategy, clients, *args, **kwargs)
33 | 
34 |     def get_client(self, endpoint: str) -> ModelClient:
35 |         if endpoint and self.type not in self.ENDPOINT_MODEL_TYPE_TABLE[endpoint]:
36 |             raise WrongModelTypeException()
37 | 
38 |         if self._routing_strategy == RoutingStrategy.ROUND_ROBIN:
39 |             strategy = RoundRobinRoutingStrategy(self._clients, self._cycle)
40 |         else:  # ROUTER_STRATEGY__SHUFFLE
41 |             strategy = ShuffleRoutingStrategy(self._clients)
42 | 
43 |         client = strategy.choose_model_client()
44 |         client.endpoint = endpoint
45 | 
46 |         return client
47 | 
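# A hedged usage sketch (the client objects and model metadata below are placeholders,
# not values from this repository):
#
#     router = ImmediateModelRouter(
#         id="my-model", type=ModelType.TEXT_GENERATION, owned_by="me", aliases=[],
#         routing_strategy=RoutingStrategy.ROUND_ROBIN, clients=[client_a, client_b],
#     )
#     client = router.get_client(endpoint=ENDPOINT__CHAT_COMPLETIONS)  # alternates between client_a and client_b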
-------------------------------------------------------------------------------- /app/helpers/models/routers/_modelrouter.py: -------------------------------------------------------------------------------- 1 | from app.clients.model import BaseModelClient as ModelClient 2 | from app.helpers.models.routers.strategies import RoundRobinRoutingStrategy, ShuffleRoutingStrategy 3 | from app.schemas.core.models import RoutingStrategy 4 | from app.schemas.models import ModelType 5 | from app.utils.exceptions import WrongModelTypeException 6 | from app.utils.variables import ENDPOINT__AUDIO_TRANSCRIPTIONS, ENDPOINT__CHAT_COMPLETIONS, ENDPOINT__EMBEDDINGS, ENDPOINT__OCR, ENDPOINT__RERANK 7 | 8 | from ._basemodelrouter import BaseModelRouter 9 | 10 | 11 | class ModelRouter(BaseModelRouter): 12 | ENDPOINT_MODEL_TYPE_TABLE = { 13 | ENDPOINT__AUDIO_TRANSCRIPTIONS: [ModelType.AUTOMATIC_SPEECH_RECOGNITION], 14 | ENDPOINT__CHAT_COMPLETIONS: [ModelType.TEXT_GENERATION, ModelType.IMAGE_TEXT_TO_TEXT], 15 | ENDPOINT__EMBEDDINGS: [ModelType.TEXT_EMBEDDINGS_INFERENCE], 16 | ENDPOINT__OCR: [ModelType.IMAGE_TEXT_TO_TEXT], 17 | ENDPOINT__RERANK: [ModelType.TEXT_CLASSIFICATION], 18 | } 19 | 20 | def __init__( 21 | self, 22 | id: str, 23 | type: ModelType, 24 | owned_by: str, 25 | aliases: list[str], 26 | routing_strategy: str, 27 | clients: list[ModelClient], 28 | *args, 29 | **kwargs, 30 | ) -> None: 31 | super().__init__(id=id, type=type, owned_by=owned_by, aliases=aliases, routing_strategy=routing_strategy, clients=clients, *args, **kwargs) 32 | 33 | def get_client(self, endpoint: str) -> ModelClient: 34 | if endpoint and self.type not in self.ENDPOINT_MODEL_TYPE_TABLE[endpoint]: 35 | raise WrongModelTypeException() 36 | 37 | if self._routing_strategy == RoutingStrategy.ROUND_ROBIN: 38 | strategy = RoundRobinRoutingStrategy(self._clients, self._cycle) 39 | else: # ROUTER_STRATEGY__SHUFFLE 40 | strategy = ShuffleRoutingStrategy(self._clients) 41 | 42 | client = strategy.choose_model_client() 43 | client.endpoint = endpoint 44 | 45 | return client 46 | -------------------------------------------------------------------------------- /app/helpers/models/routers/strategies/__init__.py: -------------------------------------------------------------------------------- 1 | from ._baserountingstrategy import BaseRoutingStrategy 2 | from ._roundrobinroutingstrategy import RoundRobinRoutingStrategy 3 | from ._shuffleroutingstrategy import ShuffleRoutingStrategy 4 | 5 | __all__ = ["BaseRoutingStrategy", "RoundRobinRoutingStrategy", "ShuffleRoutingStrategy"] 6 | -------------------------------------------------------------------------------- /app/helpers/models/routers/strategies/_baserountingstrategy.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import List 3 | 4 | from app.clients.model import BaseModelClient as ModelClient 5 | 6 | 7 | class BaseRoutingStrategy(ABC): 8 | def __init__(self, clients: List[ModelClient]) -> None: 9 | self.clients = clients 10 | 11 | @abstractmethod 12 | def choose_model_client(self) -> ModelClient: 13 | """ 14 | Choose a client among the model's clients list 15 | 16 | Returns: 17 | BaseModelClient: The chosen client 18 | """ 19 | pass 20 | -------------------------------------------------------------------------------- /app/helpers/models/routers/strategies/_roundrobinroutingstrategy.py: -------------------------------------------------------------------------------- 1 | from typing import Iterator, 
List 2 | 3 | from app.clients.model import BaseModelClient as ModelClient 4 | from app.helpers.models.routers.strategies import BaseRoutingStrategy 5 | 6 | 7 | class RoundRobinRoutingStrategy(BaseRoutingStrategy): 8 | def __init__(self, clients: List[ModelClient], cycle: Iterator[ModelClient]) -> None: 9 | super().__init__(clients) 10 | self.cycle = cycle 11 | 12 | def choose_model_client(self) -> ModelClient: 13 | return next(self.cycle) 14 | -------------------------------------------------------------------------------- /app/helpers/models/routers/strategies/_shuffleroutingstrategy.py: -------------------------------------------------------------------------------- 1 | import random 2 | from typing import List 3 | 4 | from app.clients.model import BaseModelClient as ModelClient 5 | from app.helpers.models.routers.strategies import BaseRoutingStrategy 6 | 7 | 8 | class ShuffleRoutingStrategy(BaseRoutingStrategy): 9 | def __init__(self, clients: List[ModelClient]) -> None: 10 | super().__init__(clients) 11 | 12 | def choose_model_client(self) -> ModelClient: 13 | return random.choice(self.clients) 14 | -------------------------------------------------------------------------------- /app/schemas/__init__.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | 4 | class BaseModel(BaseModel): 5 | class Config: 6 | extra = "allow" 7 | -------------------------------------------------------------------------------- /app/schemas/audio.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from openai.types.audio import Transcription 4 | from pydantic import Field 5 | 6 | from app.schemas import BaseModel 7 | 8 | 9 | class AudioTranscription(Transcription): 10 | id: str = Field(default=None, description="A unique identifier for the audio transcription.") 11 | 12 | 13 | class Word(BaseModel): 14 | word: str 15 | start: float 16 | end: float 17 | 18 | 19 | class Segment(BaseModel): 20 | id: int 21 | seek: int 22 | start: float 23 | end: float 24 | text: str 25 | tokens: List[int] 26 | temperature: float 27 | avg_logprob: float 28 | compression_ratio: float 29 | no_speech_prob: float 30 | 31 | 32 | class AudioTranscriptionVerbose(AudioTranscription): 33 | language: str 34 | duration: float 35 | text: str 36 | words: List[Word] 37 | segments: List[Segment] 38 | -------------------------------------------------------------------------------- /app/schemas/chunks.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Literal 2 | 3 | from app.schemas import BaseModel 4 | 5 | 6 | class Chunk(BaseModel): 7 | object: Literal["chunk"] = "chunk" 8 | id: int 9 | metadata: Dict[str, Any] 10 | content: str 11 | 12 | 13 | class Chunks(BaseModel): 14 | object: Literal["list"] = "list" 15 | data: List[Chunk] 16 | -------------------------------------------------------------------------------- /app/schemas/collections.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from typing import List, Literal, Optional 3 | 4 | from pydantic import Field, field_validator 5 | 6 | from app.schemas import BaseModel 7 | 8 | 9 | class CollectionVisibility(str, Enum): 10 | PRIVATE = "private" 11 | PUBLIC = "public" 12 | 13 | 14 | class CollectionRequest(BaseModel): 15 | name: str = Field(min_length=1, description="The name of the collection.") 16 | description: 
Optional[str] = Field(default=None, description="The description of the collection.") 17 | visibility: CollectionVisibility = Field(default=CollectionVisibility.PRIVATE, description="The type of the collection. Public collections are available to all users, private collections are only available to the user who created them.") # fmt: off 18 | 19 | @field_validator("name", mode="after") 20 | def strip_name(cls, name): 21 | if isinstance(name, str): 22 | name = name.strip() 23 | if not name: # empty string 24 | raise ValueError("Name cannot be empty.") 25 | 26 | return name 27 | 28 | 29 | class CollectionUpdateRequest(BaseModel): 30 | name: Optional[str] = Field(default=None, min_length=1, description="The name of the collection.") 31 | description: Optional[str] = Field(default=None, description="The description of the collection.") 32 | visibility: Optional[CollectionVisibility] = Field(default=None, description="The type of the collection. Public collections are available to all users, private collections are only available to the user who created them.") # fmt: off 33 | 34 | @field_validator("name", mode="after") 35 | def strip_name(cls, name): 36 | if isinstance(name, str): 37 | name = name.strip() 38 | if not name: # empty string 39 | raise ValueError("Name cannot be empty.") 40 | 41 | return name 42 | 43 | 44 | class Collection(BaseModel): 45 | object: Literal["collection"] = "collection" 46 | id: int 47 | name: str 48 | owner: str 49 | description: Optional[str] = None 50 | visibility: Optional[CollectionVisibility] = None 51 | created_at: int 52 | updated_at: int 53 | documents: int = 0 54 | 55 | 56 | class Collections(BaseModel): 57 | object: Literal["list"] = "list" 58 | data: List[Collection] 59 | -------------------------------------------------------------------------------- /app/schemas/completions.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Iterable, List, Optional, Union 2 | 3 | from openai.types import Completion 4 | from pydantic import Field 5 | 6 | from app.schemas import BaseModel 7 | from app.schemas.usage import Usage 8 | 9 | 10 | class CompletionRequest(BaseModel): 11 | prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]]] 12 | model: str 13 | best_of: Optional[int] = None 14 | echo: Optional[bool] = False 15 | frequency_penalty: Optional[float] = 0.0 16 | logit_bias: Optional[Dict[str, float]] = None 17 | logprobs: Optional[int] = None 18 | max_tokens: Optional[int] = 16 19 | n: Optional[int] = 1 20 | presence_penalty: Optional[float] = 0.0 21 | seed: Optional[int] = None 22 | stop: Optional[Union[str, List[str]]] = Field(default_factory=list) 23 | stream: Optional[bool] = False 24 | suffix: Optional[str] = None 25 | temperature: Optional[float] = 1.0 26 | top_p: Optional[float] = 1.0 27 | user: Optional[str] = None 28 | 29 | 30 | class Completions(Completion): 31 | id: str = Field(default=None, description="A unique identifier for the completion.") 32 | usage: Usage = Field(default=None, description="Usage information for the request.") 33 | -------------------------------------------------------------------------------- /app/schemas/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/etalab-ia/albert-api/0446f06baa3d561bb1bbd34ab1dd9a0441dd9302/app/schemas/core/__init__.py -------------------------------------------------------------------------------- /app/schemas/core/auth.py: 
-------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | from enum import Enum 3 | 4 | 5 | class LimitingStrategy(str, Enum): 6 | MOVING_WINDOW = "moving_window" 7 | FIXED_WINDOW = "fixed_window" 8 | SLIDING_WINDOW = "sliding_window" 9 | 10 | 11 | class UserModelLimits(BaseModel): 12 | tpm: int = 0 13 | tpd: int = 0 14 | rpm: int = 0 15 | rpd: int = 0 16 | -------------------------------------------------------------------------------- /app/schemas/core/context.py: -------------------------------------------------------------------------------- 1 | from types import SimpleNamespace 2 | from typing import Any, Optional 3 | 4 | from pydantic import BaseModel 5 | 6 | from app.schemas.usage import Usage 7 | 8 | 9 | class GlobalContext(BaseModel): 10 | tokenizer: Optional[Any] = None 11 | models: Optional[Any] = None 12 | iam: Optional[Any] = None 13 | limiter: Optional[Any] = None 14 | documents: Optional[Any] = None 15 | parser: Optional[Any] = None 16 | mcp: Optional[Any] = SimpleNamespace() 17 | 18 | class Config: 19 | extra = "allow" 20 | 21 | 22 | class RequestContext(BaseModel): 23 | id: Optional[str] = None 24 | user_id: Optional[str] = None 25 | role_id: Optional[str] = None 26 | token_id: Optional[str] = None 27 | method: Optional[str] = None 28 | endpoint: Optional[str] = None 29 | client: Optional[str] = None 30 | usage: Optional[Usage] = None 31 | 32 | class Config: 33 | extra = "allow" 34 | -------------------------------------------------------------------------------- /app/schemas/core/documents.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from typing import Any, Dict, List, Optional 3 | 4 | from fastapi import UploadFile 5 | from pydantic import BaseModel 6 | 7 | from app.schemas.parse import Languages, ParsedDocumentOutputFormat 8 | 9 | 10 | class ParserParams(BaseModel): 11 | file: UploadFile 12 | output_format: Optional[ParsedDocumentOutputFormat] = None 13 | force_ocr: bool = False 14 | languages: Optional[Languages] = None 15 | page_range: Optional[str] = None 16 | paginate_output: bool = False 17 | use_llm: bool = False 18 | 19 | 20 | class FileType(str, Enum): 21 | PDF = "pdf" 22 | HTML = "html" 23 | JSON = "json" 24 | MD = "md" 25 | TXT = "txt" 26 | 27 | 28 | class JsonFileDocument(BaseModel): 29 | title: Optional[str] = None 30 | text: str 31 | metadata: Dict[str, Any] = {} 32 | 33 | 34 | class JsonFile(BaseModel): 35 | documents: List[JsonFileDocument] 36 | -------------------------------------------------------------------------------- /app/schemas/core/models.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from typing import Optional 3 | 4 | from pydantic import BaseModel 5 | 6 | from app.schemas.models import ModelType 7 | 8 | 9 | class ModelClientType(str, Enum): 10 | ALBERT = "albert" 11 | OPENAI = "openai" 12 | TEI = "tei" 13 | VLLM = "vllm" 14 | 15 | @classmethod 16 | def get_supported_clients(cls, model_type): 17 | mapping = { 18 | ModelType.AUTOMATIC_SPEECH_RECOGNITION: [cls.ALBERT.value, cls.OPENAI.value], 19 | ModelType.IMAGE_TEXT_TO_TEXT: [cls.ALBERT.value, cls.OPENAI.value, cls.VLLM.value], 20 | ModelType.TEXT_EMBEDDINGS_INFERENCE: [cls.ALBERT.value, cls.OPENAI.value, cls.TEI.value], 21 | ModelType.TEXT_GENERATION: [cls.ALBERT.value, cls.OPENAI.value, cls.VLLM.value], 22 | ModelType.TEXT_CLASSIFICATION: [cls.ALBERT.value, cls.TEI.value], 23 | } 24 | return 
mapping.get(model_type, []) 25 | 26 | 27 | class RoutingStrategy(str, Enum): 28 | ROUND_ROBIN = "round_robin" 29 | SHUFFLE = "shuffle" 30 | 31 | class ModelClientCarbonImpactParams(BaseModel): 32 | total: Optional[int] = None 33 | active: Optional[int] = None -------------------------------------------------------------------------------- /app/schemas/core/usage.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | import pycountry 4 | 5 | CountryCodes = [country.alpha_3 for country in pycountry.countries] 6 | CountryCodes.append("WOR") # Add world as a country code 7 | CountryCodes = {str(lang).upper(): str(lang) for lang in sorted(set(CountryCodes))} 8 | CountryCodes = Enum("CountryCodes", CountryCodes, type=str) 9 | -------------------------------------------------------------------------------- /app/schemas/documents.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from typing import List, Literal, Optional 3 | 4 | from fastapi import Form 5 | from pydantic import Field 6 | 7 | from app.schemas import BaseModel 8 | 9 | 10 | class ChunkerName(str, Enum): 11 | RECURSIVE_CHARACTER_TEXT_SPLITTER = "RecursiveCharacterTextSplitter" 12 | NO_SPLITTER = "NoSplitter" 13 | 14 | 15 | CollectionForm: int = Form(default=..., description="The collection ID to use for the file upload. The file will be vectorized with model defined by the collection.") # fmt: off 16 | ChunkerNameForm: ChunkerName = Form(default=ChunkerName.RECURSIVE_CHARACTER_TEXT_SPLITTER, description="The name of the chunker to use for the file upload.") # fmt: off 17 | ChunkSizeForm: int = Form(default=2048, description="The size of the chunks to use for the file upload.") # fmt: off 18 | ChunkOverlapForm: int = Form(default=0, description="The overlap of the chunks to use for the file upload.") # fmt: off 19 | LengthFunctionForm: Literal["len"] = Form(default="len", description="The function to use to calculate the length of the chunks to use for the file upload.") # fmt: off 20 | IsSeparatorRegexForm: bool = Form(default=False, description="Whether the separator is a regex to use for the file upload.") # fmt: off 21 | SeparatorsForm: List[str] = Form(default=["\n\n", "\n", ". 
", " "], description="The separators to use for the file upload.") # fmt: off 22 | ChunkMinSizeForm: int = Form(default=0, description="The minimum size of the chunks to use for the file upload.") # fmt: off 23 | MetadataForm: str = Form(default="", description="Additional metadata to chunks, JSON string.", pattern=r"^[^{}]*$") # fmt: off 24 | 25 | 26 | class Document(BaseModel): 27 | object: Literal["document"] = "document" 28 | id: int 29 | name: str 30 | collection_id: int 31 | created_at: int 32 | chunks: Optional[int] = None 33 | 34 | 35 | class Documents(BaseModel): 36 | object: Literal["list"] = "list" 37 | data: List[Document] 38 | 39 | 40 | class DocumentResponse(BaseModel): 41 | id: int = Field(default=..., description="The ID of the document created.") 42 | -------------------------------------------------------------------------------- /app/schemas/embeddings.py: -------------------------------------------------------------------------------- 1 | from typing import List, Literal, Optional, Union 2 | 3 | from openai.types import CreateEmbeddingResponse 4 | from pydantic import Field, field_validator 5 | 6 | from app.schemas import BaseModel 7 | from app.schemas.usage import Usage 8 | 9 | 10 | class OpenAIBaseModel(BaseModel): 11 | class Config: 12 | extra = "allow" 13 | 14 | 15 | class EmbeddingsRequest(OpenAIBaseModel): 16 | input: Union[List[int], List[List[int]], str, List[str]] = Field(default=..., description="Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays. The input must not exceed the max input tokens for the model (call `/v1/models` endpoint to get the `max_context_length` by model) and cannot be an empty string.") # fmt: off 17 | model: str = Field(default=..., description="ID of the model to use. Call `/v1/models` endpoint to get the list of available models, only `text-embeddings-inference` model type is supported.") # fmt: off 18 | dimensions: Optional[int] = Field(default=None, description="The number of dimensions the resulting output embeddings should have.") # fmt: off 19 | encoding_format: Optional[Literal["float"]] = Field(default="float", description="The format of the output embeddings. 
Only `float` is supported.") # fmt: off 20 | 21 | @field_validator("input") 22 | def validate_input(cls, input): 23 | assert input, "input must not be an empty string" 24 | return input 25 | 26 | 27 | class Embeddings(CreateEmbeddingResponse): 28 | id: str = Field(default=None, description="A unique identifier for the embedding.") 29 | usage: Usage = Field(default=None, description="Usage information for the request.") 30 | -------------------------------------------------------------------------------- /app/schemas/files.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import List, Literal 3 | 4 | from pydantic import Field, field_validator, model_validator 5 | 6 | from app.schemas import BaseModel 7 | from app.schemas.documents import ChunkerName 8 | 9 | 10 | class ChunkerArgs(BaseModel): 11 | chunk_size: int = Field(default=2048, description="The size of the chunks to use for the file upload.") # fmt: off 12 | chunk_overlap: int = Field(default=0, description="The overlap of the chunks to use for the file upload.") # fmt: off 13 | length_function: Literal["len"] = Field(default="len", description="The function to use to calculate the length of the chunks to use for the file upload.") # fmt: off 14 | is_separator_regex: bool = Field(default=False, description="Whether the separator is a regex to use for the file upload.") # fmt: off 15 | separators: List[str] = Field(default=["\n\n", "\n", ". ", " "], description="The separators to use for the file upload.") # fmt: off 16 | chunk_min_size: int = Field(default=0, description="The minimum size of the chunks to use for the file upload.") # fmt: off 17 | 18 | 19 | class Chunker(BaseModel): 20 | name: Literal[ChunkerName.RECURSIVE_CHARACTER_TEXT_SPLITTER, ChunkerName.NO_SPLITTER, "LangchainRecursiveCharacterTextSplitter", "NoChunker"] = Field(default=ChunkerName.RECURSIVE_CHARACTER_TEXT_SPLITTER, description="The name of the chunker to use for the file upload.") # fmt: off 21 | args: ChunkerArgs = Field(default_factory=ChunkerArgs, description="The arguments to use for the chunker to use for the file upload.") # fmt: off 22 | 23 | @field_validator("name") 24 | def validate_name(cls, name): 25 | if name == "LangchainRecursiveCharacterTextSplitter": 26 | name = ChunkerName.RECURSIVE_CHARACTER_TEXT_SPLITTER 27 | elif name == "NoChunker": 28 | name = ChunkerName.NO_SPLITTER 29 | return name 30 | 31 | 32 | class FileResponse(BaseModel): 33 | id: int = Field(default=..., description="The ID of the file.") 34 | 35 | 36 | class FilesRequest(BaseModel): 37 | collection: int = Field(default=..., description="The collection ID to use for the file upload. 
The file will be vectorized with model defined by the collection.") # fmt: off 38 | chunker: Chunker = Field(default_factory=Chunker, description="The chunker to use for the file upload.") # fmt: off 39 | 40 | @model_validator(mode="before") 41 | @classmethod 42 | def convert_form_to_json(cls, values): 43 | if isinstance(values, str): 44 | return cls(**json.loads(values)) 45 | return values 46 | -------------------------------------------------------------------------------- /app/schemas/models.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from typing import List, Literal, Optional 3 | 4 | from openai.types import Model 5 | from pydantic import Field 6 | 7 | from app.schemas import BaseModel 8 | 9 | 10 | class ModelCosts(BaseModel): 11 | prompt_tokens: float = Field(default=0.0, ge=0.0, description="Cost of a million prompt tokens (decrease user budget)") 12 | completion_tokens: float = Field(default=0.0, ge=0.0, description="Cost of a million completion tokens (decrease user budget)") 13 | 14 | 15 | class ModelType(str, Enum): 16 | IMAGE_TEXT_TO_TEXT = "image-text-to-text" 17 | AUTOMATIC_SPEECH_RECOGNITION = "automatic-speech-recognition" 18 | TEXT_EMBEDDINGS_INFERENCE = "text-embeddings-inference" 19 | TEXT_GENERATION = "text-generation" 20 | TEXT_CLASSIFICATION = "text-classification" 21 | 22 | 23 | class Model(Model): 24 | object: Literal["model"] = "model" 25 | max_context_length: Optional[int] = None 26 | type: ModelType 27 | aliases: Optional[List[str]] = [] 28 | costs: ModelCosts 29 | 30 | 31 | class Models(BaseModel): 32 | object: Literal["list"] = "list" 33 | data: List[Model] 34 | -------------------------------------------------------------------------------- /app/schemas/ocr.py: -------------------------------------------------------------------------------- 1 | from fastapi import Form 2 | 3 | DEFAULT_PROMPT = """Tu es un système d'OCR très précis. Extrait tout le texte visible de cette image. 4 | Ne décris pas l'image, n'ajoute pas de commentaires. Réponds uniquement avec le texte brut extrait, 5 | en préservant les paragraphes, la mise en forme et la structure du document. 6 | Si aucun texte n'est visible, réponds avec 'Aucun texte détecté'. 7 | Je veux une sortie au format markdown. 
Tu dois respecter le format de sortie pour bien conserver les tableaux.""" 8 | 9 | 10 | ModelForm: str = Form(default=..., description="The model to use for the OCR.") # fmt: off 11 | DPIForm: int = Form(default=150, ge=100, le=600, description="The DPI to use for the OCR (each page will be rendered as an image at this DPI).") # fmt: off 12 | PromptForm: str = Form(default=DEFAULT_PROMPT, description="The prompt to use for the OCR.") # fmt: off 13 | -------------------------------------------------------------------------------- /app/schemas/parse.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from typing import List, Literal, Optional 3 | 4 | from fastapi import File, Form, UploadFile 5 | from pydantic import Field 6 | 7 | from app.schemas import BaseModel 8 | from app.schemas.usage import Usage 9 | from app.utils.variables import AUDIO_SUPPORTED_LANGUAGES 10 | 11 | LANGUAGES = {key.title(): value for key, value in AUDIO_SUPPORTED_LANGUAGES.items()} 12 | LANGUAGES = list(LANGUAGES.keys()) + list(LANGUAGES.values()) 13 | LANGUAGES = {str(lang).upper(): str(lang) for lang in sorted(set(LANGUAGES))} 14 | 15 | Languages = Enum("Language", LANGUAGES, type=str) 16 | 17 | 18 | class ParsedDocumentOutputFormat(str, Enum): 19 | MARKDOWN = "markdown" 20 | JSON = "json" 21 | HTML = "html" 22 | 23 | 24 | class ParsedDocumentMetadata(BaseModel): 25 | document_name: str 26 | page: int = 0 27 | 28 | 29 | class ParsedDocumentPage(BaseModel): 30 | object: Literal["documentPage"] = "documentPage" 31 | content: str 32 | images: dict[str, str] 33 | metadata: ParsedDocumentMetadata 34 | 35 | 36 | class ParsedDocument(BaseModel): 37 | object: Literal["list"] = "list" 38 | data: List[ParsedDocumentPage] 39 | usage: Usage = Field(default=None, description="Usage information for the request.") 40 | 41 | 42 | FileForm: UploadFile = File(..., description="The file to parse.") # fmt: off 43 | PaginateOutputForm: Optional[bool] = Form(default=False, description="Whether to paginate the output. Defaults to False. If set to True, each page of the output will be separated by a horizontal rule that contains the page number (2 newlines, {PAGE_NUMBER}, 48 - characters, 2 newlines).") # fmt: off 44 | PageRangeForm: str = Form(default="", description="Page range to convert, specify comma separated page numbers or ranges. Example: '0,5-10,20'", examples=["0,5-10,20"], pattern=r"^(([0-9]+-[0-9]+|[0-9]+)(,([0-9]+-[0-9]+|[0-9]+))*)?$") # fmt: off 45 | LanguagesForm: Optional[Languages] = Form(default=Languages.FR, description="Comma separated list of languages to use for OCR. Must be either the names or codes from from https://github.com/VikParuchuri/surya/blob/master/surya/recognition/languages.py.", examples=["fr"]) # fmt: off 46 | ForceOCRForm: bool = Form(default=False, description="Force OCR on all pages of the PDF. Defaults to False. This can lead to worse results if you have good text in your PDFs (which is true in most cases).") # fmt: off 47 | OutputFormatForm: ParsedDocumentOutputFormat = Form(default=ParsedDocumentOutputFormat.MARKDOWN, description="The format to output the text in. Can be 'markdown', 'json', or 'html'. Defaults to 'markdown'.") # fmt: off 48 | UseLLMForm: Optional[bool] = Form(default=False, description="Use LLM to improve conversion accuracy. 
Requires API key if using external services.") # fmt: off 49 | -------------------------------------------------------------------------------- /app/schemas/rerank.py: -------------------------------------------------------------------------------- 1 | from typing import List, Literal 2 | 3 | from pydantic import Field 4 | 5 | from app.schemas import BaseModel 6 | from app.schemas.usage import Usage 7 | 8 | 9 | class RerankRequest(BaseModel): 10 | prompt: str = Field(default=..., description="The prompt to use for the reranking.") # fmt: off 11 | input: List[str] = Field(default=..., description="List of input texts to rerank by relevance to the prompt.") # fmt: off 12 | model: str = Field(default=..., description="The model to use for the reranking, call `/v1/models` endpoint to get the list of available models, only `text-classification` model type is supported.") # fmt: off 13 | 14 | 15 | class Rerank(BaseModel): 16 | object: Literal["rerank"] = "rerank" 17 | score: float 18 | index: int 19 | 20 | 21 | class Reranks(BaseModel): 22 | id: str = Field(default=None, description="A unique identifier for the reranking.") 23 | object: Literal["list"] = "list" 24 | data: List[Rerank] 25 | usage: Usage = Field(default=None, description="Usage information for the request.") 26 | -------------------------------------------------------------------------------- /app/schemas/search.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from typing import Any, List, Literal, Optional 3 | 4 | from pydantic import Field, field_validator, model_validator 5 | 6 | from app.schemas import BaseModel 7 | from app.schemas.chunks import Chunk 8 | from app.schemas.usage import Usage 9 | from app.utils.exceptions import WrongSearchMethodException 10 | 11 | 12 | class SearchMethod(str, Enum): 13 | HYBRID = "hybrid" 14 | LEXICAL = "lexical" 15 | SEMANTIC = "semantic" 16 | MULTIAGENT = "multiagent" 17 | 18 | 19 | class SearchArgs(BaseModel): 20 | collections: List[Any] = Field(default=[], description="List of collections ID") 21 | rff_k: int = Field(default=20, description="k constant in RFF algorithm") 22 | k: int = Field(gt=0, default=4, description="Number of results to return") 23 | method: SearchMethod = Field(default=SearchMethod.SEMANTIC) 24 | score_threshold: Optional[float] = Field(default=0.0, ge=0.0, le=1.0, description="Score of cosine similarity threshold for filtering results, only available for semantic search method.") # fmt: off 25 | web_search: bool = Field(default=False, description="Whether add internet search to the results.") 26 | 27 | @model_validator(mode="after") 28 | def score_threshold_filter(cls, values): 29 | if values.score_threshold and values.method not in (SearchMethod.SEMANTIC, SearchMethod.MULTIAGENT): 30 | raise WrongSearchMethodException(detail="Score threshold is only available for semantic and multiagent search methods.") 31 | return values 32 | 33 | 34 | class SearchRequest(SearchArgs): 35 | prompt: str = Field(description="Prompt related to the search") 36 | 37 | @field_validator("prompt") 38 | def blank_string(prompt) -> str: 39 | if prompt.strip() == "": 40 | raise ValueError("Prompt cannot be empty") 41 | return prompt 42 | 43 | 44 | class Search(BaseModel): 45 | method: SearchMethod 46 | score: float 47 | chunk: Chunk 48 | 49 | 50 | class Searches(BaseModel): 51 | object: Literal["list"] = "list" 52 | data: List[Search] 53 | usage: Usage = Field(default=None, description="Usage information for the request.") 54 | 
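A note on the search schema above: `SearchRequest` rejects blank prompts outright, and `score_threshold` is only accepted for the semantic and multiagent methods. A minimal sketch of that validation behavior (assuming the `app` package is importable, e.g. after an editable install from the repo root):

```python
# Sketch of the SearchRequest invariants; assumes the app package is importable.
from app.schemas.search import SearchMethod, SearchRequest
from app.utils.exceptions import WrongSearchMethodException

# Valid: semantic search accepts a score threshold.
request = SearchRequest(prompt="What is Albert?", method=SearchMethod.SEMANTIC, score_threshold=0.5)
assert request.k == 4  # default number of results

# Invalid: a score threshold combined with lexical search raises the custom exception,
# which is not a ValueError and therefore propagates out of the model validator as-is.
try:
    SearchRequest(prompt="What is Albert?", method=SearchMethod.LEXICAL, score_threshold=0.5)
except WrongSearchMethodException:
    print("score_threshold is only allowed for semantic and multiagent searches")

# Invalid: a blank prompt fails the field validator; pydantic wraps the ValueError
# into a ValidationError (itself a ValueError subclass).
try:
    SearchRequest(prompt="   ", method=SearchMethod.SEMANTIC)
except ValueError:
    print("prompt cannot be empty")
```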
-------------------------------------------------------------------------------- /app/schemas/usage.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | from pydantic import Field 4 | 5 | from app.schemas import BaseModel 6 | 7 | 8 | class CarbonFootprintUsageKWh(BaseModel): 9 | min: Optional[float] = Field(default=None, description="Minimum carbon footprint in kWh.") 10 | max: Optional[float] = Field(default=None, description="Maximum carbon footprint in kWh.") 11 | 12 | 13 | class CarbonFootprintUsageKgCO2eq(BaseModel): 14 | min: Optional[float] = Field(default=None, description="Minimum carbon footprint in kgCO2eq (global warming potential).") 15 | max: Optional[float] = Field(default=None, description="Maximum carbon footprint in kgCO2eq (global warming potential).") 16 | 17 | 18 | class CarbonFootprintUsage(BaseModel): 19 | kWh: CarbonFootprintUsageKWh = Field(default_factory=CarbonFootprintUsageKWh) 20 | kgCO2eq: CarbonFootprintUsageKgCO2eq = Field(default_factory=CarbonFootprintUsageKgCO2eq) 21 | 22 | 23 | class BaseUsage(BaseModel): 24 | prompt_tokens: int = Field(default=0, description="Number of prompt tokens (e.g. input tokens).") 25 | completion_tokens: int = Field(default=0, description="Number of completion tokens (e.g. output tokens).") 26 | total_tokens: int = Field(default=0, description="Total number of tokens (e.g. input and output tokens).") 27 | cost: float = Field(default=0.0, description="Total cost of the request.") 28 | carbon: CarbonFootprintUsage = Field(default_factory=CarbonFootprintUsage) 29 | 30 | 31 | class Detail(BaseModel): 32 | id: str 33 | model: str 34 | usage: BaseUsage = Field(default_factory=BaseUsage) 35 | 36 | 37 | class Usage(BaseUsage): 38 | details: List[Detail] = [] 39 | -------------------------------------------------------------------------------- /app/sql/session.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession 2 | from sqlalchemy.orm import sessionmaker 3 | 4 | from app.utils.settings import settings 5 | 6 | 7 | engine = create_async_engine(**settings.databases.sql.args) 8 | async_session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False) 9 | 10 | 11 | async def get_db(): 12 | async with async_session() as session: 13 | try: 14 | yield session 15 | finally: 16 | await session.close() 17 | -------------------------------------------------------------------------------- /app/tests/integ/assets/audio.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/etalab-ia/albert-api/0446f06baa3d561bb1bbd34ab1dd9a0441dd9302/app/tests/integ/assets/audio.mp3 -------------------------------------------------------------------------------- /app/tests/integ/assets/audio.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/etalab-ia/albert-api/0446f06baa3d561bb1bbd34ab1dd9a0441dd9302/app/tests/integ/assets/audio.wav -------------------------------------------------------------------------------- /app/tests/integ/assets/json_wrong_format.json: -------------------------------------------------------------------------------- 1 | { 2 | "fruit": "Apple", 3 | "size": "Large", 4 | "color": "Red" 5 | } 6 | -------------------------------------------------------------------------------- /app/tests/integ/assets/markdown.md: 
-------------------------------------------------------------------------------- 1 | # Dauphinois de potimarron - recette végétarienne cuisine-libre.org 2 | 3 | ## Description 4 | 5 | Préparation : 20 min Cuisson : 1 h [Four](https://www.cuisine-libre.org/four) 6 | 7 | ![Végétarien](https://www.cuisine-libre.org/local/cache-vignettes/L40xH40/moton18-9d595.png?1644794211 "Végétarien") 8 | 9 | Sans viande Sans œuf 10 | 11 | Dauphinois de potimarron Rated 5.00 out of 5 based on 2 ratings. 12 | 13 | ![](https://www.cuisine-libre.org/local/cache-gd2/6d/60311ebc0c8cb1dfbbe3e5cf92e9fd.jpg?1675005547) 14 | 15 | ![Appétissante photo DR](https://www.cuisine-libre.org/local/cache-gd2/c6/f3d3dd24ed5a690a2e6ad481f8a95c.jpg?1675005547) 16 | 17 | ## Ingrédients pour 4 18 | 19 | - potimarron de 1 kg (ou plus) 20 | - crème fraiche liquide (fleurette) 21 | - ail 22 | - beurre 23 | - sel, poivre 24 | 25 | ## Préparation 26 | 27 | Préchauffer le four à 180/200°C. 28 | 29 | Couper le potimarron en « taillons » de quelques millimètres d’épaisseur. 30 | Frotter d’une gousse d’ail épluchée un plat à four en terre. Y répartir les « taillons » en couches, saler et poivrer entre chaque couche. Verser la crème, qui doit juste couvrir le potimarron (jusqu’à un litre en fonction de la taille du plat). Parsemer de quelques noisettes de beurre, pour le gratiné final. 31 | 32 | Cuire une heure environ, forcer à 220°C les dix dernières minutes. 33 | 34 | ## <:info_post_scriptum:> 35 | 36 | Un plat qui se réchauffe plus facilement que le vrai [gratin dauphinois](https://www.cuisine-libre.org/gratin-dauphinois) (aux pommes de terre). Cette recette m’a été proposée par ma productrice de légumes préférée : c’est la recette du gratin dauphinois appliquée à la courge. Elle convient au potimarron et aux variétés de citrouilles fermes. 
37 | 38 | - [![](https://www.cuisine-libre.org/local/cache-gd2/13/f50d96b2f12916e2df6b65f1bd381c.jpg?1644794690)Potimarron](https://www.cuisine-libre.org/potimarron) 39 | -------------------------------------------------------------------------------- /app/tests/integ/assets/pdf.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/etalab-ia/albert-api/0446f06baa3d561bb1bbd34ab1dd9a0441dd9302/app/tests/integ/assets/pdf.pdf -------------------------------------------------------------------------------- /app/tests/integ/assets/pdf_too_large.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/etalab-ia/albert-api/0446f06baa3d561bb1bbd34ab1dd9a0441dd9302/app/tests/integ/assets/pdf_too_large.pdf -------------------------------------------------------------------------------- /app/tests/integ/fixtures/fixtures.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | 3 | DEFAULT_LLM_RESPONSE = { 4 | "id": "chatcmpl-9361ad51d8f04a46a6bb83ea54d2f25c", 5 | "choices": [ 6 | { 7 | "finish_reason": "stop", 8 | "index": 0, 9 | "logprobs": None, 10 | "message": { 11 | "content": "Salut, je suis albert-API", 12 | "refusal": None, 13 | "role": "", 14 | "audio": None, 15 | "function_call": None, 16 | "tool_calls": [], 17 | "reasoning_content": None, 18 | }, 19 | "stop_reason": None, 20 | } 21 | ], 22 | "created": 1747410275, 23 | "model": "mistralai/Mistral-Small-3.1-24B-Instruct-2503", 24 | "object": "chat.completion", 25 | "service_tier": None, 26 | "system_fingerprint": None, 27 | "usage": {"completion_tokens": 36, "prompt_tokens": 241, "total_tokens": 277, "completion_tokens_details": None, "prompt_tokens_details": None}, 28 | "search_results": [], 29 | "prompt_logprobs": None, 30 | } 31 | 32 | DEFAULT_MCP_TOOLS = { 33 | "weather": { 34 | "_meta": None, 35 | "nextCursor": None, 36 | "tools": [ 37 | { 38 | "name": "get_alerts", 39 | "description": "Get weather alerts for a US state.\n\n Args:\n state: Two-letter US state code (e.g. 
CA, NY)\n ", 40 | "inputSchema": { 41 | "properties": {"state": {"title": "State", "type": "string"}}, 42 | "required": ["state"], 43 | "title": "get_alertsArguments", 44 | "type": "object", 45 | }, 46 | "annotations": None, 47 | }, 48 | { 49 | "name": "get_forecast", 50 | "description": "Get weather forecast for a location.\n\n Args:\n latitude: Latitude of the location\n longitude: Longitude of the location\n ", 51 | "inputSchema": { 52 | "properties": {"latitude": {"title": "Latitude", "type": "number"}, "longitude": {"title": "Longitude", "type": "number"}}, 53 | "required": ["latitude", "longitude"], 54 | "title": "get_forecastArguments", 55 | "type": "object", 56 | }, 57 | "annotations": None, 58 | }, 59 | ], 60 | } 61 | } 62 | 63 | 64 | def merge_with_defaults(user_data, default_data): 65 | if isinstance(default_data, dict): 66 | result = {} 67 | for key in default_data: 68 | if key in user_data: 69 | result[key] = merge_with_defaults(user_data[key], default_data[key]) 70 | else: 71 | result[key] = deepcopy(default_data[key]) 72 | return result 73 | elif isinstance(default_data, list) and default_data: 74 | if isinstance(user_data, list): 75 | return [merge_with_defaults(item, default_data[0]) for item in user_data] 76 | else: 77 | return deepcopy(default_data) 78 | else: 79 | return user_data if user_data is not None else deepcopy(default_data) 80 | 81 | 82 | def generate_mocked_llm_response(**kwargs): 83 | return merge_with_defaults(kwargs, DEFAULT_LLM_RESPONSE) 84 | 85 | 86 | def generate_mocked_mcp_bridge_tools(**kwargs): 87 | return merge_with_defaults(kwargs, DEFAULT_MCP_TOOLS) 88 | -------------------------------------------------------------------------------- /app/tests/integ/snapshots/test_audio/test_audio_transcriptions_invalid_model/audio_transcriptions_invalid_model: -------------------------------------------------------------------------------- 1 | {"detail":"Model not found."} -------------------------------------------------------------------------------- /app/tests/integ/snapshots/test_audio/test_audio_transcriptions_mp3/audio_transcriptions_mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/etalab-ia/albert-api/0446f06baa3d561bb1bbd34ab1dd9a0441dd9302/app/tests/integ/snapshots/test_audio/test_audio_transcriptions_mp3/audio_transcriptions_mp3 -------------------------------------------------------------------------------- /app/tests/integ/snapshots/test_audio/test_audio_transcriptions_text_output/audio_transcriptions_text_output: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/etalab-ia/albert-api/0446f06baa3d561bb1bbd34ab1dd9a0441dd9302/app/tests/integ/snapshots/test_audio/test_audio_transcriptions_text_output/audio_transcriptions_text_output -------------------------------------------------------------------------------- /app/tests/integ/snapshots/test_audio/test_audio_transcriptions_wav/audio_transcriptions_wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/etalab-ia/albert-api/0446f06baa3d561bb1bbd34ab1dd9a0441dd9302/app/tests/integ/snapshots/test_audio/test_audio_transcriptions_wav/audio_transcriptions_wav -------------------------------------------------------------------------------- /app/tests/integ/snapshots/test_ocr/test_ocr_invalid_file_type/ocr_invalid_file_type: -------------------------------------------------------------------------------- 1 | 
{'detail': 'Unsupported file type.'} -------------------------------------------------------------------------------- /app/tests/integ/snapshots/test_ocr/test_ocr_too_large_file/ocr_too_large_file: -------------------------------------------------------------------------------- 1 | {'detail': 'File size limit exceeded (max: 20971520 bytes).'} -------------------------------------------------------------------------------- /app/tests/integ/snapshots/test_ocr/test_ocr_without_authentication/ocr_without_authentication: -------------------------------------------------------------------------------- 1 | {'detail': 'Not authenticated'} -------------------------------------------------------------------------------- /app/tests/integ/test_chunks.py: -------------------------------------------------------------------------------- 1 | import os 2 | from uuid import uuid4 3 | 4 | from fastapi.testclient import TestClient 5 | import pytest 6 | 7 | from app.schemas.chunks import Chunks 8 | from app.schemas.collections import CollectionVisibility 9 | from app.utils.variables import ENDPOINT__CHUNKS, ENDPOINT__COLLECTIONS, ENDPOINT__DOCUMENTS, ENDPOINT__FILES 10 | 11 | 12 | @pytest.fixture(scope="module") 13 | def setup(client: TestClient): 14 | # Create a collection 15 | response = client.post_without_permissions( 16 | url=f"/v1{ENDPOINT__COLLECTIONS}", 17 | json={"name": f"test_collection_{uuid4()}", "visibility": CollectionVisibility.PRIVATE}, 18 | ) 19 | assert response.status_code == 201 20 | COLLECTION_ID = response.json()["id"] 21 | 22 | # Upload a file 23 | file_path = "app/tests/integ/assets/json.json" 24 | with open(file_path, "rb") as file: 25 | files = {"file": (os.path.basename(file_path), file, "application/json")} 26 | data = {"request": '{"collection": "%s"}' % COLLECTION_ID} 27 | response = client.post_without_permissions(url=f"/v1{ENDPOINT__FILES}", data=data, files=files) 28 | file.close() 29 | assert response.status_code == 201, response.text 30 | 31 | # Retrieve the document ID 32 | response = client.get_without_permissions(url=f"/v1{ENDPOINT__DOCUMENTS}", params={"collection": COLLECTION_ID}) 33 | assert response.status_code == 200, response.text 34 | DOCUMENT_ID = response.json()["data"][0]["id"] 35 | 36 | yield COLLECTION_ID, DOCUMENT_ID 37 | 38 | 39 | @pytest.mark.usefixtures("client", "setup") 40 | class TestChunks: 41 | def test_get_chunks(self, client: TestClient, setup): 42 | COLLECTION_ID, DOCUMENT_ID = setup 43 | response = client.get_without_permissions(url=f"/v1{ENDPOINT__CHUNKS}/{DOCUMENT_ID}") 44 | assert response.status_code == 200, response.text 45 | 46 | chunks = Chunks(**response.json()) # test output format 47 | 48 | assert len(chunks.data) > 0 49 | assert chunks.data[0].metadata["document_id"] == DOCUMENT_ID 50 | 51 | def test_delete_chunks(self, client: TestClient, setup): 52 | COLLECTION_ID, DOCUMENT_ID = setup 53 | response = client.delete_without_permissions(url=f"/v1{ENDPOINT__DOCUMENTS}/{DOCUMENT_ID}") 54 | assert response.status_code == 204, response.text 55 | 56 | response = client.get_without_permissions(url=f"/v1{ENDPOINT__CHUNKS}/{DOCUMENT_ID}") 57 | assert response.status_code == 404, response.text 58 | 59 | def test_chunk_not_found(self, client: TestClient, setup): 60 | COLLECTION_ID, DOCUMENT_ID = setup 61 | document_id = 1000 62 | response = client.get_without_permissions(url=f"/v1{ENDPOINT__CHUNKS}/{document_id}") 63 | assert response.status_code == 404, response.text 64 | 
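The upload pattern shared by these integration tests deserves a note: the multipart `request` form field carries a JSON string that `FilesRequest.convert_form_to_json` parses before validation, so chunker settings travel inside that JSON rather than as separate form fields. A hypothetical client-side sketch — the base URL, API key, and collection ID are placeholders, not values from this repo; the chunker name is one of the accepted legacy literals, which `validate_name` maps to the canonical enum member:

```python
import json

import requests  # third-party HTTP client, assumed installed

# Hypothetical payload: collection ID 1 stands in for an ID returned by POST /v1/collections.
payload = {
    "collection": 1,
    "chunker": {
        "name": "LangchainRecursiveCharacterTextSplitter",  # legacy alias accepted by validate_name
        "args": {"chunk_size": 512, "chunk_overlap": 64, "chunk_min_size": 32},
    },
}

with open("app/tests/integ/assets/json.json", "rb") as file:
    response = requests.post(
        "http://localhost:8000/v1/files",  # assumed local deployment
        headers={"Authorization": "Bearer <your-api-key>"},
        data={"request": json.dumps(payload)},  # JSON string inside a form field
        files={"file": ("json.json", file, "application/json")},
    )

assert response.status_code == 201, response.text
print(response.json()["id"])  # FileResponse.id of the created document
```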
-------------------------------------------------------------------------------- /app/tests/integ/test_documents.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from uuid import uuid4 4 | 5 | from fastapi.testclient import TestClient 6 | import pytest 7 | 8 | from app.schemas.collections import CollectionVisibility 9 | from app.schemas.documents import Document, Documents 10 | from app.utils.variables import ENDPOINT__COLLECTIONS, ENDPOINT__DOCUMENTS, ENDPOINT__FILES 11 | 12 | 13 | @pytest.fixture(scope="module") 14 | def setup(client): 15 | response = client.post_without_permissions( 16 | url=f"/v1{ENDPOINT__COLLECTIONS}", 17 | json={"name": f"test_collection_{str(uuid4())}", "visibility": CollectionVisibility.PRIVATE}, 18 | ) 19 | assert response.status_code == 201, response.text 20 | COLLECTION_ID = response.json()["id"] 21 | 22 | file_path = "app/tests/integ/assets/json.json" 23 | with open(file_path, "rb") as file: 24 | files = {"file": (os.path.basename(file_path), file, "application/json")} 25 | data = {"request": '{"collection": "%s"}' % COLLECTION_ID} 26 | response = client.post_without_permissions(url=f"/v1{ENDPOINT__FILES}", data=data, files=files) 27 | file.close() 28 | assert response.status_code == 201, response.text 29 | 30 | DOCUMENT_ID = response.json()["id"] 31 | 32 | yield COLLECTION_ID, DOCUMENT_ID 33 | 34 | 35 | @pytest.mark.usefixtures("client", "setup") 36 | class TestDocuments: 37 | def test_get_document(self, client: TestClient, setup): 38 | COLLECTION_ID, DOCUMENT_ID = setup 39 | response = client.get_without_permissions(url=f"/v1{ENDPOINT__DOCUMENTS}", params={"collection": COLLECTION_ID}) 40 | assert response.status_code == 200, response.text 41 | 42 | documents = [document for document in response.json()["data"] if document["id"] == DOCUMENT_ID] 43 | assert len(documents) == 1 44 | 45 | def test_format_document(self, client: TestClient, setup): 46 | COLLECTION_ID, DOCUMENT_ID = setup 47 | 48 | response = client.get_without_permissions(url=f"/v1{ENDPOINT__DOCUMENTS}") 49 | assert response.status_code == 200, response.text 50 | 51 | documents = response.json() 52 | Documents(**documents) # test output format 53 | 54 | response = client.get_without_permissions(url=f"/v1{ENDPOINT__DOCUMENTS}", params={"collection": COLLECTION_ID}) 55 | assert response.status_code == 200, response.text 56 | 57 | documents = response.json() 58 | Documents(**documents) # test output format 59 | 60 | response = client.get_without_permissions(url=f"/v1{ENDPOINT__DOCUMENTS}/{DOCUMENT_ID}") 61 | assert response.status_code == 200, response.text 62 | 63 | document = response.json() 64 | Document(**document) # test output format 65 | 66 | def test_collection_document_count(self, client: TestClient, setup): 67 | COLLECTION_ID, DOCUMENT_ID = setup 68 | 69 | with open("app/tests/integ/assets/json.json", "r") as f: 70 | data = json.load(f) 71 | document_count = len(data) 72 | 73 | response = client.get_without_permissions(url=f"/v1{ENDPOINT__COLLECTIONS}/{COLLECTION_ID}") 74 | collection = response.json() 75 | assert collection["documents"] == document_count 76 | 77 | def test_delete_document(self, client: TestClient, setup): 78 | COLLECTION_ID, DOCUMENT_ID = setup 79 | 80 | response = client.delete_without_permissions(url=f"/v1{ENDPOINT__DOCUMENTS}/{DOCUMENT_ID}") 81 | assert response.status_code == 204, response.text 82 | 83 | response = client.get_without_permissions(url=f"/v1{ENDPOINT__DOCUMENTS}") 84 | documents = 
response.json()["data"] 85 | assert DOCUMENT_ID not in [document["id"] for document in documents] 86 | -------------------------------------------------------------------------------- /app/tests/integ/test_identityaccessmanager.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from app.helpers._identityaccessmanager import IdentityAccessManager 3 | from app.schemas.auth import Limit, PermissionType 4 | 5 | 6 | @pytest.mark.usefixtures("client", "async_db_session") 7 | class TestIdentityAccessManager: 8 | @pytest.mark.asyncio 9 | async def test_update_role(self, async_db_session, client): 10 | """Test the update_role function of IdentityAccessManager.""" 11 | async for session in async_db_session: 12 | iam = IdentityAccessManager() 13 | 14 | # Create a role to update 15 | role_name = "test-role" 16 | limits = [ 17 | Limit(model="test-model", type="rpm", value=100), 18 | Limit(model="test-model", type="rpd", value=1000), 19 | ] 20 | permissions = [PermissionType.CREATE_ROLE, PermissionType.CREATE_USER] 21 | 22 | role_id = await iam.create_role( 23 | session=session, 24 | name=role_name, 25 | limits=limits, 26 | permissions=permissions, 27 | ) 28 | 29 | # Update the role 30 | new_name = "updated-role" 31 | new_limits = [ 32 | Limit(model="new-model", type="rpm", value=200), 33 | ] 34 | new_permissions = [PermissionType.DELETE_ROLE] 35 | 36 | await iam.update_role( 37 | session=session, 38 | role_id=role_id, 39 | name=new_name, 40 | limits=new_limits, 41 | permissions=new_permissions, 42 | ) 43 | 44 | # Fetch the updated role 45 | roles = await iam.get_roles(session=session, role_id=role_id) 46 | updated_role = roles[0] 47 | 48 | assert updated_role.name == new_name 49 | assert len(updated_role.limits) == len(new_limits) 50 | assert updated_role.limits[0].model == "new-model" 51 | assert updated_role.limits[0].type == "rpm" 52 | assert updated_role.limits[0].value == 200 53 | assert len(updated_role.permissions) == len(new_permissions) 54 | assert updated_role.permissions[0] == PermissionType.DELETE_ROLE 55 | -------------------------------------------------------------------------------- /app/tests/integ/test_models.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from app.schemas.models import Model, Models 4 | from app.utils.settings import settings 5 | from fastapi.testclient import TestClient 6 | from app.utils.variables import ENDPOINT__MODELS 7 | 8 | 9 | @pytest.mark.usefixtures("client") 10 | class TestModels: 11 | def test_get_models_response_status_code(self, client: TestClient): 12 | """Test the GET /models response status code.""" 13 | response = client.get_without_permissions(url=f"/v1{ENDPOINT__MODELS}") 14 | assert response.status_code == 200, f"error: retrieve models ({response.status_code})" 15 | 16 | models = Models(data=[Model(**model) for model in response.json()["data"]]) 17 | assert isinstance(models, Models) 18 | assert all(isinstance(model, Model) for model in models.data) 19 | 20 | model = models.data[0].id 21 | response = client.get_without_permissions(url=f"/v1{ENDPOINT__MODELS}/{model}") 22 | assert response.status_code == 200, f"error: retrieve model ({response.status_code})" 23 | 24 | model = Model(**response.json()) 25 | assert isinstance(model, Model) 26 | 27 | def test_get_models_non_existing_model(self, client: TestClient): 28 | """Test the GET /models response status code for a non-existing model.""" 29 | response = 
client.get_without_permissions(url=f"/v1{ENDPOINT__MODELS}/non-existing-model") 30 | assert response.status_code == 404, f"error: retrieve non-existing model ({response.status_code})" 31 | 32 | def test_get_models_aliases(self, client: TestClient): 33 | """Test the GET /models response status code for a non-existing model.""" 34 | model = settings.models[0] 35 | 36 | response = client.get_without_permissions(url=f"/v1{ENDPOINT__MODELS}/{model.id}") 37 | assert response.json()["aliases"] == model.aliases 38 | 39 | response = client.get_without_permissions(url=f"/v1{ENDPOINT__MODELS}/{model.aliases[0]}") 40 | assert response.json()["id"] == model.id 41 | -------------------------------------------------------------------------------- /app/tests/integ/test_parsing.py: -------------------------------------------------------------------------------- 1 | # Test à faire, regarder le test ocr 2 | # Faire des tests pour le endpoint parse, le client marker ? la classe parse ? 3 | 4 | 5 | import os 6 | 7 | import pytest 8 | from fastapi.testclient import TestClient 9 | 10 | from app.utils.variables import ENDPOINT__PARSE 11 | 12 | current_path = os.path.dirname(__file__) 13 | 14 | 15 | @pytest.mark.usefixtures("client") 16 | class TestParsingEndpoint: 17 | def test_parser_pdf_successful(self, client: TestClient): 18 | """Test successful OCR processing of a PDF file.""" 19 | 20 | file_path = os.path.join(current_path, "assets/pdf.pdf") 21 | with open(file_path, "rb") as file: 22 | files = {"file": (os.path.basename(file_path), file, "application/pdf")} 23 | response = client.post_without_permissions(f"/v1{ENDPOINT__PARSE}", files=files) 24 | 25 | assert response.status_code == 200, response.data[0].content 26 | -------------------------------------------------------------------------------- /app/tests/integ/test_rerank.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from fastapi.testclient import TestClient 4 | import pytest 5 | 6 | from app.schemas.models import ModelType 7 | from app.schemas.rerank import Reranks 8 | from app.utils.variables import ENDPOINT__MODELS, ENDPOINT__RERANK 9 | 10 | 11 | @pytest.fixture(scope="module") 12 | def setup(client: TestClient): 13 | response = client.get_without_permissions(url=f"/v1{ENDPOINT__MODELS}") 14 | assert response.status_code == 200, f"error: retrieve models ({response.status_code})" 15 | response_json = response.json() 16 | 17 | RERANK_MODEL_ID = [model["id"] for model in response_json["data"] if model["type"] == ModelType.TEXT_CLASSIFICATION][0] 18 | logging.info(f"test model ID: {RERANK_MODEL_ID}") 19 | 20 | EMBEDDINGS_MODEL_ID = [model["id"] for model in response_json["data"] if model["type"] == ModelType.TEXT_EMBEDDINGS_INFERENCE][0] 21 | logging.info(f"test model ID: {EMBEDDINGS_MODEL_ID}") 22 | 23 | yield RERANK_MODEL_ID, EMBEDDINGS_MODEL_ID 24 | 25 | 26 | @pytest.mark.usefixtures("client", "setup") 27 | class TestRerank: 28 | def test_rerank_with_rerank_model(self, client: TestClient, setup): 29 | """Test the POST /rerank with a rerank model.""" 30 | RERANK_MODEL_ID, _ = setup 31 | 32 | params = {"model": RERANK_MODEL_ID, "prompt": "Sort these sentences by relevance.", "input": ["Sentence 1", "Sentence 2", "Sentence 3"]} 33 | response = client.post_without_permissions(url=f"/v1{ENDPOINT__RERANK}", json=params) 34 | assert response.status_code == 200, response.text 35 | 36 | Reranks(**response.json()) # test output format 37 | 38 | def test_rerank_with_wrong_model_type(self, client: 
TestClient, setup): 39 | """Test the POST /rerank with a wrong model type.""" 40 | _, EMBEDDINGS_MODEL_ID = setup 41 | 42 | params = {"model": EMBEDDINGS_MODEL_ID, "prompt": "Sort these sentences by relevance.", "input": ["Sentence 1", "Sentence 2", "Sentence 3"]} 43 | response = client.post_without_permissions(url=f"/v1{ENDPOINT__RERANK}", json=params) 44 | assert response.status_code == 422, response.text 45 | 46 | def test_rerank_with_unknown_model(self, client: TestClient, setup): 47 | """Test the POST /rerank with an unknown model.""" 48 | _, _ = setup 49 | 50 | params = {"model": "unknown", "prompt": "Sort these sentences by relevance.", "input": ["Sentence 1", "Sentence 2", "Sentence 3"]} 51 | response = client.post_without_permissions(url=f"/v1{ENDPOINT__RERANK}", json=params) 52 | assert response.status_code == 404, response.text 53 | -------------------------------------------------------------------------------- /app/tests/integ/test_router.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from app.utils.context import global_context 4 | 5 | 6 | @pytest.mark.usefixtures("client") 7 | class TestModels: 8 | def test_get_model_client(self): 9 | # Get a language model with more than 1 client 10 | router = global_context.models(model="albert-small") 11 | 12 | # With roundrobin client should be different at each call 13 | client_1 = router.get_client(endpoint="") 14 | client_2 = router.get_client(endpoint="") 15 | client_3 = router.get_client(endpoint="") 16 | 17 | assert client_1.timeout != client_2.timeout 18 | assert client_1.timeout == client_3.timeout 19 | -------------------------------------------------------------------------------- /app/tests/integ/test_sql_models.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | import pytest 3 | from typing import Tuple 4 | from app.sql.models import Usage 5 | from sqlalchemy import desc 6 | 7 | 8 | class TestUsageModel: 9 | def test_create_log(self, db_session, users: Tuple[int, int], tokens: Tuple[int, int]): 10 | """Test creating a log entry""" 11 | 12 | user_with_permissions, user_without_permissions = users 13 | token_with_permissions, token_without_permissions = tokens 14 | 15 | log = Usage( 16 | user_id=user_with_permissions["id"], 17 | token_id=token_with_permissions["id"], 18 | endpoint="/test/endpoint", 19 | model="test_model", 20 | prompt_tokens=100, 21 | completion_tokens=0.5, 22 | total_tokens=50, 23 | ) 24 | db_session.add(log) 25 | db_session.commit() 26 | 27 | saved_log = db_session.query(Usage).order_by(desc(Usage.id)).first() 28 | assert saved_log.user_id == user_with_permissions["id"] 29 | assert saved_log.token_id == token_with_permissions["id"] 30 | assert saved_log.endpoint == "/test/endpoint" 31 | assert saved_log.model == "test_model" 32 | assert saved_log.prompt_tokens == 100 33 | assert saved_log.completion_tokens == 0.5 34 | assert saved_log.total_tokens == 50 35 | assert isinstance(saved_log.datetime, datetime) 36 | 37 | def test_log_repr(self, db_session, users: Tuple[int, int], tokens: Tuple[int, int]): 38 | """Test the string representation of a log entry""" 39 | user_with_permissions, user_without_permissions = users 40 | token_with_permissions, token_without_permissions = tokens 41 | 42 | log = Usage(user_id=user_with_permissions["id"], token_id=token_with_permissions["id"], endpoint="/test/endpoint", model="test_model") 43 | db_session.add(log) 44 | db_session.commit() 45 | 
46 | assert ( 47 | str(log) 48 | == f"" 49 | ) 50 | 51 | def test_nullable_fields(self, db_session, users: Tuple[int, int], tokens: Tuple[int, int]): 52 | """Test that optional fields can be null""" 53 | user_with_permissions, user_without_permissions = users 54 | token_with_permissions, token_without_permissions = tokens 55 | 56 | log = Usage(user_id=user_with_permissions["id"], token_id=token_with_permissions["id"], endpoint="/test/endpoint", model="test_model") 57 | db_session.add(log) 58 | db_session.commit() 59 | 60 | saved_log = db_session.query(Usage).order_by(desc(Usage.id)).first() 61 | assert saved_log.prompt_tokens is None 62 | assert saved_log.completion_tokens is None 63 | assert saved_log.total_tokens is None 64 | assert saved_log.duration is None 65 | 66 | def test_non_nullable_fields(self, db_session, users: Tuple[int, int], tokens: Tuple[int, int]): 67 | """Test that required fields cannot be null""" 68 | user_with_permissions, user_without_permissions = users 69 | token_with_permissions, token_without_permissions = tokens 70 | 71 | log = Usage(user_id=user_with_permissions["id"], token_id=token_with_permissions["id"], model="test_model") 72 | with pytest.raises(Exception): 73 | db_session.add(log) 74 | db_session.commit() 75 | db_session.rollback() 76 | -------------------------------------------------------------------------------- /app/tests/integ/test_usagesmiddleware.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/etalab-ia/albert-api/0446f06baa3d561bb1bbd34ab1dd9a0441dd9302/app/tests/integ/test_usagesmiddleware.py -------------------------------------------------------------------------------- /app/tests/integ/utils/snapshot_assertions.py: -------------------------------------------------------------------------------- 1 | import difflib 2 | 3 | 4 | def assert_snapshot_almost_equal(actual, expected_snapshot, threshold=0.90): 5 | """ 6 | Assert that the actual value is almost equal to the expected snapshot. 7 | 8 | :param actual: The actual value returned by the OCR. 9 | :param expected_snapshot: The expected snapshot value. 10 | :param threshold: The similarity threshold (default: 0.99). 11 | :raises AssertionError: If the similarity is below the threshold. 
12 | """ 13 | similarity = difflib.SequenceMatcher(None, actual, expected_snapshot).ratio() 14 | if similarity < threshold: 15 | raise AssertionError( 16 | f"Snapshot mismatch: Similarity {similarity:.2%} is below the threshold of {threshold:.2%}.\n" 17 | f"Actual: {actual}\n" 18 | f"Expected: {expected_snapshot}" 19 | ) 20 | -------------------------------------------------------------------------------- /app/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/etalab-ia/albert-api/0446f06baa3d561bb1bbd34ab1dd9a0441dd9302/app/utils/__init__.py -------------------------------------------------------------------------------- /app/utils/carbon.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Optional 3 | 4 | from ecologits.tracers.utils import compute_llm_impacts, electricity_mixes 5 | 6 | from app.schemas.core.usage import CountryCodes 7 | from app.schemas.usage import CarbonFootprintUsage, CarbonFootprintUsageKgCO2eq, CarbonFootprintUsageKWh 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | def get_carbon_footprint( 13 | active_params: Optional[int], 14 | total_params: Optional[int], 15 | model_zone: CountryCodes, 16 | token_count: int, 17 | request_latency: float, 18 | ) -> CarbonFootprintUsage: 19 | """Calculate carbon impact of a model inference using direct parameters. 20 | 21 | Args: 22 | active_params(Optional[int]): Number of active parameters (in millions or billions, must match compute_llm_impacts expectations) 23 | total_params(Optional[int]): Total number of parameters (in millions or billions, must match compute_llm_impacts expectations) 24 | model_zone(CountryCodes): Electricity mix zone (Alpha-3 of the country code) 25 | token_count(int): Number of output tokens 26 | request_latency(float): Latency of the inference (in seconds) 27 | 28 | Returns: 29 | CarbonFootprintUsage: Computed carbon footprint 30 | """ 31 | if total_params is None or token_count == 0: 32 | return CarbonFootprintUsage( 33 | kWh=CarbonFootprintUsageKWh(min=0, max=0), 34 | kgCO2eq=CarbonFootprintUsageKgCO2eq(min=0, max=0), 35 | ) 36 | 37 | if not isinstance(token_count, (int, float)) or token_count < 0: 38 | raise ValueError("token_count must be a positive number") 39 | if not isinstance(request_latency, (int, float)) or request_latency < 0: 40 | raise ValueError("request_latency must be a positive number") 41 | 42 | electricity_mix = electricity_mixes.find_electricity_mix(zone=model_zone.value) 43 | if not electricity_mix: 44 | raise ValueError(f"Electricity zone {model_zone.value} not found") 45 | 46 | impacts = compute_llm_impacts( 47 | model_active_parameter_count=active_params, 48 | model_total_parameter_count=total_params, 49 | output_token_count=token_count, 50 | if_electricity_mix_adpe=electricity_mix.adpe, 51 | if_electricity_mix_pe=electricity_mix.pe, 52 | if_electricity_mix_gwp=electricity_mix.gwp, 53 | request_latency=request_latency, 54 | ) 55 | carbon_footprint = CarbonFootprintUsage( 56 | kWh=CarbonFootprintUsageKWh(min=impacts.energy.value.min, max=impacts.energy.value.max), 57 | kgCO2eq=CarbonFootprintUsageKgCO2eq(min=impacts.gwp.value.min, max=impacts.gwp.value.max), 58 | ) 59 | 60 | return carbon_footprint 61 | -------------------------------------------------------------------------------- /app/utils/context.py: -------------------------------------------------------------------------------- 1 | from contextvars import 
ContextVar 2 | from uuid import uuid4 3 | 4 | from app.schemas.core.context import GlobalContext, RequestContext 5 | 6 | global_context: GlobalContext = GlobalContext() 7 | request_context: ContextVar[RequestContext] = ContextVar("request_context", default=RequestContext()) 8 | 9 | 10 | def generate_request_id() -> str: 11 | """ 12 | Get the ID of the request. 13 | """ 14 | return f"request-{str(uuid4()).replace("-", "")}" 15 | -------------------------------------------------------------------------------- /app/utils/depends.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/etalab-ia/albert-api/0446f06baa3d561bb1bbd34ab1dd9a0441dd9302/app/utils/depends.py -------------------------------------------------------------------------------- /app/utils/logging.py: -------------------------------------------------------------------------------- 1 | from contextvars import ContextVar 2 | from logging import Filter, Formatter, Logger, StreamHandler, getLogger 3 | import sys 4 | from typing import Optional 5 | 6 | from app.utils.settings import settings 7 | 8 | client_ip: ContextVar[Optional[str]] = ContextVar("client_ip", default=None) 9 | 10 | 11 | class ClientIPFilter(Filter): 12 | def filter(self, record): 13 | client_addr = client_ip.get() 14 | record.client_ip = client_addr if client_addr else "." 15 | return True 16 | 17 | 18 | def init_logger(name) -> Logger: 19 | logger = getLogger(name=name) 20 | logger.setLevel(level=settings.general.log_level) 21 | handler = StreamHandler(stream=sys.stdout) 22 | formatter = Formatter("[%(asctime)s][%(process)d:%(name)s][%(levelname)s] %(client_ip)s - %(message)s") 23 | handler.setFormatter(formatter) 24 | handler.addFilter(ClientIPFilter()) 25 | 26 | logger.addHandler(handler) 27 | logger.propagate = False # Prevent propagation to root logger 28 | 29 | return logger 30 | -------------------------------------------------------------------------------- /app/utils/settings.py: -------------------------------------------------------------------------------- 1 | from functools import lru_cache 2 | from app.schemas.core.settings import Settings 3 | 4 | 5 | @lru_cache 6 | def get_settings() -> Settings: 7 | return Settings() 8 | 9 | 10 | settings = get_settings() 11 | -------------------------------------------------------------------------------- /compose.dev.yml: -------------------------------------------------------------------------------- 1 | name: albert-api 2 | 3 | services: 4 | api: 5 | build: 6 | context: . 7 | dockerfile: app/Dockerfile 8 | platform: linux/amd64 9 | restart: always 10 | environment: 11 | - COVERAGE_RCFILE=./app/.coveragerc 12 | - POSTGRES_HOST=postgres 13 | - REDIS_HOST=redis 14 | - QDRANT_HOST=qdrant 15 | - OPENAI_API_KEY=${OPENAI_API_KEY} # replace with your own key 16 | ports: 17 | - 8000:8000 18 | volumes: 19 | - ./config.yml:/config.yml:ro 20 | develop: 21 | watch: 22 | - action: sync+restart 23 | path: ./app 24 | target: /app 25 | - action: build+restart 26 | path: . 27 | target: /app 28 | depends_on: 29 | redis: 30 | condition: service_healthy 31 | postgres: 32 | condition: service_healthy 33 | qdrant: 34 | condition: service_healthy 35 | 36 | playground: 37 | build: 38 | context: . 
39 | dockerfile: ui/Dockerfile 40 | platform: linux/amd64 41 | restart: always 42 | ports: 43 | - 8501:8501 44 | environment: 45 | - API_HOST=api 46 | - POSTGRES_HOST=postgres 47 | volumes: 48 | - ./config.yml:/config.yml:ro 49 | develop: 50 | watch: 51 | - action: sync+restart 52 | path: ./ui 53 | target: /app 54 | depends_on: 55 | postgres: 56 | condition: service_healthy 57 | 58 | postgres: 59 | extends: 60 | file: compose.yml 61 | service: postgres 62 | 63 | redis: 64 | extends: 65 | file: compose.yml 66 | service: redis 67 | 68 | qdrant: 69 | extends: 70 | file: compose.yml 71 | service: qdrant 72 | 73 | mcp-bridge: 74 | extends: 75 | file: compose.yml 76 | service: mcp-bridge 77 | 78 | volumes: 79 | postgres: 80 | redis: 81 | qdrant: 82 | -------------------------------------------------------------------------------- /compose.prod.yml: -------------------------------------------------------------------------------- 1 | services: 2 | api: 3 | image: ghcr.io/etalab-ia/albert-api/app:latest 4 | platform: linux/amd64 5 | restart: always 6 | environment: 7 | - COVERAGE_RCFILE=./app/.coveragerc 8 | - POSTGRES_HOST=postgres 9 | - REDIS_HOST=redis 10 | - QDRANT_HOST=qdrant 11 | - OPENAI_API_KEY=${OPENAI_API_KEY} # replace with your own key 12 | ports: 13 | - 8000:8000 14 | volumes: 15 | - ./config.yml:/config.yml:ro 16 | depends_on: 17 | redis: 18 | condition: service_healthy 19 | postgres: 20 | condition: service_healthy 21 | qdrant: 22 | condition: service_healthy 23 | 24 | playground: 25 | image: ghcr.io/etalab-ia/albert-api/ui:latest 26 | platform: linux/amd64 27 | restart: always 28 | environment: 29 | - API_HOST=api 30 | - POSTGRES_HOST=postgres 31 | ports: 32 | - 8501:8501 33 | volumes: 34 | - ./config.yml:/config.yml:ro 35 | depends_on: 36 | postgres: 37 | condition: service_healthy 38 | 39 | postgres: 40 | extends: 41 | file: compose.yml 42 | service: postgres 43 | 44 | redis: 45 | extends: 46 | file: compose.yml 47 | service: redis 48 | 49 | qdrant: 50 | extends: 51 | file: compose.yml 52 | service: qdrant 53 | 54 | mcp-bridge: 55 | extends: 56 | file: compose.yml 57 | service: mcp-bridge 58 | 59 | volumes: 60 | postgres: 61 | redis: 62 | qdrant: 63 | -------------------------------------------------------------------------------- /compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | redis: 3 | image: redis/redis-stack-server:7.2.0-v11 4 | restart: always 5 | environment: 6 | REDIS_ARGS: --dir /data --requirepass changeme --user username on >password ~* allcommands --save 60 1 --appendonly yes 7 | ports: 8 | - 6379:6379 9 | volumes: 10 | - redis:/data 11 | healthcheck: 12 | test: [ "CMD", "redis-cli", "--raw", "incr", "ping" ] 13 | interval: 4s 14 | timeout: 10s 15 | retries: 5 16 | 17 | qdrant: 18 | image: qdrant/qdrant:v1.11.5-unprivileged 19 | restart: always 20 | environment: 21 | - QDRANT__SERVICE__API_KEY=changeme 22 | volumes: 23 | - qdrant:/qdrant/storage 24 | ports: 25 | - 6333:6333 26 | - 6334:6334 27 | healthcheck: 28 | test: [ "CMD-SHELL", "bash", "-c", ":> /dev/tcp/127.0.0.1/6333" ] 29 | interval: 4s 30 | timeout: 10s 31 | retries: 5 32 | 33 | postgres: 34 | image: postgres:16.5 35 | restart: always 36 | user: postgres 37 | environment: 38 | - CREATE_DB=api,playground 39 | - POSTGRES_USER=postgres 40 | - POSTGRES_PASSWORD=changeme 41 | - POSTGRES_DB=postgres 42 | ports: 43 | - 5432:5432 44 | volumes: 45 | - postgres:/var/lib/postgresql/data 46 | - 
./scripts/postgres_entrypoint.sh:/docker-entrypoint-initdb.d/postgres_entrypoint.sh 47 | healthcheck: 48 | test: [ "CMD-SHELL", "pg_isready", "-U", "postgres" ] 49 | interval: 4s 50 | timeout: 10s 51 | retries: 5 52 | start_period: 60s 53 | 54 | mcp-bridge: 55 | image: ghcr.io/etalab-ia/albert-api-mcp-bridge/albert-api-mcp-bridge:latest 56 | develop: 57 | watch: 58 | - path: mcp_bridge 59 | action: rebuild 60 | extra_hosts: 61 | - "host.docker.internal:host-gateway" 62 | ports: 63 | - "9876:8000" 64 | environment: 65 | - MCP_BRIDGE__CONFIG__FILE=config.json 66 | volumes: 67 | - ./mcp/config.json:/mcp_bridge/config.json 68 | - ./mcp/data_gouv_fr_mcp_server:/mcp_bridge/data-gouv-fr-mcp-server 69 | restart: unless-stopped 70 | 71 | volumes: 72 | qdrant: 73 | redis: 74 | postgres: 75 | -------------------------------------------------------------------------------- /config.example.yml: -------------------------------------------------------------------------------- 1 | general: 2 | log_level: DEBUG 3 | # disabled_routers: 4 | # - ocr 5 | 6 | 7 | playground: 8 | api_url: http://${API_HOST}:8000 9 | 10 | auth: 11 | master_username: master 12 | master_key: changeme 13 | 14 | monitoring: 15 | postgres: 16 | enabled: true 17 | 18 | mcp: 19 | mcp_bridge_url: http://localhost:9000/ 20 | 21 | databases: 22 | - type: qdrant 23 | context: api 24 | model: my-embeddings-model 25 | args: 26 | url: http://${QDRANT_HOST} 27 | api_key: changeme 28 | timeout: 10 29 | 30 | - type: redis 31 | context: api 32 | args: 33 | host: ${REDIS_HOST} 34 | password: changeme 35 | 36 | - type: sql # API database (async) 37 | context: api 38 | args: 39 | url: postgresql+asyncpg://postgres:changeme@${POSTGRES_HOST}:5432/api 40 | echo: False 41 | pool_size: 5 42 | max_overflow: 10 43 | pool_pre_ping: True 44 | connect_args: 45 | server_settings: 46 | statement_timeout: "120s" 47 | 48 | - type: sql # Playground database (sync) 49 | context: playground 50 | args: 51 | url: postgresql://postgres:changeme@${POSTGRES_HOST}:5432/playground 52 | echo: False 53 | pool_size: 5 54 | max_overflow: 10 55 | pool_pre_ping: True 56 | 57 | models: 58 | - id: my-language-model 59 | type: text-generation 60 | clients: 61 | - model: gpt-3.5-turbo 62 | type: openai 63 | args: 64 | api_url: https://api.openai.com 65 | api_key: ${OPENAI_API_KEY} 66 | timeout: 60 67 | 68 | - id: my-embeddings-model 69 | type: text-embeddings-inference 70 | clients: 71 | - model: text-embedding-3-small 72 | type: openai 73 | args: 74 | api_url: https://api.openai.com 75 | api_key: ${OPENAI_API_KEY} 76 | timeout: 60 77 | -------------------------------------------------------------------------------- /docs/architecture.md: -------------------------------------------------------------------------------- 1 | # Code architecture 2 | 3 | ```mermaid 4 | --- 5 | config: 6 | layout: elk 7 | elk: 8 | mergeEdges: true 9 | 10 | --- 11 | flowchart LR 12 | config@{ shape: tag-doc, label: "config.yml" } 13 | 14 | subgraph **main.py** 15 | settings[utils/settings.py] 16 | lifespan[utils/lifespan.py] 17 | 18 | config --> settings 19 | settings --> lifespan 20 | end 21 | 22 | subgraph **app/clients** 23 | redisclient[Redis - ConnectionPool] 24 | sqlclient[SQLAlchemy - AsyncSession] 25 | qdrantclient[Qrant - AsyncQdrantClient] 26 | internetclient[BraveInternetClient
DuckduckgoInternetClient] 27 | modelclient@{ shape: processes, label: "VllmModelClient
TeiModelClient
AlbertModelClient
OpenaiModelClient" } 28 | 29 | lifespan --> redisclient 30 | lifespan --> sqlclient 31 | lifespan --> qdrantclient 32 | lifespan -- one of two--> internetclient 33 | lifespan -- for each model --> modelclient 34 | 35 | style redisclient stroke-dasharray: 5 5 36 | style qdrantclient stroke-dasharray: 5 5 37 | end 38 | 39 | subgraph **app/helpers** 40 | modelrouter@{ shape: processes, label: "ModelRouter" } 41 | modelregistry[ModelRegistry] 42 | identityaccessmanager[IdentityAccessManager] 43 | documentmanager[DocumentManager] 44 | limiter[Limiter] 45 | 46 | modelclient --> modelrouter 47 | modelrouter --> modelregistry 48 | sqlclient --> identityaccessmanager 49 | 50 | internetclient --> websearchmanager 51 | websearchmanager --> documentmanager 52 | sqlclient --> documentmanager 53 | qdrantclient --> documentmanager 54 | redisclient --> limiter 55 | 56 | style documentmanager fill:blue,stroke:#000,stroke-width:1px,color:#fff 57 | style modelregistry fill:orange,stroke:#000,stroke-width:1px,color:#fff 58 | style identityaccessmanager fill:purple,stroke:#000,stroke-width:1px,color:#fff 59 | style limiter fill:black,stroke:#000,stroke-width:1px,color:#fff 60 | end 61 | 62 | subgraph **app/endpoints** 63 | documentsendpoints[documents] 64 | searchendpoints[search] 65 | modelsendpoints[models] 66 | chatendpoints[chat] 67 | audioendpoints[audio] 68 | completionsendpoint[completions] 69 | embeddingsendpoints[embeddings] 70 | rerankendpoints[rerank] 71 | collectionsendpoints[collections] 72 | authendpoints[auth] 73 | 74 | modelregistry ==> modelsendpoints 75 | modelregistry ==> chatendpoints 76 | modelregistry ==> completionsendpoint 77 | modelregistry ==> embeddingsendpoints 78 | modelregistry ==> rerankendpoints 79 | modelregistry ==> searchendpoints 80 | modelregistry ==> documentsendpoints 81 | 82 | identityaccessmanager ==> authendpoints 83 | 84 | documentmanager ==> documentsendpoints 85 | documentmanager ==> searchendpoints 86 | documentmanager ==> collectionsendpoints 87 | 88 | 89 | end 90 | 91 | subgraph **depends.py** 92 | authorization[Authorization] 93 | 94 | limiter ====> authorization 95 | identityaccessmanager ===> authorization 96 | modelregistry ===> authorization 97 | 98 | style authorization fill:red,stroke:#000,stroke-width:1px,color:#fff 99 | end 100 | 101 | authorization ==all endpoints==> **app/endpoints** 102 | ``` -------------------------------------------------------------------------------- /docs/assets/collections_001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/etalab-ia/albert-api/0446f06baa3d561bb1bbd34ab1dd9a0441dd9302/docs/assets/collections_001.png -------------------------------------------------------------------------------- /docs/assets/collections_002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/etalab-ia/albert-api/0446f06baa3d561bb1bbd34ab1dd9a0441dd9302/docs/assets/collections_002.png -------------------------------------------------------------------------------- /docs/assets/collections_003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/etalab-ia/albert-api/0446f06baa3d561bb1bbd34ab1dd9a0441dd9302/docs/assets/collections_003.png -------------------------------------------------------------------------------- /docs/assets/collections_004.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/etalab-ia/albert-api/0446f06baa3d561bb1bbd34ab1dd9a0441dd9302/docs/assets/collections_004.png -------------------------------------------------------------------------------- /docs/assets/collections_005.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/etalab-ia/albert-api/0446f06baa3d561bb1bbd34ab1dd9a0441dd9302/docs/assets/collections_005.png -------------------------------------------------------------------------------- /docs/assets/deployment_001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/etalab-ia/albert-api/0446f06baa3d561bb1bbd34ab1dd9a0441dd9302/docs/assets/deployment_001.png -------------------------------------------------------------------------------- /docs/assets/iam_001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/etalab-ia/albert-api/0446f06baa3d561bb1bbd34ab1dd9a0441dd9302/docs/assets/iam_001.png -------------------------------------------------------------------------------- /docs/assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/etalab-ia/albert-api/0446f06baa3d561bb1bbd34ab1dd9a0441dd9302/docs/assets/logo.png -------------------------------------------------------------------------------- /docs/budget.md: -------------------------------------------------------------------------------- 1 | # Budget 2 | 3 | ## How it works 4 | 5 | ### User budget 6 | 7 | Each user has a budget defined by POST `/v1/users` endpoint. The budget is defined in the `budget` field. 8 | 9 | ```bash 10 | curl -X POST http://localhost:8000/v1/users \ 11 | -H "Authorization: Bearer " \ 12 | -H "Content-Type: application/json" \ 13 | -d '{ 14 | "name": "John Doe", 15 | "role": 1, 16 | "expires_at": "2025-01-01", 17 | "budget": 100 18 | }' 19 | ``` 20 | 21 | Or by PATCH `/v1/users/{user_id}` endpoint to update the budget of an existing user. 22 | 23 | ```bash 24 | curl -X PATCH http://localhost:8000/v1/users/1 \ 25 | -H "Authorization: Bearer " \ 26 | -H "Content-Type: application/json" \ 27 | -d '{ 28 | "budget": 100 29 | }' 30 | ``` 31 | 32 | > **❗️Note**
33 | > If budget is not defined, the user has no limit on the number of requests. 34 | 35 | ### Model costs 36 | 37 | For each model client, costs for prompt and completion tokens are defined in the `config.yml` file (per million tokens). Example: 38 | 39 | ```yaml 40 | models: 41 | - id: language-model 42 | type: text-generation 43 | clients: 44 | - model: openai/gpt-4o-mini 45 | type: openai 46 | costs: 47 | prompt_tokens: 0.1 48 | completion_tokens: 0.3 49 | ``` 50 | 51 | The cost is computed from the number of tokens used and the costs defined for the client, using the following formula: 52 | 53 | ```python 54 | cost = round((prompt_tokens / 1000000 * client.costs.prompt_tokens) + (completion_tokens / 1000000 * client.costs.completion_tokens), ndigits=6) 55 | ``` 56 | 57 | The computed cost is returned in the response, in the `usage.cost` field. For example, with the costs above, a request that uses 2,000 prompt tokens and 500 completion tokens costs `round(2000 / 1000000 * 0.1 + 500 / 1000000 * 0.3, ndigits=6) = 0.00035`. After the request is processed, the budget amount of the user is updated by the `update_budget` function in the `hooks_decorator.py` file. -------------------------------------------------------------------------------- /docs/models.md: -------------------------------------------------------------------------------- 1 | # Models 2 | 3 | The Albert API supports configuring 4 types of models: 4 | - text-generation: language model 5 | - text-embeddings-inference: embeddings model 6 | - automatic-speech-recognition: audio transcription model 7 | - text-classification: reranking model 8 | 9 | To configure the connection to these models, see the [deployment](./deployment.md) documentation. 10 | 11 | ## text-generation 12 | 13 | For language models, you can use any API compatible with the [OpenAI](https://platform.openai.com/docs/api-reference/chat/create) format, i.e. one exposing a `/v1/chat/completions` endpoint. 14 | 15 | If you want to deploy a language model, we recommend using [vLLM](https://github.com/vllm-project/vllm). Example of a language model: [guillaumetell-7b](https://huggingface.co/AgentPublic/guillaumetell-7b). 16 | 17 | **⚠️ Deploying the API does not require providing a language model.** 18 | 19 | ## text-embeddings-inference 20 | 21 | For embeddings models, you can use any API compatible with the [OpenAI](https://platform.openai.com/docs/api-reference/embeddings) format, i.e. one exposing a `/v1/embeddings` endpoint. 22 | 23 | If you want to deploy an embeddings model, we recommend using [HuggingFace Text Embeddings Inference](https://github.com/huggingface/text-embeddings-inference). Example of an embeddings model: [multilingual-e5-large](https://huggingface.co/intfloat/multilingual-e5-large). 24 | 25 | **⚠️ Deploying the API requires providing an embeddings model.** 26 | 27 | ## automatic-speech-recognition 28 | 29 | For audio transcription models, you can use any API compatible with the [OpenAI](https://platform.openai.com/docs/api-reference/audio/create-transcription) format, i.e. one exposing a `/v1/audio/transcriptions` endpoint. 30 | 31 | If you want to deploy an audio transcription model, we recommend using [Whisper OpenAI API](https://github.com/etalab-ia/whisper-openai-api). Example of an audio transcription model: [whisper-large-v3-turbo](https://huggingface.co/openai/whisper-large-v3-turbo).
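32 | 33 | For illustration, here is a minimal sketch of declaring such a model in `config.yml`, reusing the schema of the example shown in the [budget](./budget.md) documentation (the `audio-model` id is arbitrary, and connection settings are omitted; see [deployment](./deployment.md) for the full schema): 34 | 35 | ```yaml 36 | models: 37 | - id: audio-model 38 | type: automatic-speech-recognition 39 | clients: 40 | - model: openai/whisper-large-v3-turbo 41 | type: openai 42 | ```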
43 | 44 | Deploying the API does not require providing an audio transcription model. 45 | 46 | ## text-classification 47 | 48 | For reranking models, you must use an API compatible with the format of the [HuggingFace Text Embeddings Inference](https://huggingface.github.io/text-embeddings-inference/) API, i.e. one exposing a `/rerank` endpoint. 49 | 50 | If you want to deploy a reranking model, we recommend using [HuggingFace Text Embeddings Inference](https://github.com/huggingface/text-embeddings-inference). Example of a reranking model: [bge-reranker-v2-m3](https://huggingface.co/BAAI/bge-reranker-v2-m3). 51 | 52 | Deploying the API does not require providing a reranking model. 53 | -------------------------------------------------------------------------------- /docs/tutorials/audio_transcriptions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "81d87e1a-014f-43a2-a0a5-703bd158f0f9", 6 | "metadata": {}, 7 | "source": [ 8 | "# Audio transcriptions" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "id": "0df030ba", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "%pip install -qU openai" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 14, 24 | "id": "87185611-802d-4127-9ba0-3cc5dacb2351", 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "import os\n", 29 | "import requests\n", 30 | "from openai import OpenAI" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 3, 36 | "id": "08a18feb-e58b-4fb3-809e-045a81bec9dd", 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "# OpenAI client configuration\n", 41 | "\n", 42 | "base_url = \"https://albert.api.etalab.gouv.fr/v1\"\n", 43 | "api_key = os.getenv(\"API_KEY\")\n", 44 | "client = OpenAI(base_url=base_url, api_key=api_key)" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 12, 50 | "id": "3032b836", 51 | "metadata": {}, 52 | "outputs": [ 53 | { 54 | "name": "stdout", 55 | "output_type": "stream", 56 | "text": [ 57 | "openai/whisper-large-v3\n" 58 | ] 59 | } 60 | ], 61 | "source": [ 62 | "# get the list of available models\n", 63 | "response = client.models.list()\n", 64 | "\n", 65 | "# get an audio model (type: automatic-speech-recognition)\n", 66 | "model = [model for model in response.data if model.type == \"automatic-speech-recognition\"][0].id\n", 67 | "print(model)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 26, 73 | "id": "aa88806c-094e-43a0-8a4f-02aefbcf4a09", 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "# download an audio file\n", 78 | "url = \"https://www.lightbulblanguages.co.uk/resources/audio/animal%20qu.mp3\"\n", 79 | "response = requests.request(\"GET\", url)" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 31, 85 | "id": "3f700cab-e53f-4a4c-8cbc-be9bdf96a7d7", 86 | "metadata": {}, 87 | "outputs": [ 88 | { 89 | "name": "stdout", 90 | "output_type": "stream", 91 | "text": [ 92 | " Est-ce que tu as un animal ? As-tu un animal ?
Tu as un animal ?\n" 93 | ] 94 | } 95 | ], 96 | "source": [ 97 | "# transcribe the audio file\n", 98 | "data = {\n", 99 | "    \"model\": model,\n", 100 | "    \"file\": (\"./audio.mp3\", response.content, \"audio/mp3\"),\n", 101 | "    \"language\": \"fr\",\n", 102 | "    \"response_format\": \"json\",\n", 103 | "    \"temperature\": 0.2,\n", 104 | "}\n", 105 | "\n", 106 | "response = client.audio.transcriptions.create(**data)\n", 107 | "print(response.text)" 108 | ] 109 | } 110 | ], 111 | "metadata": { 112 | "kernelspec": { 113 | "display_name": "Python 3 (ipykernel)", 114 | "language": "python", 115 | "name": "python3" 116 | }, 117 | "language_info": { 118 | "codemirror_mode": { 119 | "name": "ipython", 120 | "version": 3 121 | }, 122 | "file_extension": ".py", 123 | "mimetype": "text/x-python", 124 | "name": "python", 125 | "nbconvert_exporter": "python", 126 | "pygments_lexer": "ipython3", 127 | "version": "3.12.3" 128 | } 129 | }, 130 | "nbformat": 4, 131 | "nbformat_minor": 5 132 | } 133 | -------------------------------------------------------------------------------- /docs/tutorials/chat_completions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "81d87e1a-014f-43a2-a0a5-703bd158f0f9", 6 | "metadata": {}, 7 | "source": [ 8 | "# Chat completions" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "id": "0df030ba", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "%pip install -qU openai" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "id": "08a18feb-e58b-4fb3-809e-045a81bec9dd", 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "# OpenAI client configuration\n", 29 | "import os\n", 30 | "from openai import OpenAI\n", 31 | "\n", 32 | "base_url = \"https://albert.api.etalab.gouv.fr/v1\"\n", 33 | "api_key = os.getenv(\"API_KEY\")\n", 34 | "\n", 35 | "client = OpenAI(base_url=base_url, api_key=api_key)" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 3, 41 | "id": "3f700cab-e53f-4a4c-8cbc-be9bdf96a7d7", 42 | "metadata": {}, 43 | "outputs": [ 44 | { 45 | "name": "stdout", 46 | "output_type": "stream", 47 | "text": [ 48 | "Bonjour ! Je m'appelle Albert, mais je suis un modèle de langage artificiel, donc je ne suis pas vraiment Albert, mais je suis là pour discuter avec toi ! Comment ça va ?\n" 49 | ] 50 | } 51 | ], 52 | "source": [ 53 | "# unstreamed chat\n", 54 | "data = {\n", 55 | "    \"model\": \"AgentPublic/llama3-instruct-8b\",\n", 56 | "    \"messages\": [{\"role\": \"user\", \"content\": \"Salut Albert !\"}],\n", 57 | "    \"stream\": False,\n", 58 | "    \"n\": 1,\n", 59 | "}\n", 60 | "\n", 61 | "response = client.chat.completions.create(**data)\n", 62 | "print(response.choices[0].message.content)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 4, 68 | "id": "1f7753b8", 69 | "metadata": {}, 70 | "outputs": [ 71 | { 72 | "name": "stdout", 73 | "output_type": "stream", 74 | "text": [ 75 | "Salut ! Je suis Albert, un modèle de langage artificiel. Je suis ravi de vous voir ! Qu'est-ce que vous voulez discuter ?"
76 | ] 77 | } 78 | ], 79 | "source": [ 80 | "# streamed chat\n", 81 | "data = {\n", 82 | "    \"model\": \"AgentPublic/llama3-instruct-8b\",\n", 83 | "    \"messages\": [{\"role\": \"user\", \"content\": \"Salut Albert !\"}],\n", 84 | "    \"stream\": True,\n", 85 | "    \"n\": 1,\n", 86 | "}\n", 87 | "\n", 88 | "response = client.chat.completions.create(**data)\n", 89 | "for chunk in response:\n", 90 | "    if chunk.choices[0].finish_reason is not None:\n", 91 | "        break\n", 92 | "    print(chunk.choices[0].delta.content or \"\", end=\"\", flush=True)" 93 | ] 94 | } 95 | ], 96 | "metadata": { 97 | "kernelspec": { 98 | "display_name": "Python 3 (ipykernel)", 99 | "language": "python", 100 | "name": "python3" 101 | }, 102 | "language_info": { 103 | "codemirror_mode": { 104 | "name": "ipython", 105 | "version": 3 106 | }, 107 | "file_extension": ".py", 108 | "mimetype": "text/x-python", 109 | "name": "python", 110 | "nbconvert_exporter": "python", 111 | "pygments_lexer": "ipython3", 112 | "version": "3.12.1" 113 | } 114 | }, 115 | "nbformat": 4, 116 | "nbformat_minor": 5 117 | } 118 | -------------------------------------------------------------------------------- /docs/tutorials/models.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "57fc5450-a36f-48c0-a53c-9e3698c3267a", 6 | "metadata": {}, 7 | "source": [ 8 | "# Models" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "id": "afa7fd12", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "%pip install -qU openai" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "id": "8e5bf034-950e-4a6b-a5bc-4abca3b54959", 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "# OpenAI client configuration\n", 29 | "import os\n", 30 | "from openai import OpenAI\n", 31 | "\n", 32 | "base_url = \"https://albert.api.etalab.gouv.fr/v1\"\n", 33 | "api_key = os.getenv(\"API_KEY\")\n", 34 | "\n", 35 | "client = OpenAI(base_url=base_url, api_key=api_key)" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 2, 41 | "id": "1aa563f6-e734-4452-a023-7a423612e7eb", 42 | "metadata": {}, 43 | "outputs": [ 44 | { 45 | "name": "stdout", 46 | "output_type": "stream", 47 | "text": [ 48 | "meta-llama/Meta-Llama-3.1-8B-Instruct\n", 49 | "mistralai/Mixtral-8x7B-Instruct-v0.1\n", 50 | "AgentPublic/llama3-instruct-8b\n", 51 | "BAAI/bge-m3\n", 52 | "AgentPublic/llama3-instruct-guillaumetell\n", 53 | "intfloat/multilingual-e5-large\n", 54 | "google/gemma-2-9b-it\n" 55 | ] 56 | } 57 | ], 58 | "source": [ 59 | "# Get all models information\n", 60 | "models = client.models.list()\n", 61 | "\n", 62 | "for model in models.data:\n", 63 | "    print(model.id)" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 7, 69 | "id": "a5339237-dbf6-488d-ae26-98e52eccbd64", 70 | "metadata": {}, 71 | "outputs": [ 72 | { 73 | "name": "stdout", 74 | "output_type": "stream", 75 | "text": [ 76 | "AgentPublic/llama3-instruct-8b\n" 77 | ] 78 | } 79 | ], 80 | "source": [ 81 | "# Get only one model information\n", 82 | "model = \"AgentPublic/llama3-instruct-8b\"\n", 83 | "model = client.models.retrieve(model=model)\n", 84 | "\n", 85 | "print(model.id)" 86 | ] 87 | } 88 | ], 89 | "metadata": { 90 | "kernelspec": { 91 | "display_name": "Python 3 (ipykernel)", 92 | "language": "python", 93 | "name": "python3" 94 | }, 95 | "language_info": { 96 | "codemirror_mode": { 97 | "name": "ipython", 98 | "version": 3 99 |
}, 100 | "file_extension": ".py", 101 | "mimetype": "text/x-python", 102 | "name": "python", 103 | "nbconvert_exporter": "python", 104 | "pygments_lexer": "ipython3", 105 | "version": "3.12.3" 106 | } 107 | }, 108 | "nbformat": 4, 109 | "nbformat_minor": 5 110 | } 111 | -------------------------------------------------------------------------------- /mcp/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "mcp_servers": { 3 | "data-gouv": { 4 | "command": "uv", 5 | "args": [ 6 | "--directory", 7 | "/mcp_bridge/data-gouv-fr-mcp-server", 8 | "run", 9 | "main.py" 10 | ] 11 | } 12 | } 13 | } -------------------------------------------------------------------------------- /mcp/data_gouv_fr_mcp_server/README.md: -------------------------------------------------------------------------------- 1 | # data.gouv.fr MCP Server 2 | 3 | Basic MCP server which returns datasets for a given query. 4 | 5 | ## Installation 6 | 7 | 1. Enable direnv: 8 | 9 | ```commandline 10 | direnv allow . 11 | ``` 12 | 13 | 2. Install dependencies: 14 | ```commandline 15 | pip install . 16 | ``` 17 | -------------------------------------------------------------------------------- /mcp/data_gouv_fr_mcp_server/infra/clients/http_client.py: -------------------------------------------------------------------------------- 1 | import urllib.parse 2 | 3 | import httpx 4 | 5 | 6 | class HttpClient: 7 | def __init__(self, url): 8 | self.url = url 9 | 10 | async def get_datasets(self, query, limit): 11 | api_url_with_query = self._format_api_url_with_query(query, limit) 12 | try: 13 | async with httpx.AsyncClient(timeout=30.0) as client: 14 | response = await client.get(api_url_with_query) 15 | 16 | if response.status_code != 200: 17 | return { 18 | "error": f"Erreur de l'API data.gouv.fr (HTTP {response.status_code})", 19 | "details": response.text[:200] if response.text else "Pas de détails", 20 | } 21 | 22 | try: 23 | api_data = response.json() 24 | except Exception as e: 25 | return {"error": f"Réponse JSON invalide: {str(e)}"} 26 | 27 | return api_data 28 | except httpx.TimeoutException: 29 | return {"error": "Timeout lors de la requête vers data.gouv.fr"} 30 | except httpx.RequestError as e: 31 | return {"error": f"Erreur de requête: {str(e)}"} 32 | except Exception as e: 33 | return {"error": f"Erreur inattendue: {str(e)}"} 34 | 35 | def _format_api_url_with_query(self, query, limit): 36 | query_clean = query.strip().strip("?").strip() 37 | query_encoded = urllib.parse.quote(query_clean) 38 | api_url = f"{self.url}/datasets/?q={query_encoded}&page_size={limit}" 39 | return api_url 40 | -------------------------------------------------------------------------------- /mcp/data_gouv_fr_mcp_server/main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | MCP server for searching datasets on data.gouv.fr 4 | """ 5 | 6 | from typing import Dict, Any 7 | 8 | from mcp.server.fastmcp import FastMCP 9 | 10 | from infra.clients.http_client import HttpClient 11 | 12 | 13 | class McpDataGouv: 14 | def __init__(self, http_client, name: str = "DataGouvFr"): 15 | self.mcp = FastMCP(name) 16 | self.http_client = http_client 17 | self._register_tools() 18 | 19 | def _register_tools(self): 20 | """Register the MCP tools""" 21 | 22 | @self.mcp.tool() 23 | async def search_datasets(query: str, limit: int = 10) -> Dict[str, Any]: 24 | """ 25 | Search for datasets on data.gouv.fr 26 | 27 | Args: 28 | query: The
search query (keywords) 29 | limit: Maximum number of results to return (default: 10) 30 | 31 | Returns: 32 | Dict containing the search results 33 | """ 34 | return await self._search_datasets_impl(query, limit) 35 | 36 | async def _search_datasets_impl(self, query: str, limit: int = 10) -> Dict[str, Any]: 37 | if not query or not query.strip(): 38 | return {"error": "La requête de recherche ne peut pas être vide"} 39 | 40 | api_data = await self.http_client.get_datasets(query, limit) 41 | 42 | datasets = api_data.get("data", []) 43 | 44 | resultats = self._to_mcp_tool_response(datasets) 45 | 46 | return {"query": query, "nombre_resultats": len(resultats), "total_disponible": api_data.get("total", 0), "resultats": resultats} 47 | 48 | def _to_mcp_tool_response(self, datasets): 49 | resultats = [] 50 | for ds in datasets: 51 | dataset_id = ds.get("id", "") 52 | titre = ds.get("title", "Sans titre") 53 | description = ds.get("description", "") 54 | url = ds.get("page", "")  # 'page' is the URL of the dataset's page 55 | 56 | organization = ds.get("organization", {}) 57 | org_name = organization.get("name", "") if organization else "" 58 | 59 | tags = [tag for tag in ds.get("tags", [])] 60 | created_at = ds.get("created_at", "") 61 | last_modified = ds.get("last_modified", "") 62 | 63 | resources = ds.get("resources", []) 64 | formats = list(set([res.get("format", "").upper() for res in resources if res.get("format")])) 65 | 66 | dataset_info = { 67 | "id": dataset_id, 68 | "titre": titre, 69 | "description": description[:500] + ("..." if len(description) > 500 else ""),  # truncate the description 70 | "url": url, 71 | "organisation": org_name, 72 | "tags": tags[:5], 73 | "formats_disponibles": formats, 74 | "date_creation": created_at.split("T")[0] if created_at else "", 75 | "derniere_modification": last_modified.split("T")[0] if last_modified else "", 76 | "nombre_ressources": len(resources), 77 | } 78 | 79 | resultats.append(dataset_info) 80 | return resultats 81 | 82 | 83 | def main(): 84 | http_client = HttpClient("https://www.data.gouv.fr/api/1") 85 | mcp_data_gouv = McpDataGouv(http_client, "DataGouvFr") 86 | mcp_data_gouv.mcp.run() 87 | 88 | 89 | if __name__ == "__main__": 90 | main() 91 | -------------------------------------------------------------------------------- /mcp/data_gouv_fr_mcp_server/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "data-gouv-fr-mcp-server" 3 | version = "0.1.0" 4 | description = "A simple data.gouv.fr MCP server" 5 | readme = "README.md" 6 | requires-python = ">=3.10" 7 | dependencies = [ 8 | "httpx>=0.28.1", 9 | "mcp[cli]>=1.2.0", 10 | "pytest-asyncio==1.0.0" 11 | ] 12 | 13 | [project.scripts] 14 | data-gouv-fr-mcp-server = "main:main" 15 | 16 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "albert-api" 3 | version = "0.1.6" 4 | description = "Albert API projects" 5 | requires-python = ">=3.12" 6 | license = { text = "MIT" } 7 | dependencies = [ 8 | "openai==1.63.0", 9 | "requests==2.32.3", 10 | "pydantic==2.10.6", 11 | "pydantic-settings==2.7.1", 12 | "alembic==1.15.1", 13 | "psycopg2==2.9.10", 14 | "asyncpg==0.30.0", 15 | "sqlalchemy[asyncio]==2.0.38", 16 | "sqlalchemy-utils==0.41.2", 17 | "sentry-sdk[fastapi]>=2.28.0", 18 | ] 19 | 20 | [project.optional-dependencies] 21 | ui = [ 22 | "streamlit==1.45.1", 23 | "streamlit-extras==0.5.0", 24 |
"bcrypt==4.3.0", 25 | ] 26 | app = [ 27 | # auth 28 | "python-jose==3.4.0", 29 | "limits==5.1.0", 30 | "coredis==4.20.0", 31 | "tiktoken==0.9.0", 32 | "ecologits==0.6.2", 33 | "pycountry==24.6.1", 34 | 35 | # data 36 | "elasticsearch==8.17.1", 37 | "langchain-text-splitters==0.3.8", 38 | "qdrant-client==1.10.1", 39 | "redis==5.2.1", 40 | "beautifulsoup4==4.13.3", 41 | "PyMuPDF==1.26.0", 42 | 43 | # app 44 | "gunicorn==23.0.0", 45 | "fastapi==0.115.8", 46 | "prometheus-fastapi-instrumentator==7.0.2", 47 | "pyyaml==6.0.2", 48 | "uvicorn==0.34.0", 49 | "python-multipart==0.0.20", 50 | ] 51 | dev = [ 52 | "uvicorn==0.34.0", 53 | "ruff==0.6.5", 54 | "pre-commit==4.1.0", 55 | "jupyter==1.1.1", 56 | ] 57 | test = [ 58 | "pytest==8.3.4", 59 | "pytest-cov==6.0.0", 60 | "pytest-snapshot==0.9.0", 61 | "vcrpy>=4.2.0", 62 | "pytest-xdist==3.6.1", 63 | "pytest-asyncio==1.0.0", 64 | "respx", 65 | "responses" 66 | ] 67 | 68 | [tool.setuptools] 69 | py-modules = [] 70 | 71 | [tool.ruff] 72 | line-length = 150 73 | 74 | [tool.ruff.lint] 75 | ignore = ["F403", "F841"] # import * and never used variables 76 | 77 | [tool.ruff.lint.isort] 78 | force-sort-within-sections = true 79 | known-first-party = ["config", "utils", "app"] 80 | forced-separate = ["tests"] 81 | 82 | [tool.ruff.lint.isort.sections] 83 | sections = ["FUTURE", "STDLIB", "THIRDPARTY", "FIRSTPARTY", "LOCALFOLDER"] 84 | 85 | [tool.pytest.ini_options] 86 | testpaths = ["tests"] 87 | -------------------------------------------------------------------------------- /scripts/postgres_entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function create_database() { 4 | local database=$1 5 | echo " Creating database '$database'" 6 | psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" <<-EOSQL 7 | CREATE DATABASE $database WITH ENCODING 'utf8'; 8 | EOSQL 9 | } 10 | 11 | if [[ -n "$CREATE_DB" ]]; then 12 | echo "Multiple database creation requested: $CREATE_DB" 13 | for db in $(echo $CREATE_DB | tr ',' ' '); do 14 | create_database $db 15 | done 16 | echo "Multiple databases created" 17 | fi -------------------------------------------------------------------------------- /scripts/startup_api.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # Environment variables 5 | WORKERS=${WORKERS:-1} 6 | WORKER_CONNECTIONS=${WORKER_CONNECTIONS:-1000} 7 | TIMEOUT=${TIMEOUT:-30} 8 | KEEP_ALIVE=${KEEP_ALIVE:-75} 9 | GRACEFUL_TIMEOUT=${GRACEFUL_TIMEOUT:-75} 10 | GUNICORN_CMD_ARGS=${GUNICORN_CMD_ARGS:-""} # ex: --log-config app/log.conf 11 | 12 | # Set default hosts if not already defined 13 | if [ -z "$POSTGRES_HOST" ]; then 14 | export POSTGRES_HOST=localhost 15 | fi 16 | if [ -z "$REDIS_HOST" ]; then 17 | export REDIS_HOST=localhost 18 | fi 19 | if [ -z "$QDRANT_HOST" ]; then 20 | export QDRANT_HOST=localhost 21 | fi 22 | 23 | # Run database migrations 24 | python -m alembic -c app/alembic.ini upgrade head 25 | 26 | # Start the application server 27 | exec gunicorn app.main:app \ 28 | --workers $WORKERS \ 29 | --worker-connections $WORKER_CONNECTIONS \ 30 | --timeout $TIMEOUT \ 31 | --worker-class uvicorn.workers.UvicornWorker \ 32 | --keep-alive $KEEP_ALIVE \ 33 | --graceful-timeout $GRACEFUL_TIMEOUT \ 34 | --bind 0.0.0.0:8000 \ 35 | $GUNICORN_CMD_ARGS 36 | -------------------------------------------------------------------------------- /scripts/startup_ui.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # Environment variables 5 | MAX_UPLOAD_SIZE=${MAX_UPLOAD_SIZE:-20} 6 | STREAMLIT_CMD_ARGS=${STREAMLIT_CMD_ARGS:-""} # ex: --server.baseUrlPath=/playground 7 | # Set default hosts if not already defined 8 | if [ -z "$POSTGRES_HOST" ]; then 9 | export POSTGRES_HOST=localhost 10 | fi 11 | 12 | # Run database migrations 13 | python -m alembic -c ui/alembic.ini upgrade head 14 | 15 | # Start the application server 16 | if [ -f /ui/main.py ]; then 17 | MAIN_PY_PATH=/ui/main.py 18 | else 19 | MAIN_PY_PATH=./ui/main.py 20 | fi 21 | exec streamlit run "$MAIN_PY_PATH" \ 22 | --server.port=8501 \ 23 | --browser.gatherUsageStats false \ 24 | --theme.base=light \ 25 | --theme.primaryColor=#6a6af4 \ 26 | --server.maxUploadSize=$MAX_UPLOAD_SIZE \ 27 | $STREAMLIT_CMD_ARGS 28 | -------------------------------------------------------------------------------- /ui/Dockerfile: -------------------------------------------------------------------------------- 1 | # First, build the application in the `/ui` directory. 2 | # See `Dockerfile` for details. 3 | FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim AS builder 4 | ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy 5 | 6 | # Disable Python downloads, because we want to use the system interpreter 7 | # across both images. If using a managed Python version, it needs to be 8 | # copied from the build image into the final image; see `standalone.Dockerfile` 9 | # for an example. 10 | ENV UV_PYTHON_DOWNLOADS=0 11 | # Install build dependencies 12 | RUN apt-get update && apt-get install -y \ 13 | libpq-dev \ 14 | gcc \ 15 | python3-dev \ 16 | && rm -rf /var/lib/apt/lists/* 17 | 18 | WORKDIR / 19 | # Copy project files 20 | COPY ./ui/ /ui 21 | RUN --mount=type=cache,target=/root/.cache/uv \ 22 | uv venv 23 | RUN --mount=type=cache,target=/root/.cache/uv \ 24 | --mount=type=bind,source=pyproject.toml,target=pyproject.toml \ 25 | uv pip install ".[ui]" 26 | 27 | # Final stage 28 | FROM python:3.12-slim 29 | 30 | RUN groupadd --gid 1100 albert && \ 31 | useradd --home /ui --gid 1100 --uid 1100 albert 32 | 33 | # Only runtime dependencies 34 | RUN apt-get update && apt-get install -y \ 35 | libpq5 \ 36 | && rm -rf /var/lib/apt/lists/* 37 | 38 | COPY scripts/startup_ui.sh /startup.sh 39 | RUN chown albert:albert /startup.sh 40 | RUN chmod u+x /startup.sh 41 | 42 | # Set a non-root user 43 | USER albert 44 | WORKDIR / 45 | 46 | # Copy application from builder 47 | COPY --from=builder --chown=albert:albert /ui /ui 48 | COPY --from=builder --chown=albert:albert /.venv /.venv 49 | ENV PATH="/.venv/bin:${PATH}" 50 | ENV PYTHONPATH="/ui:${PYTHONPATH}" 51 | 52 | # Launch the application 53 | CMD ["/startup.sh"] 54 | -------------------------------------------------------------------------------- /ui/alembic/env.py: -------------------------------------------------------------------------------- 1 | from logging.config import fileConfig 2 | 3 | from alembic import context 4 | from sqlalchemy import engine_from_config, pool 5 | 6 | from ui.settings import settings 7 | from ui.backend.sql.models import Base 8 | 9 | config = context.config 10 | config.set_main_option(name="sqlalchemy.url", value=settings.databases.sql.args.get("url").replace("+asyncpg", "")) 11 | 12 | if config.config_file_name is not None: 13 | fileConfig(config.config_file_name) 14 | 15 | target_metadata = Base.metadata 16 | 17 | 18 | def run_migrations_offline() -> None: 19 | """Run migrations in
'offline' mode. 20 | 21 | This configures the context with just a URL 22 | and not an Engine, though an Engine is acceptable 23 | here as well. By skipping the Engine creation 24 | we don't even need a DBAPI to be available. 25 | 26 | Calls to context.execute() here emit the given string to the 27 | script output. 28 | 29 | """ 30 | url = config.get_main_option("sqlalchemy.url") 31 | context.configure( 32 | url=url, 33 | target_metadata=target_metadata, 34 | literal_binds=True, 35 | dialect_opts={"paramstyle": "named"}, 36 | ) 37 | 38 | with context.begin_transaction(): 39 | context.run_migrations() 40 | 41 | 42 | def run_migrations_online() -> None: 43 | """Run migrations in 'online' mode. 44 | 45 | In this scenario we need to create an Engine 46 | and associate a connection with the context. 47 | 48 | """ 49 | connectable = engine_from_config( 50 | config.get_section(config.config_ini_section, {}), 51 | prefix="sqlalchemy.", 52 | poolclass=pool.NullPool, 53 | ) 54 | 55 | with connectable.connect() as connection: 56 | context.configure(connection=connection, target_metadata=target_metadata) 57 | 58 | with context.begin_transaction(): 59 | context.run_migrations() 60 | 61 | 62 | if context.is_offline_mode(): 63 | run_migrations_offline() 64 | else: 65 | run_migrations_online() 66 | -------------------------------------------------------------------------------- /ui/alembic/script.py.mako: -------------------------------------------------------------------------------- 1 | """${message} 2 | 3 | Revision ID: ${up_revision} 4 | Revises: ${down_revision | comma,n} 5 | Create Date: ${create_date} 6 | 7 | """ 8 | from typing import Sequence, Union 9 | 10 | from alembic import op 11 | import sqlalchemy as sa 12 | ${imports if imports else ""} 13 | 14 | # revision identifiers, used by Alembic. 15 | revision: str = ${repr(up_revision)} 16 | down_revision: Union[str, None] = ${repr(down_revision)} 17 | branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)} 18 | depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)} 19 | 20 | 21 | def upgrade() -> None: 22 | """Upgrade schema.""" 23 | ${upgrades if upgrades else "pass"} 24 | 25 | 26 | def downgrade() -> None: 27 | """Downgrade schema.""" 28 | ${downgrades if downgrades else "pass"} 29 | -------------------------------------------------------------------------------- /ui/alembic/versions/2025_03_25_1347-647433280fa7_init_database.py: -------------------------------------------------------------------------------- 1 | """Init database 2 | 3 | Revision ID: 647433280fa7 4 | Revises: 5 | Create Date: 2025-03-25 13:47:38.120257 6 | 7 | """ 8 | 9 | from typing import Sequence, Union 10 | 11 | from alembic import op 12 | import sqlalchemy as sa 13 | 14 | 15 | # revision identifiers, used by Alembic. 16 | revision: str = "647433280fa7" 17 | down_revision: Union[str, None] = None 18 | branch_labels: Union[str, Sequence[str], None] = None 19 | depends_on: Union[str, Sequence[str], None] = None 20 | 21 | 22 | def upgrade() -> None: 23 | # ### commands auto generated by Alembic - please adjust! 
### 24 | op.create_table( 25 | "user", 26 | sa.Column("id", sa.Integer(), nullable=False), 27 | sa.Column("name", sa.String(), nullable=True), 28 | sa.Column("password", sa.String(), nullable=False), 29 | sa.Column("api_role_id", sa.Integer(), nullable=False), 30 | sa.Column("api_user_id", sa.Integer(), nullable=False), 31 | sa.Column("api_key", sa.String(), nullable=True), 32 | sa.Column("expires_at", sa.DateTime(), nullable=True), 33 | sa.Column("created_at", sa.DateTime(), nullable=False), 34 | sa.Column("updated_at", sa.DateTime(), nullable=False), 35 | sa.PrimaryKeyConstraint("id"), 36 | sa.UniqueConstraint("api_key"), 37 | sa.UniqueConstraint("api_user_id"), 38 | ) 39 | op.create_index(op.f("ix_user_id"), "user", ["id"], unique=False) 40 | op.create_index(op.f("ix_user_name"), "user", ["name"], unique=True) 41 | # ### end Alembic commands ### 42 | 43 | 44 | def downgrade() -> None: 45 | # ### commands auto generated by Alembic - please adjust! ### 46 | op.drop_index(op.f("ix_user_name"), table_name="user") 47 | op.drop_index(op.f("ix_user_id"), table_name="user") 48 | op.drop_table("user") 49 | # ### end Alembic commands ### 50 | -------------------------------------------------------------------------------- /ui/alembic/versions/2025_04_07_1410-3ad8934ab327_remove_expires_at_column.py: -------------------------------------------------------------------------------- 1 | """remove expires at column 2 | 3 | Revision ID: 3ad8934ab327 4 | Revises: 647433280fa7 5 | Create Date: 2025-04-07 14:10:00.281290 6 | 7 | """ 8 | 9 | from typing import Sequence, Union 10 | 11 | from alembic import op 12 | import sqlalchemy as sa 13 | from sqlalchemy.dialects import postgresql 14 | 15 | # revision identifiers, used by Alembic. 16 | revision: str = "3ad8934ab327" 17 | down_revision: Union[str, None] = "647433280fa7" 18 | branch_labels: Union[str, Sequence[str], None] = None 19 | depends_on: Union[str, Sequence[str], None] = None 20 | 21 | 22 | def upgrade() -> None: 23 | """Upgrade schema.""" 24 | # ### commands auto generated by Alembic - please adjust! ### 25 | op.drop_column("user", "expires_at") 26 | # ### end Alembic commands ### 27 | 28 | 29 | def downgrade() -> None: 30 | """Downgrade schema.""" 31 | # ### commands auto generated by Alembic - please adjust! ### 32 | op.add_column("user", sa.Column("expires_at", postgresql.TIMESTAMP(), autoincrement=False, nullable=True)) 33 | # ### end Alembic commands ### 34 | -------------------------------------------------------------------------------- /ui/alembic/versions/2025_04_18_1923-c0bfeeca22a9_add_unique_api_token_id.py: -------------------------------------------------------------------------------- 1 | """add unique api_token_id 2 | 3 | Revision ID: c0bfeeca22a9 4 | Revises: 3ad8934ab327 5 | Create Date: 2025-04-18 19:23:07.064654 6 | 7 | """ 8 | 9 | from typing import Sequence, Union 10 | 11 | from alembic import op 12 | import sqlalchemy as sa 13 | 14 | 15 | # revision identifiers, used by Alembic. 16 | revision: str = "c0bfeeca22a9" 17 | down_revision: Union[str, None] = "3ad8934ab327" 18 | branch_labels: Union[str, Sequence[str], None] = None 19 | depends_on: Union[str, Sequence[str], None] = None 20 | 21 | 22 | def upgrade() -> None: 23 | """Upgrade schema.""" 24 | # ### commands auto generated by Alembic - please adjust! 
### 25 | op.add_column("user", sa.Column("api_key_id", sa.Integer(), nullable=True)) 26 | 27 | # Set api_key_id to 0 for all existing rows 28 | conn = op.get_bind() 29 | conn.execute(sa.text('UPDATE "user" SET api_key_id = 0')) 30 | 31 | # Now make the column NOT NULL 32 | op.alter_column("user", "api_key_id", nullable=False) 33 | 34 | # Continue with the rest of the migration 35 | op.alter_column("user", "api_key", existing_type=sa.VARCHAR(), nullable=False) 36 | op.drop_constraint("user_api_key_key", "user", type_="unique") 37 | # ### end Alembic commands ### 38 | 39 | 40 | def downgrade() -> None: 41 | """Downgrade schema.""" 42 | # ### commands auto generated by Alembic - please adjust! ### 43 | op.create_unique_constraint("user_api_key_key", "user", ["api_key"]) 44 | op.alter_column("user", "api_key", existing_type=sa.VARCHAR(), nullable=True) 45 | op.drop_column("user", "api_key_id") 46 | # ### end Alembic commands ### 47 | -------------------------------------------------------------------------------- /ui/backend/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/etalab-ia/albert-api/0446f06baa3d561bb1bbd34ab1dd9a0441dd9302/ui/backend/__init__.py -------------------------------------------------------------------------------- /ui/backend/account.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import requests 4 | from sqlalchemy import select, update 5 | import streamlit as st 6 | from streamlit_extras.stylable_container import stylable_container 7 | 8 | from ui.backend.common import check_password 9 | from ui.backend.login import check_password as verify_password, get_hashed_password 10 | from ui.backend.sql.models import User as UserTable 11 | from ui.backend.sql.session import get_session 12 | from ui.settings import settings 13 | 14 | 15 | def change_password(current_password: str, new_password: str, confirm_password: str): 16 | session = next(get_session()) 17 | hashed_password = session.execute(select(UserTable.password).where(UserTable.name == st.session_state["user"].name)).scalar_one() 18 | 19 | new_password = new_password.strip() 20 | confirm_password = confirm_password.strip() 21 | 22 | if not verify_password(password=current_password, hashed_password=hashed_password):  # compare the supplied password with the stored hash 23 | st.toast("Wrong current password", icon="❌") 24 | return 25 | 26 | if new_password != confirm_password: 27 | st.toast("New password and confirm password do not match", icon="❌") 28 | return 29 | 30 | if new_password == current_password: 31 | st.toast("New password cannot be the same as the current password", icon="❌") 32 | return 33 | 34 | if not check_password(new_password): 35 | return 36 | 37 | session.execute(update(UserTable).where(UserTable.name == st.session_state["user"].name).values(password=get_hashed_password(new_password))) 38 | session.commit() 39 | 40 | st.toast("Password updated", icon="✅") 41 | time.sleep(0.5) 42 | st.session_state["login_status"] = False 43 | st.rerun() 44 | 45 | 46 | def create_token(name: str, expires_at: int): 47 | response = requests.post( 48 | url=f"{settings.playground.api_url}/tokens", 49 | json={"name": name, "expires_at": expires_at}, 50 | headers={"Authorization": f"Bearer {st.session_state["user"].api_key}"}, 51 | ) 52 | if response.status_code == 201: 53 | # hide the close icon to force a page reload via the button 54 | st.html( 55 | """ 56 | 61 | """ 62 | ) 63 | 64 | @st.dialog(title="Token", width="large") 65 | def display_token(): 66 | st.warning("**⚠️ Copy the following API key to your clipboard, it will
not be displayed again. Refresh the page after saving the API key.**") # fmt: off 67 | st.code(response.json()["token"], language="text") 68 | with stylable_container(key="close", css_styles="button{float: right;}"): 69 | if st.button("**:material/close:**", key="Close", type="primary"): 70 | st.rerun() 71 | 72 | st.toast("Create succeed", icon="✅") 73 | time.sleep(0.5) 74 | display_token() 75 | 76 | else: 77 | st.toast(response.json()["detail"], icon="❌") 78 | 79 | 80 | def delete_token(token_id: int): 81 | if st.session_state["user"].api_key_id == token_id: 82 | st.toast("Cannot delete the Playground API key", icon="❌") 83 | return 84 | 85 | response = requests.delete( 86 | url=f"{settings.playground.api_url}/tokens/{token_id}", headers={"Authorization": f"Bearer {st.session_state["user"].api_key}"} 87 | ) 88 | if response.status_code == 204: 89 | st.toast("Delete succeed", icon="✅") 90 | time.sleep(0.5) 91 | st.rerun() 92 | else: 93 | st.toast(response.json()["detail"], icon="❌") 94 | -------------------------------------------------------------------------------- /ui/backend/chat.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple 2 | 3 | from openai import OpenAI 4 | import requests 5 | import streamlit as st 6 | 7 | from ui.settings import settings 8 | 9 | 10 | def generate_stream(messages: List[dict], params: dict, rag: bool, rerank: bool) -> Tuple[str, List[str]]: 11 | sources = [] 12 | if rag: 13 | prompt = messages[-1]["content"] 14 | k = params["rag"]["k"] * 2 if rerank else params["rag"]["k"] 15 | data = {"collections": params["rag"]["collections"], "k": k, "prompt": messages[-1]["content"], "score_threshold": None} 16 | response = requests.post( 17 | url=f"{settings.playground.api_url}/v1/search", json=data, headers={"Authorization": f"Bearer {st.session_state["user"].api_key}"} 18 | ) 19 | assert response.status_code == 200, f"{response.status_code} - {response.json()}" 20 | 21 | prompt_template = """Réponds à la question suivante de manière claire en te basant sur les extraits de documents ci-dessous. Si les documents ne sont pas pertinents pour répondre à la question, réponds que tu ne sais pas ou réponds directement la question à l'aide de tes connaissances. Réponds en français. 
22 | La question de l'utilisateur est : {prompt} 23 | 24 | Les documents sont : 25 | 26 | 27 | {chunks} 28 | """ 29 | chunks = [chunk["chunk"] for chunk in response.json()["data"]] 30 | 31 | if rerank: 32 | data = { 33 | "prompt": prompt, 34 | "input": [chunk["content"] for chunk in chunks], 35 | } 36 | response = requests.post( 37 | url=f"{settings.playground.api_url}/v1/rerank", json=data, headers={"Authorization": f"Bearer {st.session_state["user"].api_key}"} 38 | ) 39 | assert response.status_code == 200, f"{response.status_code} - {response.json()}" 40 | 41 | rerank_scores = sorted(response.json()["data"], key=lambda x: x["score"], reverse=True)  # keep the most relevant chunks (highest scores first) 42 | chunks = [chunks[result["index"]] for result in rerank_scores[: params["rag"]["k"]]] 43 | 44 | sources = list(set([chunk["metadata"]["document_name"] for chunk in chunks])) 45 | chunks = [chunk["content"] for chunk in chunks] 46 | prompt = prompt_template.format(prompt=prompt, chunks="\n\n".join(chunks)) 47 | messages = messages[:-1] + [{"role": "user", "content": prompt}] 48 | 49 | client = OpenAI(base_url=f"{settings.playground.api_url}/v1", api_key=st.session_state["user"].api_key) 50 | stream = client.chat.completions.create(stream=True, messages=messages, **params["sampling_params"]) 51 | 52 | return stream, sources 53 | -------------------------------------------------------------------------------- /ui/backend/documents.py: -------------------------------------------------------------------------------- 1 | import time 2 | from typing import Optional 3 | 4 | import requests 5 | import streamlit as st 6 | 7 | from ui.settings import settings 8 | 9 | 10 | def create_collection(name: str, description: str) -> None: 11 | response = requests.post( 12 | url=f"{settings.playground.api_url}/v1/collections", 13 | json={"name": name, "description": description}, 14 | headers={"Authorization": f"Bearer {st.session_state["user"].api_key}"}, 15 | ) 16 | 17 | if response.status_code != 201: 18 | st.toast(response.json()["detail"], icon="❌") 19 | return 20 | 21 | st.toast("Create succeed", icon="✅") 22 | st.session_state["new_collection"] = False 23 | time.sleep(0.5) 24 | st.rerun() 25 | 26 | 27 | def update_collection(collection_id: int, name: Optional[str] = None, description: Optional[str] = None) -> None: 28 | params = {} 29 | if name: 30 | params["name"] = name 31 | if description: 32 | params["description"] = description 33 | 34 | response = requests.patch( 35 | url=f"{settings.playground.api_url}/v1/collections/{collection_id}", 36 | json=params, 37 | headers={"Authorization": f"Bearer {st.session_state["user"].api_key}"}, 38 | ) 39 | 40 | if response.status_code != 204: 41 | st.toast(response.json()["detail"], icon="❌") 42 | return 43 | 44 | st.toast("Update succeed", icon="✅") 45 | time.sleep(0.5) 46 | st.rerun() 47 | 48 | 49 | def delete_collection(collection_id: int) -> None: 50 | response = requests.delete( 51 | url=f"{settings.playground.api_url}/v1/collections/{collection_id}", 52 | headers={"Authorization": f"Bearer {st.session_state["user"].api_key}"}, 53 | ) 54 | 55 | if response.status_code != 204: 56 | st.toast(response.json()["detail"], icon="❌") 57 | return 58 | 59 | st.toast("Delete succeed", icon="✅") 60 | time.sleep(0.5) 61 | st.rerun() 62 | 63 | 64 | def upload_document(file, collection_id: str) -> None: 65 | response = requests.post( 66 | url=f"{settings.playground.api_url}/v1/documents", 67 | data={"collection": collection_id}, 68 | files={"file": (file.name, file.getvalue(), file.type)}, 69 | headers={"Authorization": f"Bearer
{st.session_state["user"].api_key}"}, 70 | ) 71 | 72 | if response.status_code != 201: 73 | st.toast(response.json()["detail"], icon="❌") 74 | return 75 | 76 | st.toast("Upload succeed", icon="✅") 77 | time.sleep(0.5) 78 | st.rerun() 79 | 80 | 81 | def delete_document(document_id: str) -> None: 82 | response = requests.delete( 83 | url=f"{settings.playground.api_url}/v1/documents/{document_id}", 84 | headers={"Authorization": f"Bearer {st.session_state["user"].api_key}"}, 85 | ) 86 | 87 | if response.status_code != 204: 88 | st.toast(response.json()["detail"], icon="❌") 89 | return 90 | 91 | st.toast("Delete succeed", icon="✅") 92 | time.sleep(0.5) 93 | st.rerun() 94 | -------------------------------------------------------------------------------- /ui/backend/login.py: -------------------------------------------------------------------------------- 1 | import bcrypt 2 | from pydantic import BaseModel 3 | import requests 4 | from sqlalchemy import select 5 | from sqlalchemy.orm import Session 6 | import streamlit as st 7 | 8 | from ui.backend.sql.models import User as UserTable 9 | from ui.settings import settings 10 | from ui.variables import ADMIN_PERMISSIONS 11 | 12 | 13 | class User(BaseModel): 14 | id: int 15 | name: str 16 | api_key_id: int 17 | api_key: str 18 | role: dict 19 | user: dict 20 | 21 | 22 | def get_hashed_password(password: str) -> str: 23 | return bcrypt.hashpw(password=password.encode(encoding="utf-8"), salt=bcrypt.gensalt()).decode(encoding="utf-8") 24 | 25 | 26 | def check_password(password: str, hashed_password: str) -> bool: 27 | return bcrypt.checkpw(password=password.encode(encoding="utf-8"), hashed_password=hashed_password.encode(encoding="utf-8")) 28 | 29 | 30 | def login(user_name: str, user_password: str, session: Session) -> dict: 31 | # master login flow 32 | if user_name == settings.auth.master_username: 33 | response = requests.get(url=f"{settings.playground.api_url}/users/me", headers={"Authorization": f"Bearer {user_password}"}) 34 | if response.status_code != 404: # only the master token gets a 404 on /users/me 35 | st.error(response.json()["detail"]) 36 | st.stop() 37 | 38 | response = requests.get(url=f"{settings.playground.api_url}/v1/models", headers={"Authorization": f"Bearer {user_password}"}) 39 | if response.status_code != 200: 40 | st.error(response.json()["detail"]) 41 | st.stop() 42 | models = response.json()["data"] 43 | 44 | limits = [] 45 | for model in models: 46 | limits.append({"model": model["id"], "type": "tpm", "value": None}) 47 | limits.append({"model": model["id"], "type": "tpd", "value": None}) 48 | limits.append({"model": model["id"], "type": "rpm", "value": None}) 49 | limits.append({"model": model["id"], "type": "rpd", "value": None}) 50 | 51 | role = {"object": "role", "id": 0, "name": "master", "default": False, "permissions": ADMIN_PERMISSIONS, "limits": limits} 52 | user = User( 53 | id=0, 54 | name=settings.auth.master_username, 55 | api_key=user_password, 56 | api_key_id=0, 57 | user={"expires_at": None, "budget": None}, 58 | role=role, 59 | ) 60 | 61 | st.session_state["login_status"] = True 62 | st.session_state["user"] = user 63 | st.rerun() 64 | 65 | # basic login flow 66 | db_user = session.execute(select(UserTable).where(UserTable.name == user_name)).scalar_one_or_none() 67 | if not db_user: 68 | st.error("Invalid username or password") 69 | st.stop() 70 | 71 | if not check_password(password=user_password, hashed_password=db_user.password): 72 | st.error("Invalid username or password") 73 | st.stop() 74 | 75 | response =
requests.get(url=f"{settings.playground.api_url}/users/me", headers={"Authorization": f"Bearer {db_user.api_key}"}) 76 | if response.status_code != 200: 77 | st.error(response.json()["detail"]) 78 | st.stop() 79 | user = response.json() 80 | 81 | response = requests.get(url=f"{settings.playground.api_url}/roles/me", headers={"Authorization": f"Bearer {db_user.api_key}"}) 82 | if response.status_code != 200: 83 | st.error(response.json()["detail"]) 84 | st.stop() 85 | role = response.json() 86 | 87 | user = User(id=db_user.id, name=db_user.name, api_key_id=db_user.api_key_id, api_key=db_user.api_key, user=user, role=role) 88 | 89 | st.session_state["login_status"] = True 90 | st.session_state["user"] = user 91 | st.rerun() 92 | -------------------------------------------------------------------------------- /ui/backend/sql/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/etalab-ia/albert-api/0446f06baa3d561bb1bbd34ab1dd9a0441dd9302/ui/backend/sql/__init__.py -------------------------------------------------------------------------------- /ui/backend/sql/models.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy import Column, DateTime, Integer, String 2 | from sqlalchemy.orm import declarative_base 3 | from sqlalchemy.sql import func 4 | 5 | Base = declarative_base() 6 | 7 | 8 | class User(Base): 9 | __tablename__ = "user" 10 | 11 | id = Column(Integer, primary_key=True, index=True) 12 | name = Column(String, index=True, unique=True) 13 | password = Column(String, nullable=False) 14 | api_role_id = Column(Integer, nullable=False) 15 | api_user_id = Column(Integer, unique=True, nullable=False) 16 | api_key_id = Column(Integer, nullable=False) 17 | api_key = Column(String, nullable=False) 18 | created_at = Column(DateTime, default=func.now(), nullable=False) 19 | updated_at = Column(DateTime, default=func.now(), nullable=False, onupdate=func.now()) 20 | -------------------------------------------------------------------------------- /ui/backend/sql/session.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy import create_engine 2 | from sqlalchemy.orm import sessionmaker 3 | 4 | from ui.settings import settings 5 | 6 | engine = create_engine(**settings.databases.sql.args) 7 | SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) 8 | 9 | 10 | def get_session(): 11 | session = SessionLocal() 12 | try: 13 | yield session 14 | finally: 15 | session.close() 16 | -------------------------------------------------------------------------------- /ui/frontend/header.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import streamlit as st 4 | from streamlit_extras.stylable_container import stylable_container 5 | 6 | from ui.backend.login import login 7 | from ui.backend.sql.session import get_session 8 | 9 | 10 | def header(): 11 | def authenticate(): 12 | session = next(get_session()) 13 | 14 | @st.dialog(title="Login") 15 | def login_form(): 16 | with st.form(key="login"): 17 | user_name = st.text_input(label="Email", type="default", key="user_id", icon=":material/email:") 18 | user_password = st.text_input(label="Password", type="password", key="password", icon=":material/lock:") 19 | 20 | # strip input 21 | user_name = user_name.strip() 22 | user_password = user_password.strip() 23 | 24 | submit = st.form_submit_button(label="Submit") 25 
| if submit: 26 | login(user_name, user_password, session) 27 | 28 | if st.session_state.get("login_status") is None: 29 | login_form() 30 | 31 | with stylable_container(key="Header", css_styles="button{float: right;}"): 32 | col1, col2 = st.columns(2) 33 | with col1: 34 | st.subheader("Albert playground") 35 | 36 | # Authentication 37 | authenticate() 38 | if st.session_state.get("login_status") is None: 39 | st.stop() 40 | 41 | with col2: 42 | logout = st.button("Logout") 43 | if logout: 44 | st.session_state.pop("login_status", default=None) 45 | st.session_state.pop("user", default=None) 46 | st.session_state.pop("api_key", default=None) 47 | st.cache_data.clear() 48 | st.rerun() 49 | 50 | if st.session_state.get("user") and st.session_state["user"].role["name"] == "master": 51 | st.warning("You are logged in as the master user. This is not recommended for production use, please use a regular user instead.") 52 | if st.session_state.get("user") and st.session_state["user"].user["expires_at"] and st.session_state["user"].user["expires_at"] < int(time.time()): # fmt: off 53 | st.warning("**Your account has expired. Please contact support to renew your account.**") 54 | st.markdown("***") 55 | -------------------------------------------------------------------------------- /ui/frontend/transcription.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import traceback 3 | 4 | from openai import OpenAI 5 | import streamlit as st 6 | 7 | from ui.backend.common import get_models, settings 8 | from ui.frontend.header import header 9 | from ui.variables import MODEL_TYPE_AUDIO, TRANSCRIPTION_SUPPORTED_LANGUAGES 10 | 11 | header() 12 | 13 | # Data 14 | try: 15 | models = get_models(types=[MODEL_TYPE_AUDIO]) 16 | except Exception: 17 | st.error(body="Error to fetch user data.") 18 | logging.error(traceback.format_exc()) 19 | st.stop() 20 | 21 | openai_client = OpenAI(base_url=f"{settings.playground.api_url}/v1", api_key=st.session_state["user"].api_key) 22 | 23 | # Sidebar 24 | with st.sidebar: 25 | params = {} 26 | st.subheader(body="Audio parameters") 27 | params["model"] = st.selectbox(label="Audio model", options=models) 28 | params["temperature"] = st.slider(label="Temperature", value=0.2, min_value=0.0, max_value=1.0, step=0.1) 29 | params["language"] = st.selectbox( 30 | label="Language", options=TRANSCRIPTION_SUPPORTED_LANGUAGES, index=TRANSCRIPTION_SUPPORTED_LANGUAGES.index("french") 31 | ) 32 | 33 | # Main 34 | col1, col2 = st.columns(spec=2) 35 | file = st.file_uploader(label="Upload an audio file", type=["mp3", "wav", "m4a"]) 36 | record = st.audio_input(label="Record a voice message") 37 | 38 | if file and record: 39 | st.error(body="Please upload only one file at a time.") 40 | st.stop() 41 | 42 | audio = record or file 43 | result = None 44 | _, center, _ = st.columns(spec=3) 45 | with center: 46 | submit = st.button(label="**Transcribe**", use_container_width=True) 47 | 48 | if submit and audio: 49 | with st.spinner(text="Transcribing audio..."): 50 | try: 51 | response = openai_client.audio.transcriptions.create( 52 | file=audio, 53 | model=params["model"], 54 | temperature=params["temperature"], 55 | ) 56 | result = response.text 57 | except Exception: 58 | st.error(body="Error transcribing audio.") 59 | logging.error(traceback.format_exc()) 60 | 61 | if result: 62 | st.caption(body="Result") 63 | st.code(body=result, language="markdown", wrap_lines=True) 64 | 
-------------------------------------------------------------------------------- /ui/main.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 5 | 6 | import streamlit as st 7 | 8 | from ui.settings import settings 9 | 10 | st.set_page_config( 11 | page_title="Albert playground", 12 | page_icon=settings.playground.page_icon, 13 | layout="wide", 14 | initial_sidebar_state="expanded", 15 | menu_items={ 16 | "Get Help": settings.playground.menu_items.get_help, 17 | "Report a bug": settings.playground.menu_items.report_a_bug, 18 | "About": settings.playground.menu_items.about, 19 | }, 20 | ) 21 | 22 | st.logo(image=settings.playground.logo, link=settings.playground.home_url, size="large") 23 | 24 | # Set the width of the sidebar to 400px 25 | st.markdown( 26 | """ 27 | 32 | """, 33 | unsafe_allow_html=True, 34 | ) 35 | 36 | pg = st.navigation( 37 | pages=[ 38 | st.Page(page="frontend/account.py", title="My account", icon=":material/account_circle:"), 39 | st.Page(page="frontend/chat.py", title="Chat", icon=":material/chat:"), 40 | st.Page(page="frontend/documents.py", title="Documents", icon=":material/file_copy:"), 41 | st.Page(page="frontend/summarize.py", title="Summarize", icon=":material/contract_edit:"), 42 | st.Page(page="frontend/transcription.py", title="Transcription", icon=":material/graphic_eq:"), 43 | st.Page(page="frontend/admin.py", title="Admin", icon=":material/admin_panel_settings:"), 44 | ] 45 | ) 46 | pg.run() 47 | -------------------------------------------------------------------------------- /ui/variables.py: -------------------------------------------------------------------------------- 1 | MODEL_TYPE_AUDIO = "automatic-speech-recognition" 2 | MODEL_TYPE_EMBEDDINGS = "text-embeddings-inference" 3 | MODEL_TYPE_IMAGE_TEXT_TO_TEXT = "image-text-to-text" 4 | MODEL_TYPE_LANGUAGE = "text-generation" 5 | MODEL_TYPE_RERANK = "text-classification" 6 | 7 | COLLECTION_VISIBILITY_PRIVATE = "private" 8 | 9 | ADMIN_PERMISSIONS = [ 10 | "create_role", 11 | "read_role", 12 | "update_role", 13 | "delete_role", 14 | "create_user", 15 | "read_user", 16 | "update_user", 17 | "delete_user", 18 | ] 19 | 20 | TRANSCRIPTION_SUPPORTED_LANGUAGES = [ 21 | "afrikaans", 22 | "albanian", 23 | "amharic", 24 | "arabic", 25 | "armenian", 26 | "assamese", 27 | "azerbaijani", 28 | "bashkir", 29 | "basque", 30 | "belarusian", 31 | "bengali", 32 | "bosnian", 33 | "breton", 34 | "bulgarian", 35 | "burmese", 36 | "cantonese", 37 | "castilian", 38 | "catalan", 39 | "chinese", 40 | "croatian", 41 | "czech", 42 | "danish", 43 | "dutch", 44 | "english", 45 | "estonian", 46 | "faroese", 47 | "finnish", 48 | "flemish", 49 | "french", 50 | "galician", 51 | "georgian", 52 | "german", 53 | "greek", 54 | "gujarati", 55 | "haitian", 56 | "haitian creole", 57 | "hausa", 58 | "hawaiian", 59 | "hebrew", 60 | "hindi", 61 | "hungarian", 62 | "icelandic", 63 | "indonesian", 64 | "italian", 65 | "japanese", 66 | "javanese", 67 | "kannada", 68 | "kazakh", 69 | "khmer", 70 | "korean", 71 | "lao", 72 | "latin", 73 | "latvian", 74 | "letzeburgesch", 75 | "lingala", 76 | "lithuanian", 77 | "luxembourgish", 78 | "macedonian", 79 | "malagasy", 80 | "malay", 81 | "malayalam", 82 | "maltese", 83 | "mandarin", 84 | "maori", 85 | "marathi", 86 | "moldavian", 87 | "moldovan", 88 | "mongolian", 89 | "myanmar", 90 | "nepali", 91 | "norwegian", 92 | "nynorsk", 93 | "occitan", 94 | "panjabi", 95 | 
"pashto", 96 | "persian", 97 | "polish", 98 | "portuguese", 99 | "punjabi", 100 | "pushto", 101 | "romanian", 102 | "russian", 103 | "sanskrit", 104 | "serbian", 105 | "shona", 106 | "sindhi", 107 | "sinhala", 108 | "sinhalese", 109 | "slovak", 110 | "slovenian", 111 | "somali", 112 | "spanish", 113 | "sundanese", 114 | "swahili", 115 | "swedish", 116 | "tagalog", 117 | "tajik", 118 | "tamil", 119 | "tatar", 120 | "telugu", 121 | "thai", 122 | "tibetan", 123 | "turkish", 124 | "turkmen", 125 | "ukrainian", 126 | "urdu", 127 | "uzbek", 128 | "valencian", 129 | "vietnamese", 130 | "welsh", 131 | "yiddish", 132 | "yoruba", 133 | ] 134 | --------------------------------------------------------------------------------