├── version.txt
├── local_data
└── .gitignore
├── models
└── .gitignore
├── private_gpt
├── components
│ ├── __init__.py
│ ├── ingest
│ │ ├── __init__.py
│ │ └── ingest_helper.py
│ ├── embedding
│ │ ├── __init__.py
│ │ ├── custom
│ │ │ ├── __init__.py
│ │ │ └── sagemaker.py
│ │ └── embedding_component.py
│ ├── llm
│ │ ├── custom
│ │ │ └── __init__.py
│ │ └── __init__.py
│ ├── node_store
│ │ ├── __init__.py
│ │ └── node_store_component.py
│ └── vector_store
│ │ ├── __init__.py
│ │ ├── batched_chroma.py
│ │ └── vector_store_component.py
├── server
│ ├── chat
│ │ ├── __init__.py
│ │ ├── chat_router.py
│ │ └── chat_service.py
│ ├── chunks
│ │ ├── __init__.py
│ │ ├── chunks_router.py
│ │ └── chunks_service.py
│ ├── health
│ │ ├── __init__.py
│ │ └── health_router.py
│ ├── ingest
│ │ ├── __init__.py
│ │ ├── model.py
│ │ ├── ingest_watcher.py
│ │ ├── ingest_router.py
│ │ └── ingest_service.py
│ ├── utils
│ │ ├── __init__.py
│ │ └── auth.py
│ ├── embeddings
│ │ ├── __init__.py
│ │ ├── embeddings_service.py
│ │ └── embeddings_router.py
│ ├── __init__.py
│ └── completions
│ │ ├── __init__.py
│ │ └── completions_router.py
├── settings
│ ├── __init__.py
│ ├── yaml.py
│ └── settings_loader.py
├── ui
│ ├── __init__.py
│ ├── avatar-bot.ico
│ └── images.py
├── utils
│ ├── __init__.py
│ ├── typing.py
│ └── eta.py
├── open_ai
│ ├── __init__.py
│ ├── extensions
│ │ ├── __init__.py
│ │ └── context_filter.py
│ └── openai_models.py
├── constants.py
├── main.py
├── __main__.py
├── paths.py
├── di.py
├── __init__.py
└── launcher.py
├── tests
├── __init__.py
├── fixtures
│ ├── __init__.py
│ ├── fast_api_test_client.py
│ ├── auto_close_qdrant.py
│ ├── ingest_helper.py
│ └── mock_injector.py
├── server
│ ├── ingest
│ │ ├── test.pdf
│ │ ├── test_ingest_routes.py
│ │ └── test.txt
│ ├── chunks
│ │ ├── chunk_test.txt
│ │ └── test_chunk_routes.py
│ ├── utils
│ │ ├── test_auth.py
│ │ └── test_simple_auth.py
│ ├── embeddings
│ │ └── test_embedding_routes.py
│ └── chat
│ │ └── test_chat_routes.py
├── ui
│ └── test_ui.py
├── conftest.py
├── settings
│ ├── test_settings.py
│ └── test_settings_loader.py
└── test_prompt_helper.py
├── tiktoken_cache
└── .gitignore
├── scripts
├── __init__.py
├── extract_openapi.py
├── setup
├── ingest_folder.py
└── utils.py
├── fern
├── fern.config.json
├── docs
│ ├── assets
│ │ ├── ui.png
│ │ ├── favicon.ico
│ │ ├── header.jpeg
│ │ ├── logo_dark.png
│ │ └── logo_light.png
│ └── pages
│ │ ├── manual
│ │ ├── ingestion-reset.mdx
│ │ ├── reranker.mdx
│ │ ├── nodestore.mdx
│ │ ├── ui.mdx
│ │ ├── settings.mdx
│ │ └── ingestion.mdx
│ │ ├── api-reference
│ │ ├── api-reference.mdx
│ │ └── sdks.mdx
│ │ ├── overview
│ │ └── welcome.mdx
│ │ ├── installation
│ │ └── concepts.mdx
│ │ └── recipes
│ │ └── list-llm.mdx
├── generators.yml
├── README.md
└── docs.yml
├── .dockerignore
├── settings-gemini.yaml
├── settings-openai.yaml
├── settings-mock.yaml
├── settings-sagemaker.yaml
├── settings-test.yaml
├── docker-compose.yaml
├── .github
└── workflows
│ ├── release-please.yml
│ ├── fern-check.yml
│ ├── publish-docs.yml
│ ├── actions
│ └── install_dependencies
│ │ └── action.yml
│ ├── stale.yml
│ ├── docker.yml
│ ├── preview-docs.yml
│ └── tests.yml
├── .gitignore
├── settings-azopenai.yaml
├── settings-vllm.yaml
├── CITATION.cff
├── settings-local.yaml
├── settings-ollama-pg.yaml
├── Dockerfile.external
├── .pre-commit-config.yaml
├── settings-docker.yaml
├── Dockerfile.local
├── settings-ollama.yaml
├── Makefile
├── settings.yaml
├── pyproject.toml
└── README.md
/version.txt:
--------------------------------------------------------------------------------
1 | 0.5.0
2 |
--------------------------------------------------------------------------------
/local_data/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore
--------------------------------------------------------------------------------
/models/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore
--------------------------------------------------------------------------------
/private_gpt/components/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/private_gpt/server/chat/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/private_gpt/server/chunks/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/private_gpt/server/health/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/private_gpt/server/ingest/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/private_gpt/server/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | """Tests."""
2 |
--------------------------------------------------------------------------------
/private_gpt/components/ingest/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/private_gpt/server/embeddings/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tiktoken_cache/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore
--------------------------------------------------------------------------------
/private_gpt/components/embedding/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/private_gpt/components/llm/custom/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/private_gpt/components/node_store/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/private_gpt/components/vector_store/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/private_gpt/components/embedding/custom/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/private_gpt/settings/__init__.py:
--------------------------------------------------------------------------------
1 | """Settings."""
2 |
--------------------------------------------------------------------------------
/private_gpt/ui/__init__.py:
--------------------------------------------------------------------------------
1 | """Gradio based UI."""
2 |
--------------------------------------------------------------------------------
/scripts/__init__.py:
--------------------------------------------------------------------------------
1 | """PrivateGPT scripts."""
2 |
--------------------------------------------------------------------------------
/tests/fixtures/__init__.py:
--------------------------------------------------------------------------------
1 | """Global fixtures."""
2 |
--------------------------------------------------------------------------------
/private_gpt/utils/__init__.py:
--------------------------------------------------------------------------------
1 | """general utils."""
2 |
--------------------------------------------------------------------------------
/private_gpt/server/__init__.py:
--------------------------------------------------------------------------------
1 | """private-gpt server."""
2 |
--------------------------------------------------------------------------------
/private_gpt/components/llm/__init__.py:
--------------------------------------------------------------------------------
1 | """LLM implementations."""
2 |
--------------------------------------------------------------------------------
/private_gpt/open_ai/__init__.py:
--------------------------------------------------------------------------------
1 | """OpenAI compatibility utilities."""
2 |
--------------------------------------------------------------------------------
/private_gpt/open_ai/extensions/__init__.py:
--------------------------------------------------------------------------------
1 | """OpenAI API extensions."""
2 |
--------------------------------------------------------------------------------
/fern/fern.config.json:
--------------------------------------------------------------------------------
1 | {
2 | "organization": "privategpt",
3 | "version": "0.31.17"
4 | }
--------------------------------------------------------------------------------
/private_gpt/server/completions/__init__.py:
--------------------------------------------------------------------------------
1 | """Deprecated Openai compatibility endpoint."""
2 |
--------------------------------------------------------------------------------
/fern/docs/assets/ui.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/frozen-dev71/private_gpt_medical/main/fern/docs/assets/ui.png
--------------------------------------------------------------------------------
/private_gpt/constants.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | PROJECT_ROOT_PATH: Path = Path(__file__).parents[1]
4 |
--------------------------------------------------------------------------------
/fern/docs/assets/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/frozen-dev71/private_gpt_medical/main/fern/docs/assets/favicon.ico
--------------------------------------------------------------------------------
/fern/docs/assets/header.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/frozen-dev71/private_gpt_medical/main/fern/docs/assets/header.jpeg
--------------------------------------------------------------------------------
/fern/docs/assets/logo_dark.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/frozen-dev71/private_gpt_medical/main/fern/docs/assets/logo_dark.png
--------------------------------------------------------------------------------
/private_gpt/ui/avatar-bot.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/frozen-dev71/private_gpt_medical/main/private_gpt/ui/avatar-bot.ico
--------------------------------------------------------------------------------
/private_gpt/utils/typing.py:
--------------------------------------------------------------------------------
1 | from typing import TypeVar
2 |
3 | T = TypeVar("T")
4 | K = TypeVar("K")
5 | V = TypeVar("V")
6 |
--------------------------------------------------------------------------------
/tests/server/ingest/test.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/frozen-dev71/private_gpt_medical/main/tests/server/ingest/test.pdf
--------------------------------------------------------------------------------
/fern/docs/assets/logo_light.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/frozen-dev71/private_gpt_medical/main/fern/docs/assets/logo_light.png
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | .venv
2 | models
3 | .github
4 | .vscode
5 | .DS_Store
6 | .mypy_cache
7 | .ruff_cache
8 | local_data
9 | terraform
10 | tests
11 | Dockerfile
12 | Dockerfile.*
--------------------------------------------------------------------------------
/tests/server/chunks/chunk_test.txt:
--------------------------------------------------------------------------------
1 | e88c1005-637d-4cb4-ae79-9b8eb58cab97
2 |
3 | b483dd15-78c4-4d67-b546-21a0d690bf43
4 |
5 | a8080238-b294-4598-ac9c-7abf4c8e0552
6 |
7 | 14208dac-c600-4a18-872b-5e45354cfff2
--------------------------------------------------------------------------------
/settings-gemini.yaml:
--------------------------------------------------------------------------------
1 | llm:
2 | mode: gemini
3 |
4 | embedding:
5 | mode: gemini
6 |
7 | gemini:
8 | api_key: ${GOOGLE_API_KEY:}
9 | model: models/gemini-pro
10 | embedding_model: models/embedding-001
11 |
--------------------------------------------------------------------------------
/settings-openai.yaml:
--------------------------------------------------------------------------------
1 | server:
2 | env_name: ${APP_ENV:openai}
3 |
4 | llm:
5 | mode: openai
6 |
7 | embedding:
8 | mode: openai
9 |
10 | openai:
11 | api_key: ${OPENAI_API_KEY:}
12 | model: gpt-3.5-turbo
13 |
--------------------------------------------------------------------------------
/fern/generators.yml:
--------------------------------------------------------------------------------
1 | groups:
2 | public:
3 | generators:
4 | - name: fernapi/fern-python-sdk
5 | version: 0.6.2
6 | output:
7 | location: local-file-system
8 | path: ../../pgpt-sdk/python
9 |
--------------------------------------------------------------------------------
/private_gpt/main.py:
--------------------------------------------------------------------------------
1 | """FastAPI app creation, logger configuration and main API routes."""
2 |
3 | from private_gpt.di import global_injector
4 | from private_gpt.launcher import create_app
5 |
6 | app = create_app(global_injector)
7 |
--------------------------------------------------------------------------------
/private_gpt/open_ai/extensions/context_filter.py:
--------------------------------------------------------------------------------
1 | from pydantic import BaseModel, Field
2 |
3 |
4 | class ContextFilter(BaseModel):
5 | docs_ids: list[str] | None = Field(
6 | examples=[["c202d5e6-7b69-4869-81cc-dd574ee8ee11"]]
7 | )
8 |
--------------------------------------------------------------------------------
/settings-mock.yaml:
--------------------------------------------------------------------------------
1 | server:
2 | env_name: ${APP_ENV:mock}
3 |
4 | # This configuration allows you to use the GPU for creating embeddings while avoiding loading the LLM into vRAM
5 | llm:
6 | mode: mock
7 |
8 | embedding:
9 | mode: huggingface
10 |
--------------------------------------------------------------------------------
/tests/server/utils/test_auth.py:
--------------------------------------------------------------------------------
1 | from fastapi.testclient import TestClient
2 |
3 |
4 | def test_default_does_not_require_auth(test_client: TestClient) -> None:
5 | response_before = test_client.get("/v1/ingest/list")
6 | assert response_before.status_code == 200
7 |
--------------------------------------------------------------------------------
/settings-sagemaker.yaml:
--------------------------------------------------------------------------------
1 | server:
2 | env_name: ${APP_ENV:sagemaker}
3 | port: ${PORT:8001}
4 |
5 | ui:
6 | enabled: true
7 | path: /
8 |
9 | llm:
10 | mode: sagemaker
11 |
12 | embedding:
13 | mode: sagemaker
14 |
15 | sagemaker:
16 | llm_endpoint_name: llm
17 | embedding_endpoint_name: embedding
--------------------------------------------------------------------------------
/settings-test.yaml:
--------------------------------------------------------------------------------
1 | server:
2 | env_name: test
3 | auth:
4 | enabled: false
5 | # Dummy secrets used for tests
6 | secret: "foo bar; dummy secret"
7 |
8 | data:
9 | local_data_folder: local_data/tests
10 |
11 | qdrant:
12 | path: local_data/tests
13 |
14 | llm:
15 | mode: mock
16 |
17 | embedding:
18 | mode: mock
19 |
20 | ui:
21 | enabled: false
--------------------------------------------------------------------------------
/tests/ui/test_ui.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from fastapi.testclient import TestClient
3 |
4 |
5 | @pytest.mark.parametrize(
6 | "test_client", [{"ui": {"enabled": True, "path": "/ui"}}], indirect=True
7 | )
8 | def test_ui_starts_in_the_given_endpoint(test_client: TestClient) -> None:
9 | response = test_client.get("/ui")
10 | assert response.status_code == 200
11 |
--------------------------------------------------------------------------------
/docker-compose.yaml:
--------------------------------------------------------------------------------
1 | services:
2 | private-gpt:
3 | build:
4 | dockerfile: Dockerfile.external
5 | volumes:
6 | - ./local_data/:/home/worker/app/local_data
7 | ports:
8 | - 8001:8080
9 | environment:
10 | PORT: 8080
11 | PGPT_PROFILES: docker
12 | PGPT_MODE: ollama
13 | ollama:
14 | image: ollama/ollama:latest
15 | volumes:
16 | - ./models:/root/.ollama
17 |
--------------------------------------------------------------------------------
/.github/workflows/release-please.yml:
--------------------------------------------------------------------------------
1 | name: release-please
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 |
8 | permissions:
9 | contents: write
10 | pull-requests: write
11 |
12 | jobs:
13 | release-please:
14 | runs-on: ubuntu-latest
15 | steps:
16 | - uses: google-github-actions/release-please-action@v3
17 | with:
18 | release-type: simple
19 | version-file: version.txt
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .venv
2 | .env
3 | venv
4 |
5 | settings-me.yaml
6 |
7 | .ruff_cache
8 | .pytest_cache
9 | .mypy_cache
10 |
11 | # byte-compiled / optimized / DLL files
12 | __pycache__/
13 | *.py[cod]
14 |
15 | # unit tests / coverage reports
16 | /tests-results.xml
17 | /.coverage
18 | /coverage.xml
19 | /htmlcov/
20 |
21 | # pyenv
22 | /.python-version
23 |
24 | # IDE
25 | .idea/
26 | .vscode/
27 | /.run/
28 | .fleet/
29 |
30 | # macOS
31 | .DS_Store
32 |
--------------------------------------------------------------------------------
/.github/workflows/fern-check.yml:
--------------------------------------------------------------------------------
1 | name: fern check
2 |
3 | on:
4 | pull_request:
5 | branches:
6 | - main
7 | paths:
8 | - "fern/**"
9 |
10 | jobs:
11 | fern-check:
12 | runs-on: ubuntu-latest
13 | steps:
14 | - name: Checkout repo
15 | uses: actions/checkout@v4
16 |
17 | - name: Install Fern
18 | run: npm install -g fern-api
19 |
20 | - name: Check Fern API is valid
21 | run: fern check
--------------------------------------------------------------------------------
/fern/docs/pages/manual/ingestion-reset.mdx:
--------------------------------------------------------------------------------
1 | # Reset Local documents database
2 |
3 | When running in a local setup, you can remove all ingested documents by simply
4 | deleting all contents of `local_data` folder (except .gitignore).
5 |
6 | To simplify this process, you can use the command:
7 | ```bash
8 | make wipe
9 | ```
10 |
11 | # Advanced usage
12 |
13 | You can actually delete your documents from your storage by using the
14 | API endpoint `DELETE` in the Ingestion API.
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pathlib
3 | from glob import glob
4 |
5 | root_path = pathlib.Path(__file__).parents[1]
6 | # This is to prevent a bug in intellij that uses the wrong working directory
7 | os.chdir(root_path)
8 |
9 |
10 | def _as_module(fixture_path: str) -> str:
11 | return fixture_path.replace("/", ".").replace("\\", ".").replace(".py", "")
12 |
13 |
14 | pytest_plugins = [_as_module(fixture) for fixture in glob("tests/fixtures/[!_]*.py")]
15 |
--------------------------------------------------------------------------------
/private_gpt/__main__.py:
--------------------------------------------------------------------------------
1 | # start a fastapi server with uvicorn
2 |
3 | import uvicorn
4 |
5 | from private_gpt.main import app
6 | from private_gpt.settings.settings import settings
7 |
8 | # Set log_config=None so the uvicorn logging configuration is not used, and
9 | # ours is used instead. For reference, see below:
10 | # https://github.com/tiangolo/fastapi/discussions/7457#discussioncomment-5141108
11 | uvicorn.run(app, host="0.0.0.0", port=settings().server.port, log_config=None)
12 |
--------------------------------------------------------------------------------
/settings-azopenai.yaml:
--------------------------------------------------------------------------------
1 | server:
2 | env_name: ${APP_ENV:azopenai}
3 |
4 | llm:
5 | mode: azopenai
6 |
7 | embedding:
8 | mode: azopenai
9 |
10 | azopenai:
11 | api_key: ${AZ_OPENAI_API_KEY:}
12 | azure_endpoint: ${AZ_OPENAI_ENDPOINT:}
13 | embedding_deployment_name: ${AZ_OPENAI_EMBEDDING_DEPLOYMENT_NAME:}
14 | llm_deployment_name: ${AZ_OPENAI_LLM_DEPLOYMENT_NAME:}
15 | api_version: "2023-05-15"
16 | embedding_model: text-embedding-ada-002
17 | llm_model: gpt-35-turbo
--------------------------------------------------------------------------------
/settings-vllm.yaml:
--------------------------------------------------------------------------------
1 | server:
2 | env_name: ${APP_ENV:vllm}
3 |
4 | llm:
5 | mode: openailike
6 | max_new_tokens: 512
7 | tokenizer: mistralai/Mistral-7B-Instruct-v0.2
8 | temperature: 0.1
9 |
10 | embedding:
11 | mode: huggingface
12 | ingest_mode: simple
13 |
14 | huggingface:
15 | embedding_hf_model_name: BAAI/bge-small-en-v1.5
16 |
17 | openai:
18 | api_base: http://localhost:8000/v1
19 | api_key: EMPTY
20 | model: facebook/opt-125m
21 | request_timeout: 600.0
--------------------------------------------------------------------------------
/tests/settings/test_settings.py:
--------------------------------------------------------------------------------
1 | from private_gpt.settings.settings import Settings, settings
2 | from tests.fixtures.mock_injector import MockInjector
3 |
4 |
5 | def test_settings_are_loaded_and_merged() -> None:
6 | assert settings().server.env_name == "test"
7 |
8 |
9 | def test_settings_can_be_overriden(injector: MockInjector) -> None:
10 | injector.bind_settings({"server": {"env_name": "overriden"}})
11 | mocked_settings = injector.get(Settings)
12 | assert mocked_settings.server.env_name == "overriden"
13 |
--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
1 | # This CITATION.cff file was generated with cffinit.
2 | # Visit https://bit.ly/cffinit to generate yours today!
3 |
4 | cff-version: 1.2.0
5 | title: PrivateGPT
6 | message: >-
7 | If you use this software, please cite it using the
8 | metadata from this file.
9 | type: software
10 | authors:
11 | - name: Zylon by PrivateGPT
12 | address: hello@zylon.ai
13 | website: 'https://www.zylon.ai/'
14 | repository-code: 'https://github.com/zylon-ai/private-gpt'
15 | license: Apache-2.0
16 | date-released: '2023-05-02'
17 |
--------------------------------------------------------------------------------
/private_gpt/server/health/health_router.py:
--------------------------------------------------------------------------------
1 | from typing import Literal
2 |
3 | from fastapi import APIRouter
4 | from pydantic import BaseModel, Field
5 |
6 | # No authentication or authorization is required to get the health status.
7 | health_router = APIRouter()
8 |
9 |
10 | class HealthResponse(BaseModel):
11 | status: Literal["ok"] = Field(default="ok")
12 |
13 |
14 | @health_router.get("/health", tags=["Health"])
15 | def health() -> HealthResponse:
16 | """Return ok if the system is up."""
17 | return HealthResponse(status="ok")
18 |
--------------------------------------------------------------------------------
/tests/fixtures/fast_api_test_client.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from fastapi.testclient import TestClient
3 |
4 | from private_gpt.launcher import create_app
5 | from tests.fixtures.mock_injector import MockInjector
6 |
7 |
8 | @pytest.fixture()
9 | def test_client(request: pytest.FixtureRequest, injector: MockInjector) -> TestClient:
10 | if request is not None and hasattr(request, "param"):
11 | injector.bind_settings(request.param or {})
12 |
13 | app_under_test = create_app(injector.test_injector)
14 | return TestClient(app_under_test)
15 |
--------------------------------------------------------------------------------
/private_gpt/paths.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | from private_gpt.constants import PROJECT_ROOT_PATH
4 | from private_gpt.settings.settings import settings
5 |
6 |
7 | def _absolute_or_from_project_root(path: str) -> Path:
8 | if path.startswith("/"):
9 | return Path(path)
10 | return PROJECT_ROOT_PATH / path
11 |
12 |
13 | models_path: Path = PROJECT_ROOT_PATH / "models"
14 | models_cache_path: Path = models_path / "cache"
15 | docs_path: Path = PROJECT_ROOT_PATH / "docs"
16 | local_data_path: Path = _absolute_or_from_project_root(
17 | settings().data.local_data_folder
18 | )
19 |
--------------------------------------------------------------------------------
/private_gpt/di.py:
--------------------------------------------------------------------------------
1 | from injector import Injector
2 |
3 | from private_gpt.settings.settings import Settings, unsafe_typed_settings
4 |
5 |
6 | def create_application_injector() -> Injector:
7 | _injector = Injector(auto_bind=True)
8 | _injector.binder.bind(Settings, to=unsafe_typed_settings)
9 | return _injector
10 |
11 |
12 | """
13 | Global injector for the application.
14 |
15 | Avoid using this reference; it will make your code harder to test.
16 |
17 | Instead, use the `request.state.injector` reference, which is bound to every request.
18 | """
19 | global_injector: Injector = create_application_injector()
20 |
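21 | # Illustrative usage inside a request handler (see e.g.
22 | # private_gpt/server/embeddings/embeddings_router.py): resolve services through
23 | # the per-request injector rather than this global one:
24 | #   service = request.state.injector.get(EmbeddingsService)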
--------------------------------------------------------------------------------
/.github/workflows/publish-docs.yml:
--------------------------------------------------------------------------------
1 | name: publish docs
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 | paths:
8 | - "fern/**"
9 |
10 | jobs:
11 | publish-docs:
12 | runs-on: ubuntu-latest
13 | steps:
14 | - name: Checkout repo
15 | uses: actions/checkout@v4
16 |
17 | - name: Setup node
18 | uses: actions/setup-node@v3
19 |
20 | - name: Download Fern
21 | run: npm install -g fern-api
22 |
23 | - name: Generate and Publish Docs
24 | env:
25 | FERN_TOKEN: ${{ secrets.FERN_TOKEN }}
26 | run: fern generate --docs --log-level debug
27 |
--------------------------------------------------------------------------------
/tests/fixtures/auto_close_qdrant.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from private_gpt.components.vector_store.vector_store_component import (
4 | VectorStoreComponent,
5 | )
6 | from tests.fixtures.mock_injector import MockInjector
7 |
8 |
9 | @pytest.fixture(autouse=True)
10 | def _auto_close_vector_store_client(injector: MockInjector) -> None:
11 | """Auto close VectorStore client after each test.
12 |
13 |     The VectorStore client (qdrant/chromadb) opens a connection to the
14 |     database, which causes issues when running tests too fast,
15 |     so close it explicitly after each test.
16 | """
17 | yield
18 | injector.get(VectorStoreComponent).close()
19 |
--------------------------------------------------------------------------------
/tests/server/embeddings/test_embedding_routes.py:
--------------------------------------------------------------------------------
1 | from fastapi.testclient import TestClient
2 |
3 | from private_gpt.server.embeddings.embeddings_router import (
4 | EmbeddingsBody,
5 | EmbeddingsResponse,
6 | )
7 |
8 |
9 | def test_embeddings_generation(test_client: TestClient) -> None:
10 | body = EmbeddingsBody(input="Embed me")
11 | response = test_client.post("/v1/embeddings", json=body.model_dump())
12 |
13 | assert response.status_code == 200
14 | embedding_response = EmbeddingsResponse.model_validate(response.json())
15 | assert len(embedding_response.data) > 0
16 | assert len(embedding_response.data[0].embedding) > 0
17 |
--------------------------------------------------------------------------------
/settings-local.yaml:
--------------------------------------------------------------------------------
1 | # poetry install --extras "ui llms-llama-cpp vector-stores-qdrant embeddings-huggingface"
2 | server:
3 | env_name: ${APP_ENV:local}
4 |
5 | llm:
6 | mode: llamacpp
7 | # Should be matching the selected model
8 | max_new_tokens: 512
9 | context_window: 3900
10 | tokenizer: mistralai/Mistral-7B-Instruct-v0.2
11 | prompt_style: "mistral"
12 |
13 | llamacpp:
14 | llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF
15 | llm_hf_model_file: mistral-7b-instruct-v0.2.Q4_K_M.gguf
16 |
17 | embedding:
18 | mode: huggingface
19 |
20 | huggingface:
21 | embedding_hf_model_name: BAAI/bge-small-en-v1.5
22 |
23 | vectorstore:
24 | database: qdrant
25 |
26 | qdrant:
27 | path: local_data/private_gpt/qdrant
28 |
--------------------------------------------------------------------------------
/fern/docs/pages/api-reference/api-reference.mdx:
--------------------------------------------------------------------------------
1 | # API Reference
2 |
3 | The API is divided into two logical blocks:
4 |
5 | 1. High-level API, abstracting all the complexity of a RAG (Retrieval Augmented Generation) pipeline implementation:
6 | - Ingestion of documents: internally managing document parsing, splitting, metadata extraction,
7 | embedding generation and storage.
8 | - Chat & Completions using context from ingested documents: abstracting the retrieval of context, the prompt
9 | engineering and the response generation.
10 |
11 | 2. Low-level API, allowing advanced users to implement their own complex pipelines (see the sketch below):
12 | - Embeddings generation: based on a piece of text.
13 | - Contextual chunks retrieval: given a query, returns the most relevant chunks of text from the ingested
14 | documents.
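15 |
16 | As an illustration, the low-level blocks can be called directly over HTTP. The sketch below uses Python's
17 | `requests` library and assumes a PrivateGPT instance listening on `http://localhost:8001` (adjust the base
18 | URL and payloads to your own setup; the exact request/response schemas are documented in the API reference).
19 |
20 | ```python
21 | import requests
22 |
23 | BASE_URL = "http://localhost:8001"  # assumption: default local deployment
24 |
25 | # Low-level API: get a vector representation of a piece of text
26 | embeddings = requests.post(f"{BASE_URL}/v1/embeddings", json={"input": "Embed me"}).json()
27 | print(len(embeddings["data"][0]["embedding"]))
28 |
29 | # Low-level API: retrieve the most relevant chunks of ingested documents for a query
30 | chunks = requests.post(f"{BASE_URL}/v1/chunks", json={"text": "sales forecast"}).json()
31 | for chunk in chunks["data"]:
32 |     print(chunk["text"])
33 | ```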
--------------------------------------------------------------------------------
/tests/fixtures/ingest_helper.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import pytest
4 | from fastapi.testclient import TestClient
5 |
6 | from private_gpt.server.ingest.ingest_router import IngestResponse
7 |
8 |
9 | class IngestHelper:
10 | def __init__(self, test_client: TestClient):
11 | self.test_client = test_client
12 |
13 | def ingest_file(self, path: Path) -> IngestResponse:
14 | files = {"file": (path.name, path.open("rb"))}
15 |
16 | response = self.test_client.post("/v1/ingest/file", files=files)
17 | assert response.status_code == 200
18 | ingest_result = IngestResponse.model_validate(response.json())
19 | return ingest_result
20 |
21 |
22 | @pytest.fixture()
23 | def ingest_helper(test_client: TestClient) -> IngestHelper:
24 | return IngestHelper(test_client)
25 |
--------------------------------------------------------------------------------
/tests/server/chunks/test_chunk_routes.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | from fastapi.testclient import TestClient
4 |
5 | from private_gpt.server.chunks.chunks_router import ChunksBody, ChunksResponse
6 | from tests.fixtures.ingest_helper import IngestHelper
7 |
8 |
9 | def test_chunks_retrieval(test_client: TestClient, ingest_helper: IngestHelper) -> None:
10 | # Make sure there is at least some chunk to query in the database
11 | path = Path(__file__).parents[0] / "chunk_test.txt"
12 | ingest_helper.ingest_file(path)
13 |
14 | body = ChunksBody(text="b483dd15-78c4-4d67-b546-21a0d690bf43")
15 | response = test_client.post("/v1/chunks", json=body.model_dump())
16 | assert response.status_code == 200
17 | chunk_response = ChunksResponse.model_validate(response.json())
18 | assert len(chunk_response.data) > 0
19 |
--------------------------------------------------------------------------------
/settings-ollama-pg.yaml:
--------------------------------------------------------------------------------
1 | # Using ollama and postgres for the vector, doc and index store. Ollama is also used for embeddings.
2 | # To use install these extras:
3 | # poetry install --extras "llms-ollama ui vector-stores-postgres embeddings-ollama storage-nodestore-postgres"
4 | server:
5 | env_name: ${APP_ENV:ollama}
6 |
7 | llm:
8 | mode: ollama
9 | max_new_tokens: 512
10 | context_window: 3900
11 |
12 | embedding:
13 | mode: ollama
14 | embed_dim: 768
15 |
16 | ollama:
17 | llm_model: mistral
18 | embedding_model: nomic-embed-text
19 | api_base: http://localhost:11434
20 |
21 | nodestore:
22 | database: postgres
23 |
24 | vectorstore:
25 | database: postgres
26 |
27 | postgres:
28 | host: localhost
29 | port: 5432
30 | database: postgres
31 | user: postgres
32 | password: admin
33 | schema_name: private_gpt
34 |
35 |
--------------------------------------------------------------------------------
/.github/workflows/actions/install_dependencies/action.yml:
--------------------------------------------------------------------------------
1 | name: "Install Dependencies"
2 | description: "Action to build the project dependencies from the main versions"
3 | inputs:
4 | python_version:
5 | required: true
6 | type: string
7 | default: "3.11.4"
8 | poetry_version:
9 | required: true
10 | type: string
11 | default: "1.5.1"
12 |
13 | runs:
14 | using: composite
15 | steps:
16 | - name: Install Poetry
17 | uses: snok/install-poetry@v1
18 | with:
19 | version: ${{ inputs.poetry_version }}
20 | virtualenvs-create: true
21 | virtualenvs-in-project: false
22 | installer-parallel: true
23 | - uses: actions/setup-python@v4
24 | with:
25 | python-version: ${{ inputs.python_version }}
26 | cache: "poetry"
27 | - name: Install Dependencies
28 | run: poetry install --extras "ui vector-stores-qdrant" --no-root
29 | shell: bash
30 |
31 |
--------------------------------------------------------------------------------
/.github/workflows/stale.yml:
--------------------------------------------------------------------------------
1 | # This workflow warns and then closes issues and PRs that have had no activity for a specified amount of time.
2 | #
3 | # You can adjust the behavior by modifying this file.
4 | # For more information, see:
5 | # https://github.com/actions/stale
6 | name: Mark stale issues and pull requests
7 |
8 | on:
9 | schedule:
10 | - cron: '42 5 * * *'
11 |
12 | jobs:
13 | stale:
14 |
15 | runs-on: ubuntu-latest
16 | permissions:
17 | issues: write
18 | pull-requests: write
19 |
20 | steps:
21 | - uses: actions/stale@v8
22 | with:
23 | repo-token: ${{ secrets.GITHUB_TOKEN }}
24 | days-before-stale: 15
25 | stale-issue-message: 'Stale issue'
26 | stale-pr-message: 'Stale pull request'
27 | stale-issue-label: 'stale'
28 | stale-pr-label: 'stale'
29 | exempt-issue-labels: 'autorelease: pending'
30 | exempt-pr-labels: 'autorelease: pending'
31 |
--------------------------------------------------------------------------------
/fern/docs/pages/api-reference/sdks.mdx:
--------------------------------------------------------------------------------
1 | We use [Fern](https://www.buildwithfern.com) to offer API clients for Node.js, Python, Go, and Java.
2 | We recommend using these clients to interact with our endpoints.
3 | The clients are kept up to date automatically, so we encourage you to use the latest version.
4 |
5 | ## SDKs
6 |
7 | *Coming soon!*
8 |
9 |
10 |
15 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
31 |
36 |
37 |
38 |
39 |
--------------------------------------------------------------------------------
/private_gpt/server/embeddings/embeddings_service.py:
--------------------------------------------------------------------------------
1 | from typing import Literal
2 |
3 | from injector import inject, singleton
4 | from pydantic import BaseModel, Field
5 |
6 | from private_gpt.components.embedding.embedding_component import EmbeddingComponent
7 |
8 |
9 | class Embedding(BaseModel):
10 | index: int
11 | object: Literal["embedding"]
12 | embedding: list[float] = Field(examples=[[0.0023064255, -0.009327292]])
13 |
14 |
15 | @singleton
16 | class EmbeddingsService:
17 | @inject
18 | def __init__(self, embedding_component: EmbeddingComponent) -> None:
19 | self.embedding_model = embedding_component.embedding_model
20 |
21 | def texts_embeddings(self, texts: list[str]) -> list[Embedding]:
22 | texts_embeddings = self.embedding_model.get_text_embedding_batch(texts)
23 |         return [
24 |             Embedding(
25 |                 index=i,
26 |                 object="embedding",
27 |                 embedding=embedding,
28 |             )
29 |             for i, embedding in enumerate(texts_embeddings)
30 |         ]
31 |
--------------------------------------------------------------------------------
/private_gpt/__init__.py:
--------------------------------------------------------------------------------
1 | """private-gpt."""
2 |
3 | import logging
4 | import os
5 |
6 | # Set to 'DEBUG' to have extensive logging turned on, even for libraries
7 | ROOT_LOG_LEVEL = "INFO"
8 |
9 | PRETTY_LOG_FORMAT = (
10 | "%(asctime)s.%(msecs)03d [%(levelname)-8s] %(name)+25s - %(message)s"
11 | )
12 | logging.basicConfig(level=ROOT_LOG_LEVEL, format=PRETTY_LOG_FORMAT, datefmt="%H:%M:%S")
13 | logging.captureWarnings(True)
14 |
15 | # Disable gradio analytics
16 | # This is done this way because gradio does not solely rely on what values are
17 | # passed to gr.Blocks(enable_analytics=...) but also on the environment
18 | # variable GRADIO_ANALYTICS_ENABLED. `gradio.strings` actually reads this env
19 | # directly, so to fully disable gradio analytics we need to set this env var.
20 | os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
21 |
22 | # Disable chromaDB telemetry
23 | # It is already disabled, see PR#1144
24 | # os.environ["ANONYMIZED_TELEMETRY"] = "False"
25 |
26 | # Add the tiktoken cache path within the repo to be able to run in an offline environment.
27 | os.environ["TIKTOKEN_CACHE_DIR"] = "tiktoken_cache"
28 |
--------------------------------------------------------------------------------
/private_gpt/server/ingest/model.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Literal
2 |
3 | from llama_index.core.schema import Document
4 | from pydantic import BaseModel, Field
5 |
6 |
7 | class IngestedDoc(BaseModel):
8 | object: Literal["ingest.document"]
9 | doc_id: str = Field(examples=["c202d5e6-7b69-4869-81cc-dd574ee8ee11"])
10 | doc_metadata: dict[str, Any] | None = Field(
11 | examples=[
12 | {
13 | "page_label": "2",
14 | "file_name": "Sales Report Q3 2023.pdf",
15 | }
16 | ]
17 | )
18 |
19 | @staticmethod
20 | def curate_metadata(metadata: dict[str, Any]) -> dict[str, Any]:
21 | """Remove unwanted metadata keys."""
22 | for key in ["doc_id", "window", "original_text"]:
23 | metadata.pop(key, None)
24 | return metadata
25 |
26 | @staticmethod
27 | def from_document(document: Document) -> "IngestedDoc":
28 | return IngestedDoc(
29 | object="ingest.document",
30 | doc_id=document.doc_id,
31 | doc_metadata=IngestedDoc.curate_metadata(document.metadata),
32 | )
33 |
--------------------------------------------------------------------------------
/Dockerfile.external:
--------------------------------------------------------------------------------
1 | FROM python:3.11.6-slim-bookworm as base
2 |
3 | # Install poetry
4 | RUN pip install pipx
5 | RUN python3 -m pipx ensurepath
6 | RUN pipx install poetry
7 | ENV PATH="/root/.local/bin:$PATH"
8 | ENV PATH=".venv/bin/:$PATH"
9 |
10 | # https://python-poetry.org/docs/configuration/#virtualenvsin-project
11 | ENV POETRY_VIRTUALENVS_IN_PROJECT=true
12 |
13 | FROM base as dependencies
14 | WORKDIR /home/worker/app
15 | COPY pyproject.toml poetry.lock ./
16 |
17 | RUN poetry install --extras "ui vector-stores-qdrant llms-ollama embeddings-ollama"
18 |
19 | FROM base as app
20 |
21 | ENV PYTHONUNBUFFERED=1
22 | ENV PORT=8080
23 | EXPOSE 8080
24 |
25 | # Prepare a non-root user
26 | RUN adduser --system worker
27 | WORKDIR /home/worker/app
28 |
29 | RUN mkdir local_data; chown worker local_data
30 | RUN mkdir models; chown worker models
31 | COPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv
32 | COPY --chown=worker private_gpt/ private_gpt
33 | COPY --chown=worker fern/ fern
34 | COPY --chown=worker *.yaml *.md ./
35 | COPY --chown=worker scripts/ scripts
36 |
37 | ENV PYTHONPATH="$PYTHONPATH:/private_gpt/"
38 |
39 | USER worker
40 | ENTRYPOINT python -m private_gpt
--------------------------------------------------------------------------------
/scripts/extract_openapi.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 | import sys
4 |
5 | import yaml
6 | from uvicorn.importer import import_from_string
7 |
8 | parser = argparse.ArgumentParser(prog="extract_openapi.py")
9 | parser.add_argument("app", help='App import string. Eg. "main:app"', default="main:app")
10 | parser.add_argument("--app-dir", help="Directory containing the app", default=None)
11 | parser.add_argument(
12 | "--out", help="Output file ending in .json or .yaml", default="openapi.yaml"
13 | )
14 |
15 | if __name__ == "__main__":
16 | args = parser.parse_args()
17 |
18 | if args.app_dir is not None:
19 | print(f"adding {args.app_dir} to sys.path")
20 | sys.path.insert(0, args.app_dir)
21 |
22 | print(f"importing app from {args.app}")
23 | app = import_from_string(args.app)
24 | openapi = app.openapi()
25 | version = openapi.get("openapi", "unknown version")
26 |
27 | print(f"writing openapi spec v{version}")
28 | with open(args.out, "w") as f:
29 | if args.out.endswith(".json"):
30 | json.dump(openapi, f, indent=2)
31 | else:
32 | yaml.dump(openapi, f, sort_keys=False)
33 |
34 | print(f"spec written to {args.out}")
35 |
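36 | # Illustrative invocation (import string assumed from this repo's layout):
37 | #   python scripts/extract_openapi.py private_gpt.main:app --out openapi.json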
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | default_install_hook_types:
2 | # Mandatory to install both pre-commit and pre-push hooks (see https://pre-commit.com/#top_level-default_install_hook_types)
3 | # Add new hook types here to ensure automatic installation when running `pre-commit install`
4 | - pre-commit
5 | - pre-push
6 | repos:
7 | - repo: https://github.com/pre-commit/pre-commit-hooks
8 | rev: v4.3.0
9 | hooks:
10 | - id: trailing-whitespace
11 | - id: end-of-file-fixer
12 | - id: check-yaml
13 | - id: check-json
14 | - id: check-added-large-files
15 |
16 | - repo: local
17 | hooks:
18 | - id: black
19 | name: Formatting (black)
20 | entry: black
21 | language: system
22 | types: [python]
23 | stages: [commit]
24 | - id: ruff
25 | name: Linter (ruff)
26 | entry: ruff
27 | language: system
28 | types: [python]
29 | stages: [commit]
30 | - id: mypy
31 | name: Type checking (mypy)
32 | entry: make mypy
33 | pass_filenames: false
34 | language: system
35 | types: [python]
36 | stages: [commit]
37 | - id: test
38 | name: Unit tests (pytest)
39 | entry: make test
40 | pass_filenames: false
41 | language: system
42 | types: [python]
43 | stages: [push]
--------------------------------------------------------------------------------
/settings-docker.yaml:
--------------------------------------------------------------------------------
1 | server:
2 | env_name: ${APP_ENV:prod}
3 | port: ${PORT:8080}
4 |
5 | llm:
6 | mode: ${PGPT_MODE:mock}
7 |
8 | embedding:
9 | mode: ${PGPT_MODE:sagemaker}
10 |
11 | llamacpp:
12 | llm_hf_repo_id: ${PGPT_HF_REPO_ID:TheBloke/Mistral-7B-Instruct-v0.1-GGUF}
13 | llm_hf_model_file: ${PGPT_HF_MODEL_FILE:mistral-7b-instruct-v0.1.Q4_K_M.gguf}
14 |
15 | huggingface:
16 | embedding_hf_model_name: ${PGPT_EMBEDDING_HF_MODEL_NAME:BAAI/bge-small-en-v1.5}
17 |
18 | sagemaker:
19 | llm_endpoint_name: ${PGPT_SAGEMAKER_LLM_ENDPOINT_NAME:}
20 | embedding_endpoint_name: ${PGPT_SAGEMAKER_EMBEDDING_ENDPOINT_NAME:}
21 |
22 | ollama:
23 | llm_model: ${PGPT_OLLAMA_LLM_MODEL:mistral}
24 | embedding_model: ${PGPT_OLLAMA_EMBEDDING_MODEL:nomic-embed-text}
25 | api_base: ${PGPT_OLLAMA_API_BASE:http://ollama:11434}
26 | embedding_api_base: ${PGPT_OLLAMA_EMBEDDING_API_BASE:http://ollama:11434}
27 | tfs_z: ${PGPT_OLLAMA_TFS_Z:1.0}
28 | top_k: ${PGPT_OLLAMA_TOP_K:40}
29 | top_p: ${PGPT_OLLAMA_TOP_P:0.9}
30 | repeat_last_n: ${PGPT_OLLAMA_REPEAT_LAST_N:64}
31 | repeat_penalty: ${PGPT_OLLAMA_REPEAT_PENALTY:1.2}
32 | request_timeout: ${PGPT_OLLAMA_REQUEST_TIMEOUT:600.0}
33 |
34 | ui:
35 | enabled: true
36 | path: /
37 |
--------------------------------------------------------------------------------
/private_gpt/server/embeddings/embeddings_router.py:
--------------------------------------------------------------------------------
1 | from typing import Literal
2 |
3 | from fastapi import APIRouter, Depends, Request
4 | from pydantic import BaseModel
5 |
6 | from private_gpt.server.embeddings.embeddings_service import (
7 | Embedding,
8 | EmbeddingsService,
9 | )
10 | from private_gpt.server.utils.auth import authenticated
11 |
12 | embeddings_router = APIRouter(prefix="/v1", dependencies=[Depends(authenticated)])
13 |
14 |
15 | class EmbeddingsBody(BaseModel):
16 | input: str | list[str]
17 |
18 |
19 | class EmbeddingsResponse(BaseModel):
20 | object: Literal["list"]
21 | model: Literal["private-gpt"]
22 | data: list[Embedding]
23 |
24 |
25 | @embeddings_router.post("/embeddings", tags=["Embeddings"])
26 | def embeddings_generation(request: Request, body: EmbeddingsBody) -> EmbeddingsResponse:
27 | """Get a vector representation of a given input.
28 |
29 | That vector representation can be easily consumed
30 | by machine learning models and algorithms.
31 | """
32 | service = request.state.injector.get(EmbeddingsService)
33 | input_texts = body.input if isinstance(body.input, list) else [body.input]
34 | embeddings = service.texts_embeddings(input_texts)
35 | return EmbeddingsResponse(object="list", model="private-gpt", data=embeddings)
36 |
--------------------------------------------------------------------------------
/tests/server/chat/test_chat_routes.py:
--------------------------------------------------------------------------------
1 | from fastapi.testclient import TestClient
2 |
3 | from private_gpt.open_ai.openai_models import OpenAICompletion, OpenAIMessage
4 | from private_gpt.server.chat.chat_router import ChatBody
5 |
6 |
7 | def test_chat_route_produces_a_stream(test_client: TestClient) -> None:
8 | body = ChatBody(
9 | messages=[OpenAIMessage(content="test", role="user")],
10 | use_context=False,
11 | stream=True,
12 | )
13 | response = test_client.post("/v1/chat/completions", json=body.model_dump())
14 |
15 | raw_events = response.text.split("\n\n")
16 | events = [
17 | item.removeprefix("data: ") for item in raw_events if item.startswith("data: ")
18 | ]
19 | assert response.status_code == 200
20 | assert "text/event-stream" in response.headers["content-type"]
21 | assert len(events) > 0
22 | assert events[-1] == "[DONE]"
23 |
24 |
25 | def test_chat_route_produces_a_single_value(test_client: TestClient) -> None:
26 | body = ChatBody(
27 | messages=[OpenAIMessage(content="test", role="user")],
28 | use_context=False,
29 | stream=False,
30 | )
31 | response = test_client.post("/v1/chat/completions", json=body.model_dump())
32 |
33 | # No asserts, if it validates it's good
34 | OpenAICompletion.model_validate(response.json())
35 | assert response.status_code == 200
36 |
--------------------------------------------------------------------------------
/tests/settings/test_settings_loader.py:
--------------------------------------------------------------------------------
1 | import io
2 | import os
3 |
4 | import pytest
5 |
6 | from private_gpt.settings.yaml import load_yaml_with_envvars
7 |
8 |
9 | def test_environment_variables_are_loaded() -> None:
10 | sample_yaml = """
11 | replaced: ${TEST_REPLACE_ME}
12 | """
13 | env = {"TEST_REPLACE_ME": "replaced"}
14 | loaded = load_yaml_with_envvars(io.StringIO(sample_yaml), env)
15 | os.environ.copy()
16 | assert loaded["replaced"] == "replaced"
17 |
18 |
19 | def test_environment_defaults_variables_are_loaded() -> None:
20 | sample_yaml = """
21 | replaced: ${PGPT_EMBEDDING_HF_MODEL_NAME:BAAI/bge-small-en-v1.5}
22 | """
23 | loaded = load_yaml_with_envvars(io.StringIO(sample_yaml), {})
24 | assert loaded["replaced"] == "BAAI/bge-small-en-v1.5"
25 |
26 |
27 | def test_environment_defaults_variables_are_loaded_with_duplicated_delimiters() -> None:
28 | sample_yaml = """
29 | replaced: ${PGPT_EMBEDDING_HF_MODEL_NAME::duped::}
30 | """
31 | loaded = load_yaml_with_envvars(io.StringIO(sample_yaml), {})
32 | assert loaded["replaced"] == ":duped::"
33 |
34 |
35 | def test_environment_without_defaults_fails() -> None:
36 | sample_yaml = """
37 | replaced: ${TEST_REPLACE_ME}
38 | """
39 | with pytest.raises(ValueError) as error:
40 | load_yaml_with_envvars(io.StringIO(sample_yaml), {})
41 | assert error is not None
42 |
--------------------------------------------------------------------------------
/.github/workflows/docker.yml:
--------------------------------------------------------------------------------
1 | name: docker
2 |
3 | on:
4 | release:
5 | types: [ published ]
6 | workflow_dispatch:
7 |
8 | env:
9 | REGISTRY: ghcr.io
10 | IMAGE_NAME: ${{ github.repository }}
11 |
12 | jobs:
13 | build-and-push-image:
14 | runs-on: ubuntu-latest
15 | permissions:
16 | contents: read
17 | packages: write
18 | steps:
19 | - name: Checkout repository
20 | uses: actions/checkout@v4
21 | - name: Log in to the Container registry
22 | uses: docker/login-action@v3
23 | with:
24 | registry: ${{ env.REGISTRY }}
25 | username: ${{ github.actor }}
26 | password: ${{ secrets.GITHUB_TOKEN }}
27 | - name: Extract metadata (tags, labels) for Docker
28 | id: meta
29 | uses: docker/metadata-action@v5
30 | with:
31 | images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
32 | tags: |
33 | type=ref,event=branch
34 | type=ref,event=pr
35 | type=semver,pattern={{version}}
36 | type=semver,pattern={{major}}.{{minor}}
37 | type=sha
38 | - name: Build and push Docker image
39 | uses: docker/build-push-action@v5
40 | with:
41 | context: .
42 | file: Dockerfile.external
43 | push: true
44 | tags: ${{ steps.meta.outputs.tags }}
45 | labels: ${{ steps.meta.outputs.labels }}
46 |
--------------------------------------------------------------------------------
/fern/docs/pages/overview/welcome.mdx:
--------------------------------------------------------------------------------
1 | PrivateGPT provides an **API** containing all the building blocks required to
2 | build **private, context-aware AI applications**.
3 | The API follows and extends the OpenAI API standard, and supports both normal and streaming responses.
4 | That means that, if you can use the OpenAI API in one of your tools, you can use your own PrivateGPT API instead,
5 | with no code changes, **and for free** if you are running privateGPT in a `local` setup.
6 |
7 | Get started by understanding the [Main Concepts and Installation](/installation) and then dive into the [API Reference](/api-reference).
8 |
9 | ## Frequently Visited Resources
10 |
11 |
12 |
17 |
22 |
27 |
32 |
33 |
34 |
35 |
36 |
37 |
38 | A working **Gradio UI client** is provided to test the API, together with a set of useful tools such as bulk
39 | model download script, ingestion script, documents folder watch, etc.
40 |
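41 | As a quick illustration of the OpenAI compatibility mentioned above, an OpenAI-compatible client can be
42 | pointed at a PrivateGPT instance. A minimal sketch with the `openai` Python client, assuming a local setup
43 | on port 8001 (the `model` and `api_key` values are placeholders in this scenario):
44 |
45 | ```python
46 | from openai import OpenAI
47 |
48 | # Assumption: PrivateGPT running locally; it serves whatever model is configured in its settings
49 | client = OpenAI(base_url="http://localhost:8001/v1", api_key="not-needed-locally")
50 |
51 | completion = client.chat.completions.create(
52 |     model="private-gpt",  # placeholder model name
53 |     messages=[{"role": "user", "content": "Summarize the ingested documents."}],
54 | )
55 | print(completion.choices[0].message.content)
56 | ```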
--------------------------------------------------------------------------------
/tests/fixtures/mock_injector.py:
--------------------------------------------------------------------------------
1 | from collections.abc import Callable
2 | from typing import Any
3 | from unittest.mock import MagicMock
4 |
5 | import pytest
6 | from injector import Provider, ScopeDecorator, singleton
7 |
8 | from private_gpt.di import create_application_injector
9 | from private_gpt.settings.settings import Settings, unsafe_settings
10 | from private_gpt.settings.settings_loader import merge_settings
11 | from private_gpt.utils.typing import T
12 |
13 |
14 | class MockInjector:
15 | def __init__(self) -> None:
16 | self.test_injector = create_application_injector()
17 |
18 | def bind_mock(
19 | self,
20 | interface: type[T],
21 | mock: (T | (Callable[..., T] | Provider[T])) | None = None,
22 | *,
23 | scope: ScopeDecorator = singleton,
24 | ) -> T:
25 | if mock is None:
26 | mock = MagicMock()
27 | self.test_injector.binder.bind(interface, to=mock, scope=scope)
28 | return mock # type: ignore
29 |
30 | def bind_settings(self, settings: dict[str, Any]) -> Settings:
31 | merged = merge_settings([unsafe_settings, settings])
32 | new_settings = Settings(**merged)
33 | self.test_injector.binder.bind(Settings, new_settings)
34 | return new_settings
35 |
36 | def get(self, interface: type[T]) -> T:
37 | return self.test_injector.get(interface)
38 |
39 |
40 | @pytest.fixture()
41 | def injector() -> MockInjector:
42 | return MockInjector()
43 |
--------------------------------------------------------------------------------
/private_gpt/settings/yaml.py:
--------------------------------------------------------------------------------
1 | import os
2 | import re
3 | import typing
4 | from typing import Any, TextIO
5 |
6 | from yaml import SafeLoader
7 |
8 | _env_replace_matcher = re.compile(r"\$\{(\w|_)+:?.*}")
9 |
10 |
11 | @typing.no_type_check  # PyYAML does not have good type hints, everything is Any
12 | def load_yaml_with_envvars(
13 | stream: TextIO, environ: dict[str, Any] = os.environ
14 | ) -> dict[str, Any]:
15 | """Load yaml file with environment variable expansion.
16 |
17 | The pattern ${VAR} or ${VAR:default} will be replaced with
18 | the value of the environment variable.
19 | """
20 | loader = SafeLoader(stream)
21 |
22 | def load_env_var(_, node) -> str:
23 | """Extract the matched value, expand env variable, and replace the match."""
24 | value = str(node.value).removeprefix("${").removesuffix("}")
25 | split = value.split(":", 1)
26 | env_var = split[0]
27 | value = environ.get(env_var)
28 | default = None if len(split) == 1 else split[1]
29 | if value is None and default is None:
30 | raise ValueError(
31 |                 f"Environment variable {env_var} is not set and no default was provided"
32 | )
33 | return value or default
34 |
35 | loader.add_implicit_resolver("env_var_replacer", _env_replace_matcher, None)
36 | loader.add_constructor("env_var_replacer", load_env_var)
37 |
38 | try:
39 | return loader.get_single_data()
40 | finally:
41 | loader.dispose()
42 |
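43 | # Illustrative examples (mirroring tests/settings/test_settings_loader.py):
44 | #   load_yaml_with_envvars(io.StringIO("name: ${APP_ENV:local}"), {})          -> {"name": "local"}
45 | #   load_yaml_with_envvars(io.StringIO("name: ${APP_ENV}"), {"APP_ENV": "x"})  -> {"name": "x"}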
--------------------------------------------------------------------------------
/Dockerfile.local:
--------------------------------------------------------------------------------
1 | ### IMPORTANT: THIS IMAGE CAN ONLY BE RUN IN LINUX DOCKER
2 | ### You will run into a segfault on macOS
3 | FROM python:3.11.6-slim-bookworm as base
4 |
5 | # Install poetry
6 | RUN pip install pipx
7 | RUN python3 -m pipx ensurepath
8 | RUN pipx install poetry
9 | ENV PATH="/root/.local/bin:$PATH"
10 | ENV PATH=".venv/bin/:$PATH"
11 |
12 | # Dependencies to build llama-cpp
13 | RUN apt update && apt install -y \
14 | libopenblas-dev\
15 | ninja-build\
16 | build-essential\
17 | pkg-config\
18 | wget
19 |
20 | # https://python-poetry.org/docs/configuration/#virtualenvsin-project
21 | ENV POETRY_VIRTUALENVS_IN_PROJECT=true
22 |
23 | FROM base as dependencies
24 | WORKDIR /home/worker/app
25 | COPY pyproject.toml poetry.lock ./
26 |
27 | RUN poetry install --extras "ui embeddings-huggingface llms-llama-cpp vector-stores-qdrant"
28 |
29 | FROM base as app
30 |
31 | ENV PYTHONUNBUFFERED=1
32 | ENV PORT=8080
33 | EXPOSE 8080
34 |
35 | # Prepare a non-root user
36 | RUN adduser --group worker
37 | RUN adduser --system --ingroup worker worker
38 | WORKDIR /home/worker/app
39 |
40 | RUN mkdir local_data; chown worker local_data
41 | RUN mkdir models; chown worker models
42 | COPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv
43 | COPY --chown=worker private_gpt/ private_gpt
44 | COPY --chown=worker fern/ fern
45 | COPY --chown=worker *.yaml *.md ./
46 | COPY --chown=worker scripts/ scripts
47 |
48 | ENV PYTHONPATH="$PYTHONPATH:/private_gpt/"
49 |
50 | USER worker
51 | ENTRYPOINT python -m private_gpt
--------------------------------------------------------------------------------
/fern/README.md:
--------------------------------------------------------------------------------
1 | # Documentation of privateGPT
2 |
3 | The documentation of this project is rendered using [fern](https://github.com/fern-api/fern).
4 |
5 | Fern transforms your `.md` and `.mdx` files into a static website: your documentation.
6 |
7 | The configuration of your documentation is done in the `./docs.yml` file.
8 | There, you can configure the navbar, tabs, sections and pages being rendered.
9 |
10 | The documentation of fern (and the syntax of its configuration `docs.yml`) is
11 | available at [docs.buildwithfern.com](https://docs.buildwithfern.com/).
12 |
13 | ## How to run fern
14 |
15 | **You cannot render your documentation locally without fern credentials.**
16 |
17 | To see how your documentation looks, you **have to** use the CI/CD of this
18 | repository (open a PR; the CI/CD job will run, and a preview of
19 | your PR's documentation will be deployed to Vercel automatically, through fern).
20 |
21 | The only thing you can do locally is to run `fern check`, which checks the syntax of
22 | your `docs.yml` file.
23 |
24 | ## How to add a new page
25 | Add in the `docs.yml` a new `page`, with the following syntax:
26 |
27 | ```yml
28 | navigation:
29 | # ...
30 | - tab: my-existing-tab
31 | layout:
32 | # ...
33 | - section: My Existing Section
34 | contents:
35 | # ...
36 | - page: My new page display name
37 | # The path of the page, relative to `fern/`
38 | path: ./docs/pages/my-existing-tab/new-page-content.mdx
39 | ```
--------------------------------------------------------------------------------
/private_gpt/server/ingest/ingest_watcher.py:
--------------------------------------------------------------------------------
1 | from collections.abc import Callable
2 | from pathlib import Path
3 | from typing import Any
4 |
5 | from watchdog.events import (
6 | FileCreatedEvent,
7 | FileModifiedEvent,
8 | FileSystemEvent,
9 | FileSystemEventHandler,
10 | )
11 | from watchdog.observers import Observer
12 |
13 |
14 | class IngestWatcher:
15 | def __init__(
16 | self, watch_path: Path, on_file_changed: Callable[[Path], None]
17 | ) -> None:
18 | self.watch_path = watch_path
19 | self.on_file_changed = on_file_changed
20 |
21 | class Handler(FileSystemEventHandler):
22 | def on_modified(self, event: FileSystemEvent) -> None:
23 | if isinstance(event, FileModifiedEvent):
24 | on_file_changed(Path(event.src_path))
25 |
26 | def on_created(self, event: FileSystemEvent) -> None:
27 | if isinstance(event, FileCreatedEvent):
28 | on_file_changed(Path(event.src_path))
29 |
30 | event_handler = Handler()
31 | observer: Any = Observer()
32 | self._observer = observer
33 | self._observer.schedule(event_handler, str(watch_path), recursive=True)
34 |
35 | def start(self) -> None:
36 | self._observer.start()
37 | while self._observer.is_alive():
38 | try:
39 | self._observer.join(1)
40 | except KeyboardInterrupt:
41 | break
42 |
43 | def stop(self) -> None:
44 | self._observer.stop()
45 | self._observer.join()
46 |
--------------------------------------------------------------------------------
/.github/workflows/preview-docs.yml:
--------------------------------------------------------------------------------
1 | name: deploy preview docs
2 |
3 | on:
4 | pull_request_target:
5 | branches:
6 | - main
7 | paths:
8 | - "fern/**"
9 |
10 | jobs:
11 | preview-docs:
12 | runs-on: ubuntu-latest
13 |
14 | steps:
15 | - name: Checkout repository
16 | uses: actions/checkout@v4
17 | with:
18 | ref: refs/pull/${{ github.event.pull_request.number }}/merge
19 |
20 | - name: Setup Node.js
21 | uses: actions/setup-node@v4
22 | with:
23 | node-version: "18"
24 |
25 | - name: Install Fern
26 | run: npm install -g fern-api
27 |
28 | - name: Generate Documentation Preview with Fern
29 | id: generate_docs
30 | env:
31 | FERN_TOKEN: ${{ secrets.FERN_TOKEN }}
32 | run: |
33 | output=$(fern generate --docs --preview --log-level debug)
34 | echo "$output"
35 | # Extract the URL
36 | preview_url=$(echo "$output" | grep -oP '(?<=Published docs to )https://[^\s]*')
37 | # Set the output for the step
38 | echo "::set-output name=preview_url::$preview_url"
39 | - name: Comment PR with URL using github-actions bot
40 | uses: actions/github-script@v4
41 | if: ${{ steps.generate_docs.outputs.preview_url }}
42 | with:
43 | script: |
44 | const preview_url = '${{ steps.generate_docs.outputs.preview_url }}';
45 | const issue_number = context.issue.number;
46 | github.issues.createComment({
47 | ...context.repo,
48 | issue_number: issue_number,
49 | body: `Published docs preview URL: ${preview_url}`
50 | })
51 |
--------------------------------------------------------------------------------
/settings-ollama.yaml:
--------------------------------------------------------------------------------
1 | server:
2 | env_name: ${APP_ENV:ollama}
3 |
4 | llm:
5 | mode: ollama
6 | max_new_tokens: 512
7 | context_window: 3900
8 |   temperature: 0.1 # The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual. (Default: 0.1)
9 |
10 | embedding:
11 | mode: ollama
12 |
13 | ollama:
14 | llm_model: mistral
15 | embedding_model: nomic-embed-text
16 | api_base: http://localhost:11434
17 | embedding_api_base: http://localhost:11434 # change if your embedding model runs on another ollama
18 | keep_alive: 5m
19 | tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting.
20 | top_k: 40 # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)
21 | top_p: 0.9 # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)
22 |   repeat_last_n: 64 # Sets how far back the model looks to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx)
23 | repeat_penalty: 1.2 # Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)
24 | request_timeout: 120.0 # Time elapsed until ollama times out the request. Default is 120s. Format is float.
25 |
26 | vectorstore:
27 | database: qdrant
28 |
29 | qdrant:
30 | path: local_data/private_gpt/qdrant
31 |
--------------------------------------------------------------------------------
/scripts/setup:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | import os
3 | import argparse
4 |
5 | from huggingface_hub import hf_hub_download, snapshot_download
6 | from transformers import AutoTokenizer
7 |
8 | from private_gpt.paths import models_path, models_cache_path
9 | from private_gpt.settings.settings import settings
10 |
11 | resume_download = True
12 | if __name__ == '__main__':
13 | parser = argparse.ArgumentParser(prog='Setup: Download models from Hugging Face')
14 |     parser.add_argument('--resume', default=True, action=argparse.BooleanOptionalAction, help='Enable/Disable the resume_download option to resume an interrupted download')
15 | args = parser.parse_args()
16 | resume_download = args.resume
17 |
18 | os.makedirs(models_path, exist_ok=True)
19 |
20 | # Download Embedding model
21 | embedding_path = models_path / "embedding"
22 | print(f"Downloading embedding {settings().huggingface.embedding_hf_model_name}")
23 | snapshot_download(
24 | repo_id=settings().huggingface.embedding_hf_model_name,
25 | cache_dir=models_cache_path,
26 | local_dir=embedding_path,
27 | )
28 | print("Embedding model downloaded!")
29 |
30 | # Download LLM and create a symlink to the model file
31 | print(f"Downloading LLM {settings().llamacpp.llm_hf_model_file}")
32 | hf_hub_download(
33 | repo_id=settings().llamacpp.llm_hf_repo_id,
34 | filename=settings().llamacpp.llm_hf_model_file,
35 | cache_dir=models_cache_path,
36 | local_dir=models_path,
37 | resume_download=resume_download,
38 | )
39 | print("LLM model downloaded!")
40 |
41 | # Download Tokenizer
42 | print(f"Downloading tokenizer {settings().llm.tokenizer}")
43 | AutoTokenizer.from_pretrained(
44 | pretrained_model_name_or_path=settings().llm.tokenizer,
45 | cache_dir=models_cache_path,
46 | )
47 | print("Tokenizer downloaded!")
48 |
49 | print("Setup done")
50 |
--------------------------------------------------------------------------------
/fern/docs/pages/manual/reranker.mdx:
--------------------------------------------------------------------------------
1 | ## Enhancing Response Quality with Reranking
2 |
3 | PrivateGPT offers a reranking feature aimed at optimizing response generation by filtering out irrelevant documents, potentially leading to faster response times and enhanced relevance of answers generated by the LLM.
4 |
5 | ### Enabling Reranking
6 |
7 | Document reranking can significantly improve the efficiency and quality of the responses by pre-selecting the most relevant documents before generating an answer. To leverage this feature, ensure that it is enabled in the RAG settings and consider adjusting the parameters to best fit your use case.
8 |
9 | #### Additional Requirements
10 |
11 | Before enabling reranking, you must install additional dependencies:
12 |
13 | ```bash
14 | poetry install --extras rerank-sentence-transformers
15 | ```
16 |
17 | This command installs the dependencies for the cross-encoder reranker from sentence-transformers, which is currently the only reranking method supported by PrivateGPT.
18 |
19 | #### Configuration
20 |
21 | To enable and configure reranking, adjust the `rag` section within the `settings.yaml` file. Here are the key settings to consider:
22 |
23 | - `similarity_top_k`: Determines the number of documents to initially retrieve and consider for reranking. This value should be larger than `top_n`.
24 | - `rerank`:
25 | - `enabled`: Set to `true` to activate the reranking feature.
26 | - `top_n`: Specifies the number of documents to use in the final answer generation process, chosen from the top-ranked documents provided by `similarity_top_k`.
27 |
28 | Example configuration snippet:
29 |
30 | ```yaml
31 | rag:
32 | similarity_top_k: 10 # Number of documents to retrieve and consider for reranking
33 | rerank:
34 | enabled: true
35 | top_n: 3 # Number of top-ranked documents to use for generating the answer
36 | ```
--------------------------------------------------------------------------------
/private_gpt/settings/settings_loader.py:
--------------------------------------------------------------------------------
1 | import functools
2 | import logging
3 | import os
4 | import sys
5 | from collections.abc import Iterable
6 | from pathlib import Path
7 | from typing import Any
8 |
9 | from pydantic.v1.utils import deep_update, unique_list
10 |
11 | from private_gpt.constants import PROJECT_ROOT_PATH
12 | from private_gpt.settings.yaml import load_yaml_with_envvars
13 |
14 | logger = logging.getLogger(__name__)
15 |
16 | _settings_folder = os.environ.get("PGPT_SETTINGS_FOLDER", PROJECT_ROOT_PATH)
17 |
18 | # if running in unittest, use the test profile
19 | _test_profile = ["test"] if "tests.fixtures" in sys.modules else []
20 |
21 | active_profiles: list[str] = unique_list(
22 | ["default"]
23 | + [
24 | item.strip()
25 | for item in os.environ.get("PGPT_PROFILES", "").split(",")
26 | if item.strip()
27 | ]
28 | + _test_profile
29 | )
30 |
31 |
32 | def merge_settings(settings: Iterable[dict[str, Any]]) -> dict[str, Any]:
33 | return functools.reduce(deep_update, settings, {})
34 |
35 |
36 | def load_settings_from_profile(profile: str) -> dict[str, Any]:
37 | if profile == "default":
38 | profile_file_name = "settings.yaml"
39 | else:
40 | profile_file_name = f"settings-{profile}.yaml"
41 |
42 | path = Path(_settings_folder) / profile_file_name
43 | with Path(path).open("r") as f:
44 | config = load_yaml_with_envvars(f)
45 | if not isinstance(config, dict):
46 | raise TypeError(f"Config file has no top-level mapping: {path}")
47 | return config
48 |
49 |
50 | def load_active_settings() -> dict[str, Any]:
51 | """Load active profiles and merge them."""
52 | logger.info("Starting application with profiles=%s", active_profiles)
53 | loaded_profiles = [
54 | load_settings_from_profile(profile) for profile in active_profiles
55 | ]
56 | merged: dict[str, Any] = merge_settings(loaded_profiles)
57 | return merged
58 |
--------------------------------------------------------------------------------
/tests/server/ingest/test_ingest_routes.py:
--------------------------------------------------------------------------------
1 | import tempfile
2 | from pathlib import Path
3 |
4 | from fastapi.testclient import TestClient
5 |
6 | from private_gpt.server.ingest.ingest_router import IngestResponse
7 | from tests.fixtures.ingest_helper import IngestHelper
8 |
9 |
10 | def test_ingest_accepts_txt_files(ingest_helper: IngestHelper) -> None:
11 | path = Path(__file__).parents[0] / "test.txt"
12 | ingest_result = ingest_helper.ingest_file(path)
13 | assert len(ingest_result.data) == 1
14 |
15 |
16 | def test_ingest_accepts_pdf_files(ingest_helper: IngestHelper) -> None:
17 | path = Path(__file__).parents[0] / "test.pdf"
18 | ingest_result = ingest_helper.ingest_file(path)
19 | assert len(ingest_result.data) == 1
20 |
21 |
22 | def test_ingest_list_returns_something_after_ingestion(
23 | test_client: TestClient, ingest_helper: IngestHelper
24 | ) -> None:
25 | response_before = test_client.get("/v1/ingest/list")
26 | count_ingest_before = len(response_before.json()["data"])
27 | with tempfile.NamedTemporaryFile("w", suffix=".txt") as test_file:
28 | test_file.write("Foo bar; hello there!")
29 | test_file.flush()
30 | test_file.seek(0)
31 | ingest_result = ingest_helper.ingest_file(Path(test_file.name))
32 | assert len(ingest_result.data) == 1, "The temp doc should have been ingested"
33 | response_after = test_client.get("/v1/ingest/list")
34 | count_ingest_after = len(response_after.json()["data"])
35 | assert (
36 | count_ingest_after == count_ingest_before + 1
37 | ), "The temp doc should be returned"
38 |
39 |
40 | def test_ingest_plain_text(test_client: TestClient) -> None:
41 | response = test_client.post(
42 | "/v1/ingest/text", json={"file_name": "file_name", "text": "text"}
43 | )
44 | assert response.status_code == 200
45 | ingest_result = IngestResponse.model_validate(response.json())
46 | assert len(ingest_result.data) == 1
47 |
--------------------------------------------------------------------------------
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
1 | name: tests
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 | pull_request:
8 |
9 | concurrency:
10 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.head_ref || github.ref }}
11 | cancel-in-progress: ${{ github.event_name == 'pull_request' }}
12 |
13 | jobs:
14 | setup:
15 | runs-on: ubuntu-latest
16 | steps:
17 | - uses: actions/checkout@v3
18 | - uses: ./.github/workflows/actions/install_dependencies
19 |
20 | checks:
21 | needs: setup
22 | runs-on: ubuntu-latest
23 | name: ${{ matrix.quality-command }}
24 | strategy:
25 | matrix:
26 | quality-command:
27 | - black
28 | - ruff
29 | - mypy
30 | steps:
31 | - uses: actions/checkout@v3
32 | - uses: ./.github/workflows/actions/install_dependencies
33 | - name: run ${{ matrix.quality-command }}
34 | run: make ${{ matrix.quality-command }}
35 |
36 | test:
37 | needs: setup
38 | runs-on: ubuntu-latest
39 | name: test
40 | steps:
41 | - uses: actions/checkout@v3
42 | - uses: ./.github/workflows/actions/install_dependencies
43 | - name: run test
44 | run: make test-coverage
45 | # Run even if make test fails for coverage reports
46 | # TODO: select a better xml results displayer
47 |       - name: Archive test results
48 | uses: actions/upload-artifact@v3
49 | if: always()
50 | with:
51 | name: test_results
52 | path: tests-results.xml
53 | - name: Archive code coverage results
54 | uses: actions/upload-artifact@v3
55 | if: always()
56 | with:
57 | name: code-coverage-report
58 | path: htmlcov/
59 |
60 | all_checks_passed:
61 | # Used to easily force requirements checks in GitHub
62 | needs:
63 | - checks
64 | - test
65 | runs-on: ubuntu-latest
66 | steps:
67 | - run: echo "All checks passed"
68 |
--------------------------------------------------------------------------------
/tests/server/utils/test_simple_auth.py:
--------------------------------------------------------------------------------
1 | """Tests to validate that the simple authentication mechanism is working.
2 |
3 | NOTE: We are not testing the switch based on the config in
4 | `private_gpt.server.utils.auth`. This is not done because of the way the code
5 | is currently architected (it is hard to patch the `settings` and the app while
6 | the tests are directly importing them).
7 | """
8 |
9 | from typing import Annotated
10 |
11 | import pytest
12 | from fastapi import Depends
13 | from fastapi.testclient import TestClient
14 |
15 | from private_gpt.server.utils.auth import (
16 | NOT_AUTHENTICATED,
17 | _simple_authentication,
18 | authenticated,
19 | )
20 | from private_gpt.settings.settings import settings
21 |
22 |
23 | def _copy_simple_authenticated(
24 | _simple_authentication: Annotated[bool, Depends(_simple_authentication)]
25 | ) -> bool:
26 | """Check if the request is authenticated."""
27 | if not _simple_authentication:
28 | raise NOT_AUTHENTICATED
29 | return True
30 |
31 |
32 | @pytest.fixture(autouse=True)
33 | def _patch_authenticated_dependency(test_client: TestClient):
34 | # Patch the server to use simple authentication
35 |
36 | test_client.app.dependency_overrides[authenticated] = _copy_simple_authenticated
37 |
38 | # Call the actual test
39 | yield
40 |
41 | # Remove the patch for other tests
42 | test_client.app.dependency_overrides = {}
43 |
44 |
45 | def test_default_auth_working_when_enabled_401(test_client: TestClient) -> None:
46 | response = test_client.get("/v1/ingest/list")
47 | assert response.status_code == 401
48 |
49 |
50 | def test_default_auth_working_when_enabled_200(test_client: TestClient) -> None:
51 | response_fail = test_client.get("/v1/ingest/list")
52 | assert response_fail.status_code == 401
53 |
54 | response_success = test_client.get(
55 | "/v1/ingest/list", headers={"Authorization": settings().server.auth.secret}
56 | )
57 | assert response_success.status_code == 200
58 |
--------------------------------------------------------------------------------
/private_gpt/server/chunks/chunks_router.py:
--------------------------------------------------------------------------------
1 | from typing import Literal
2 |
3 | from fastapi import APIRouter, Depends, Request
4 | from pydantic import BaseModel, Field
5 |
6 | from private_gpt.open_ai.extensions.context_filter import ContextFilter
7 | from private_gpt.server.chunks.chunks_service import Chunk, ChunksService
8 | from private_gpt.server.utils.auth import authenticated
9 |
10 | chunks_router = APIRouter(prefix="/v1", dependencies=[Depends(authenticated)])
11 |
12 |
13 | class ChunksBody(BaseModel):
14 | text: str = Field(examples=["Q3 2023 sales"])
15 | context_filter: ContextFilter | None = None
16 | limit: int = 10
17 | prev_next_chunks: int = Field(default=0, examples=[2])
18 |
19 |
20 | class ChunksResponse(BaseModel):
21 | object: Literal["list"]
22 | model: Literal["private-gpt"]
23 | data: list[Chunk]
24 |
25 |
26 | @chunks_router.post("/chunks", tags=["Context Chunks"])
27 | def chunks_retrieval(request: Request, body: ChunksBody) -> ChunksResponse:
28 | """Given a `text`, returns the most relevant chunks from the ingested documents.
29 |
30 | The returned information can be used to generate prompts that can be
31 | passed to `/completions` or `/chat/completions` APIs. Note: it is usually a very
32 | fast API, because only the Embeddings model is involved, not the LLM. The
33 | returned information contains the relevant chunk `text` together with the source
34 | `document` it is coming from. It also contains a score that can be used to
35 | compare different results.
36 |
37 | The max number of chunks to be returned is set using the `limit` param.
38 |
39 | Previous and next chunks (pieces of text that appear right before or after in the
40 | document) can be fetched by using the `prev_next_chunks` field.
41 |
42 | The documents being used can be filtered using the `context_filter` and passing
43 | the document IDs to be used. Ingested documents IDs can be found using
44 | `/ingest/list` endpoint. If you want all ingested documents to be used,
45 | remove `context_filter` altogether.
46 | """
47 | service = request.state.injector.get(ChunksService)
48 | results = service.retrieve_relevant(
49 | body.text, body.context_filter, body.limit, body.prev_next_chunks
50 | )
51 | return ChunksResponse(
52 | object="list",
53 | model="private-gpt",
54 | data=results,
55 | )
56 |
--------------------------------------------------------------------------------
/tests/server/ingest/test.txt:
--------------------------------------------------------------------------------
1 | Once upon a time, in a magical forest called Enchantia, lived a young and cheerful deer named Zumi. Zumi was no ordinary deer; she was bright-eyed, intelligent, and had a heart full of curiosity. One sunny morning, as the forest came alive with the sweet melodies of chirping birds and rustling leaves, Zumi eagerly pranced through the woods on her way to school.
2 |
3 | Enchantia Forest School was a unique place, where all the woodland creatures gathered to learn and grow together. The school was nestled in a clearing surrounded by tall, ancient trees. Zumi loved the feeling of anticipation as she approached the school, her hooves barely touching the ground in excitement.
4 |
5 | As she arrived at the school, her dear friend and classmate, Oliver the wise old owl, greeted her with a friendly hoot. "Good morning, Zumi! Are you ready for another day of adventure and learning?"
6 |
7 | Zumi's eyes sparkled with enthusiasm as she nodded, "Absolutely, Oliver! I can't wait to see what we'll discover today."
8 |
9 | In their classroom, Teacher Willow, a gentle and nurturing willow tree, welcomed the students. The classroom was adorned with vibrant leaves and twinkling fireflies, creating a magical and cozy atmosphere. Today's lesson was about the history of the forest and the importance of living harmoniously with nature.
10 |
11 | The students listened attentively as Teacher Willow recounted stories of ancient times when the forest thrived in unity and peace. Zumi was particularly enthralled by the tales of forest guardians and how they protected the magical balance of Enchantia.
12 |
13 | After the lesson, it was time for recess. Zumi joined her friends in a lively game of tag, where they darted and danced playfully among the trees. Zumi's speed and agility made her an excellent tagger, and laughter filled the air as they played.
14 |
15 | Later, they gathered for an art class, where they expressed themselves through painting and sculpting with clay. Zumi chose to paint a mural of the forest, portraying the beauty and magic they were surrounded by every day.
16 |
17 | As the day came to an end, the students sat in a circle to share stories and reflections. Zumi shared her excitement for the day and how she learned to appreciate the interconnectedness of all creatures in the forest.
18 |
19 | As the sun set, casting a golden glow across the forest, Zumi made her way back home, her heart brimming with happiness and newfound knowledge. Each day at Enchantia Forest School was an adventure, and Zumi couldn't wait to learn more and grow with her friends, for the magic of learning was as boundless as the forest itself. And so, under the canopy of stars and the watchful eyes of the forest, Zumi drifted into dreams filled with wonder and anticipation for the adventures that awaited her on the morrow.
--------------------------------------------------------------------------------
/private_gpt/launcher.py:
--------------------------------------------------------------------------------
1 | """FastAPI app creation, logger configuration and main API routes."""
2 |
3 | import logging
4 |
5 | from fastapi import Depends, FastAPI, Request
6 | from fastapi.middleware.cors import CORSMiddleware
7 | from injector import Injector
8 | from llama_index.core.callbacks import CallbackManager
9 | from llama_index.core.callbacks.global_handlers import create_global_handler
10 | from llama_index.core.settings import Settings as LlamaIndexSettings
11 |
12 | from private_gpt.server.chat.chat_router import chat_router
13 | from private_gpt.server.chunks.chunks_router import chunks_router
14 | from private_gpt.server.completions.completions_router import completions_router
15 | from private_gpt.server.embeddings.embeddings_router import embeddings_router
16 | from private_gpt.server.health.health_router import health_router
17 | from private_gpt.server.ingest.ingest_router import ingest_router
18 | from private_gpt.settings.settings import Settings
19 |
20 | logger = logging.getLogger(__name__)
21 |
22 |
23 | def create_app(root_injector: Injector) -> FastAPI:
24 |
25 | # Start the API
26 | async def bind_injector_to_request(request: Request) -> None:
27 | request.state.injector = root_injector
28 |
29 | app = FastAPI(dependencies=[Depends(bind_injector_to_request)])
30 |
31 | app.include_router(completions_router)
32 | app.include_router(chat_router)
33 | app.include_router(chunks_router)
34 | app.include_router(ingest_router)
35 | app.include_router(embeddings_router)
36 | app.include_router(health_router)
37 |
38 | # Add LlamaIndex simple observability
39 | global_handler = create_global_handler("simple")
40 | LlamaIndexSettings.callback_manager = CallbackManager([global_handler])
41 |
42 | settings = root_injector.get(Settings)
43 | if settings.server.cors.enabled:
44 | logger.debug("Setting up CORS middleware")
45 | app.add_middleware(
46 | CORSMiddleware,
47 | allow_credentials=settings.server.cors.allow_credentials,
48 | allow_origins=settings.server.cors.allow_origins,
49 | allow_origin_regex=settings.server.cors.allow_origin_regex,
50 | allow_methods=settings.server.cors.allow_methods,
51 | allow_headers=settings.server.cors.allow_headers,
52 | )
53 |
54 | if settings.ui.enabled:
55 | logger.debug("Importing the UI module")
56 | try:
57 | from private_gpt.ui.ui import PrivateGptUi
58 | except ImportError as e:
59 | raise ImportError(
60 | "UI dependencies not found, install with `poetry install --extras ui`"
61 | ) from e
62 |
63 | ui = root_injector.get(PrivateGptUi)
64 | ui.mount_in_app(app, settings.ui.path)
65 |
66 | return app
67 |
--------------------------------------------------------------------------------
/private_gpt/server/utils/auth.py:
--------------------------------------------------------------------------------
1 | """Authentication mechanism for the API.
2 |
3 | Define a simple mechanism to authenticate requests.
4 | More complex authentication mechanisms can be defined here, and be placed in the
5 | `authenticated` method (being a 'bean' injected in fastapi routers).
6 |
7 | Authorization can also be made after the authentication, and depends on
8 | the authentication. Authorization should not be implemented in this file.
9 |
10 | Authorization can be done by following fastapi's guides:
11 | * https://fastapi.tiangolo.com/advanced/security/oauth2-scopes/
12 | * https://fastapi.tiangolo.com/tutorial/security/
13 | * https://fastapi.tiangolo.com/tutorial/dependencies/dependencies-in-path-operation-decorators/
14 | """
15 |
16 | # mypy: ignore-errors
17 | # Disabled mypy error: All conditional function variants must have identical signatures
18 | # We are changing the implementation of the authenticated method, based on
19 | # the config. If the auth is not enabled, we are not defining the complex method
20 | # with its dependencies.
21 | import logging
22 | import secrets
23 | from typing import Annotated
24 |
25 | from fastapi import Depends, Header, HTTPException
26 |
27 | from private_gpt.settings.settings import settings
28 |
29 | # 401 signifies that the request requires authentication.
30 | # 403 signifies that the authenticated user is not authorized to perform the operation.
31 | NOT_AUTHENTICATED = HTTPException(
32 | status_code=401,
33 | detail="Not authenticated",
34 | headers={"WWW-Authenticate": 'Basic realm="All the API", charset="UTF-8"'},
35 | )
36 |
37 | logger = logging.getLogger(__name__)
38 |
39 |
40 | def _simple_authentication(authorization: Annotated[str, Header()] = "") -> bool:
41 | """Check if the request is authenticated."""
42 | if not secrets.compare_digest(authorization, settings().server.auth.secret):
43 | # If the "Authorization" header is not the expected one, raise an exception.
44 | raise NOT_AUTHENTICATED
45 | return True
46 |
47 |
48 | if not settings().server.auth.enabled:
49 | logger.debug(
50 | "Defining a dummy authentication mechanism for fastapi, always authenticating requests"
51 | )
52 |
53 | # Define a dummy authentication method that always returns True.
54 | def authenticated() -> bool:
55 | """Check if the request is authenticated."""
56 | return True
57 |
58 | else:
59 | logger.info("Defining the given authentication mechanism for the API")
60 |
61 | # Method to be used as a dependency to check if the request is authenticated.
62 | def authenticated(
63 | _simple_authentication: Annotated[bool, Depends(_simple_authentication)]
64 | ) -> bool:
65 | """Check if the request is authenticated."""
66 | assert settings().server.auth.enabled
67 | if not _simple_authentication:
68 | raise NOT_AUTHENTICATED
69 | return True
70 |
--------------------------------------------------------------------------------
/private_gpt/components/node_store/node_store_component.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | from injector import inject, singleton
4 | from llama_index.core.storage.docstore import BaseDocumentStore, SimpleDocumentStore
5 | from llama_index.core.storage.index_store import SimpleIndexStore
6 | from llama_index.core.storage.index_store.types import BaseIndexStore
7 |
8 | from private_gpt.paths import local_data_path
9 | from private_gpt.settings.settings import Settings
10 |
11 | logger = logging.getLogger(__name__)
12 |
13 |
14 | @singleton
15 | class NodeStoreComponent:
16 | index_store: BaseIndexStore
17 | doc_store: BaseDocumentStore
18 |
19 | @inject
20 | def __init__(self, settings: Settings) -> None:
21 | match settings.nodestore.database:
22 | case "simple":
23 | try:
24 | self.index_store = SimpleIndexStore.from_persist_dir(
25 | persist_dir=str(local_data_path)
26 | )
27 | except FileNotFoundError:
28 | logger.debug("Local index store not found, creating a new one")
29 | self.index_store = SimpleIndexStore()
30 |
31 | try:
32 | self.doc_store = SimpleDocumentStore.from_persist_dir(
33 | persist_dir=str(local_data_path)
34 | )
35 | except FileNotFoundError:
36 | logger.debug("Local document store not found, creating a new one")
37 | self.doc_store = SimpleDocumentStore()
38 |
39 | case "postgres":
40 | try:
41 | from llama_index.core.storage.docstore.postgres_docstore import (
42 | PostgresDocumentStore,
43 | )
44 | from llama_index.core.storage.index_store.postgres_index_store import (
45 | PostgresIndexStore,
46 | )
47 | except ImportError:
48 | raise ImportError(
49 | "Postgres dependencies not found, install with `poetry install --extras storage-nodestore-postgres`"
50 | ) from None
51 |
52 | if settings.postgres is None:
53 | raise ValueError("Postgres index/doc store settings not found.")
54 |
55 | self.index_store = PostgresIndexStore.from_params(
56 | **settings.postgres.model_dump(exclude_none=True)
57 | )
58 | self.doc_store = PostgresDocumentStore.from_params(
59 | **settings.postgres.model_dump(exclude_none=True)
60 | )
61 |
62 | case _:
63 | # Should be unreachable
64 | # The settings validator should have caught this
65 | raise ValueError(
66 | f"Database {settings.nodestore.database} not supported"
67 | )
68 |
--------------------------------------------------------------------------------
/fern/docs/pages/manual/nodestore.mdx:
--------------------------------------------------------------------------------
1 | ## NodeStores
2 | PrivateGPT supports **Simple** and [Postgres](https://www.postgresql.org/) providers, with Simple being the default.
3 |
4 | In order to select one or the other, set the `nodestore.database` property in the `settings.yaml` file to `simple` or `postgres`.
5 |
6 | ```yaml
7 | nodestore:
8 | database: simple
9 | ```
10 |
11 | ### Simple Document Store
12 |
13 | The simple document store persists data using in-memory structures backed by disk storage.
14 |
15 | Enabling the simple document store is an excellent choice for small projects or proofs of concept where you need to persist data while maintaining minimal setup complexity. To get started, set the `nodestore.database` property in your `settings.yaml` file as follows:
16 |
17 | ```yaml
18 | nodestore:
19 | database: simple
20 | ```
21 | The beauty of the simple document store is its flexibility and ease of implementation. It provides a solid foundation for managing and retrieving data without the need for complex setup or configuration. The combination of in-memory processing and disk persistence ensures that you can efficiently handle small to medium-sized datasets while maintaining data consistency across runs.
22 |
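For reference, here is a minimal sketch of how these simple stores are loaded and how they fall back to empty in-memory stores on a first run. It mirrors what `NodeStoreComponent` does internally; the persistence directory below is an assumption and in practice is derived from the project's `local_data` path.

```python
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.core.storage.index_store import SimpleIndexStore

persist_dir = "local_data/private_gpt"  # assumed location of the persisted stores

try:
    # Load the stores previously persisted to disk
    doc_store = SimpleDocumentStore.from_persist_dir(persist_dir=persist_dir)
    index_store = SimpleIndexStore.from_persist_dir(persist_dir=persist_dir)
except FileNotFoundError:
    # Nothing persisted yet: start with empty in-memory stores
    doc_store = SimpleDocumentStore()
    index_store = SimpleIndexStore()
```
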
23 | ### Postgres Document Store
24 |
25 | To enable Postgres, set the `nodestore.database` property in the `settings.yaml` file to `postgres` and install the `storage-nodestore-postgres` extra. Note: vector embeddings storage in Postgres is configured separately.
26 |
27 | ```bash
28 | poetry install --extras storage-nodestore-postgres
29 | ```
30 |
31 | The available configuration options are:
32 | | Field | Description |
33 | |---------------|-----------------------------------------------------------|
34 | | **host** | The server hosting the Postgres database. Default is `localhost` |
35 | | **port** | The port on which the Postgres database is accessible. Default is `5432` |
36 | | **database** | The specific database to connect to. Default is `postgres` |
37 | | **user** | The username for database access. Default is `postgres` |
38 | | **password** | The password for database access. (Required) |
39 | | **schema_name** | The database schema to use. Default is `private_gpt` |
40 |
41 | For example:
42 | ```yaml
43 | nodestore:
44 | database: postgres
45 |
46 | postgres:
47 | host: localhost
48 | port: 5432
49 | database: postgres
50 | user: postgres
51 | password:
52 | schema_name: private_gpt
53 | ```
54 |
55 | Given the above configuration, two PostgreSQL tables will be created upon successful connection: one storing metadata related to the index and another storing the document data itself.
56 |
57 | ```
58 | postgres=# \dt private_gpt.*
59 | List of relations
60 | Schema | Name | Type | Owner
61 | -------------+-----------------+-------+--------------
62 | private_gpt | data_docstore | table | postgres
63 | private_gpt | data_indexstore | table | postgres
64 |
65 | postgres=#
66 | ```
67 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | # Any args passed to the make script, use with $(call args, default_value)
2 | args = `arg="$(filter-out $@,$(MAKECMDGOALS))" && echo $${arg:-${1}}`
3 |
4 | ########################################################################################################################
5 | # Quality checks
6 | ########################################################################################################################
7 |
8 | test:
9 | PYTHONPATH=. poetry run pytest tests
10 |
11 | test-coverage:
12 | PYTHONPATH=. poetry run pytest tests --cov private_gpt --cov-report term --cov-report=html --cov-report xml --junit-xml=tests-results.xml
13 |
14 | black:
15 | poetry run black . --check
16 |
17 | ruff:
18 | poetry run ruff check private_gpt tests
19 |
20 | format:
21 | poetry run black .
22 | poetry run ruff check private_gpt tests --fix
23 |
24 | mypy:
25 | poetry run mypy private_gpt
26 |
27 | check:
28 | make format
29 | make mypy
30 |
31 | ########################################################################################################################
32 | # Run
33 | ########################################################################################################################
34 |
35 | run:
36 | poetry run python -m private_gpt
37 |
38 | dev-windows:
39 | (set PGPT_PROFILES=local & poetry run python -m uvicorn private_gpt.main:app --reload --port 8001)
40 |
41 | dev:
42 | PYTHONUNBUFFERED=1 PGPT_PROFILES=local poetry run python -m uvicorn private_gpt.main:app --reload --port 8001
43 |
44 | ########################################################################################################################
45 | # Misc
46 | ########################################################################################################################
47 |
48 | api-docs:
49 | PGPT_PROFILES=mock poetry run python scripts/extract_openapi.py private_gpt.main:app --out fern/openapi/openapi.json
50 |
51 | ingest:
52 | @poetry run python scripts/ingest_folder.py $(call args)
53 |
54 | stats:
55 | poetry run python scripts/utils.py stats
56 |
57 | wipe:
58 | poetry run python scripts/utils.py wipe
59 |
60 | setup:
61 | poetry run python scripts/setup
62 |
63 | list:
64 | @echo "Available commands:"
65 | @echo " test : Run tests using pytest"
66 | @echo " test-coverage : Run tests with coverage report"
67 | @echo " black : Check code format with black"
68 | @echo " ruff : Check code with ruff"
69 | @echo " format : Format code with black and ruff"
70 | @echo " mypy : Run mypy for type checking"
71 | @echo " check : Run format and mypy commands"
72 | @echo " run : Run the application"
73 | @echo " dev-windows : Run the application in development mode on Windows"
74 | @echo " dev : Run the application in development mode"
75 | @echo " api-docs : Generate API documentation"
76 | @echo " ingest : Ingest data using specified script"
77 | @echo " wipe : Wipe data using specified script"
78 | @echo " setup : Setup the application"
79 |
--------------------------------------------------------------------------------
/private_gpt/components/embedding/custom/sagemaker.py:
--------------------------------------------------------------------------------
1 | # mypy: ignore-errors
2 | import json
3 | from typing import Any
4 |
5 | import boto3
6 | from llama_index.core.base.embeddings.base import BaseEmbedding
7 | from pydantic import Field, PrivateAttr
8 |
9 |
10 | class SagemakerEmbedding(BaseEmbedding):
11 | """Sagemaker Embedding Endpoint.
12 |
13 | To use, you must supply the endpoint name from your deployed
14 | Sagemaker embedding model & the region where it is deployed.
15 |
16 | To authenticate, the AWS client uses the following methods to
17 | automatically load credentials:
18 | https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html
19 |
20 | If a specific credential profile should be used, you must pass
21 | the name of the profile from the ~/.aws/credentials file that is to be used.
22 |
23 | Make sure the credentials / roles used have the required policies to
24 | access the Sagemaker endpoint.
25 | See: https://docs.aws.amazon.com/IAM/latest/UserGuide/access_policies.html
26 | """
27 |
28 | endpoint_name: str = Field(description="")
29 |
30 | _boto_client: Any = boto3.client(
31 | "sagemaker-runtime",
32 | ) # TODO make it an optional field
33 |
34 | _async_not_implemented_warned: bool = PrivateAttr(default=False)
35 |
36 | @classmethod
37 | def class_name(cls) -> str:
38 | return "SagemakerEmbedding"
39 |
40 | def _async_not_implemented_warn_once(self) -> None:
41 | if not self._async_not_implemented_warned:
42 | print("Async embedding not available, falling back to sync method.")
43 | self._async_not_implemented_warned = True
44 |
45 | def _embed(self, sentences: list[str]) -> list[list[float]]:
46 | request_params = {
47 | "inputs": sentences,
48 | }
49 |
50 | resp = self._boto_client.invoke_endpoint(
51 | EndpointName=self.endpoint_name,
52 | Body=json.dumps(request_params),
53 | ContentType="application/json",
54 | )
55 |
56 | response_body = resp["Body"]
57 | response_str = response_body.read().decode("utf-8")
58 | response_json = json.loads(response_str)
59 |
60 | return response_json["vectors"]
61 |
62 | def _get_query_embedding(self, query: str) -> list[float]:
63 | """Get query embedding."""
64 | return self._embed([query])[0]
65 |
66 | async def _aget_query_embedding(self, query: str) -> list[float]:
67 | # Warn the user that sync is being used
68 | self._async_not_implemented_warn_once()
69 | return self._get_query_embedding(query)
70 |
71 | async def _aget_text_embedding(self, text: str) -> list[float]:
72 | # Warn the user that sync is being used
73 | self._async_not_implemented_warn_once()
74 | return self._get_text_embedding(text)
75 |
76 | def _get_text_embedding(self, text: str) -> list[float]:
77 | """Get text embedding."""
78 | return self._embed([text])[0]
79 |
80 | def _get_text_embeddings(self, texts: list[str]) -> list[list[float]]:
81 | """Get text embeddings."""
82 | return self._embed(texts)
83 |
--------------------------------------------------------------------------------
/fern/docs/pages/manual/ui.mdx:
--------------------------------------------------------------------------------
1 | ## Gradio UI user manual
2 |
3 | Gradio UI is a ready-to-use way of testing most of the PrivateGPT API functionalities.
4 |
5 | 
6 |
7 | ### Execution Modes
8 |
9 | It has 3 modes of execution (you can select them in the top-left); the equivalent API calls are sketched after the list:
10 |
11 | * Query Docs: uses the context from the
12 | ingested documents to answer the questions posted in the chat. It also takes
13 | into account previous chat messages as context.
14 | * Makes use of `/chat/completions` API with `use_context=true` and no
15 | `context_filter`.
16 | * Search in Docs: fast search that returns the 4 most related text
17 | chunks, together with their source document and page.
18 | * Makes use of `/chunks` API with no `context_filter`, `limit=4` and
19 | `prev_next_chunks=0`.
20 | * LLM Chat: simple, non-contextual chat with the LLM. The ingested documents won't
21 | be taken into account, only the previous messages.
22 | * Makes use of `/chat/completions` API with `use_context=false`.
23 |
24 | ### Document Ingestion
25 |
26 | Ingest documents by using the `Upload a File` button. You can check the progress of
27 | the ingestion in the console logs of the server.
28 |
29 | The list of ingested files is shown below the button.
30 |
31 | If you want to delete the ingested documents, refer to the *Reset Local documents
32 | database* section in the documentation.
33 |
34 | ### Chat
35 |
36 | Normal chat interface, self-explanatory ;)
37 |
38 | #### System Prompt
39 | You can view and change the system prompt being passed to the LLM by clicking "Additional Inputs"
40 | in the chat interface. The system prompt is also logged on the server.
41 |
42 | By default, the `Query Docs` mode uses the setting value `ui.default_query_system_prompt`.
43 |
44 | The `LLM Chat` mode attempts to use the optional settings value `ui.default_chat_system_prompt`.
45 |
46 | If no system prompt is entered, the UI will display the default system prompt being used
47 | for the active mode.
48 |
49 | ##### System Prompt Examples:
50 |
51 | The system prompt can effectively give your chat bot specialized roles, and produce results tailored to the prompt
52 | you have given the model. Examples of system prompts can be found
53 | [here](https://www.w3schools.com/gen_ai/chatgpt-3-5/chatgpt-3-5_roles.php).
54 |
55 | Some interesting examples to try include:
56 |
57 | * You are -X-. You have all the knowledge and personality of -X-. Answer as if you were -X- using
58 | their manner of speaking and vocabulary.
59 | * Example: You are Shakespeare. You have all the knowledge and personality of Shakespeare.
60 | Answer as if you were Shakespeare using their manner of speaking and vocabulary.
61 | * You are an expert (at) -role-. Answer all questions using your expertise on -specific domain topic-.
62 | * Example: You are an expert software engineer. Answer all questions using your expertise on Python.
63 | * You are a -role- bot, respond with -response criteria needed-. If no -response criteria- is needed,
64 | respond with -alternate response-.
65 | * Example: You are a grammar checking bot, respond with any grammatical corrections needed. If no corrections
66 | are needed, respond with "verified".
--------------------------------------------------------------------------------
/private_gpt/server/completions/completions_router.py:
--------------------------------------------------------------------------------
1 | from fastapi import APIRouter, Depends, Request
2 | from pydantic import BaseModel
3 | from starlette.responses import StreamingResponse
4 |
5 | from private_gpt.open_ai.extensions.context_filter import ContextFilter
6 | from private_gpt.open_ai.openai_models import (
7 | OpenAICompletion,
8 | OpenAIMessage,
9 | )
10 | from private_gpt.server.chat.chat_router import ChatBody, chat_completion
11 | from private_gpt.server.utils.auth import authenticated
12 |
13 | completions_router = APIRouter(prefix="/v1", dependencies=[Depends(authenticated)])
14 |
15 |
16 | class CompletionsBody(BaseModel):
17 | prompt: str
18 | system_prompt: str | None = None
19 | use_context: bool = False
20 | context_filter: ContextFilter | None = None
21 | include_sources: bool = True
22 | stream: bool = False
23 |
24 | model_config = {
25 | "json_schema_extra": {
26 | "examples": [
27 | {
28 | "prompt": "How do you fry an egg?",
29 | "system_prompt": "You are a rapper. Always answer with a rap.",
30 | "stream": False,
31 | "use_context": False,
32 | "include_sources": False,
33 | }
34 | ]
35 | }
36 | }
37 |
38 |
39 | @completions_router.post(
40 | "/completions",
41 | response_model=None,
42 | summary="Completion",
43 | responses={200: {"model": OpenAICompletion}},
44 | tags=["Contextual Completions"],
45 | openapi_extra={
46 | "x-fern-streaming": {
47 | "stream-condition": "stream",
48 | "response": {"$ref": "#/components/schemas/OpenAICompletion"},
49 | "response-stream": {"$ref": "#/components/schemas/OpenAICompletion"},
50 | }
51 | },
52 | )
53 | def prompt_completion(
54 | request: Request, body: CompletionsBody
55 | ) -> OpenAICompletion | StreamingResponse:
56 | """We recommend most users use our Chat completions API.
57 |
58 | Given a prompt, the model will return one predicted completion.
59 |
60 | Optionally include a `system_prompt` to influence the way the LLM answers.
61 |
62 | If `use_context`
63 | is set to `true`, the model will use context coming from the ingested documents
64 | to create the response. The documents being used can be filtered using the
65 | `context_filter` and passing the document IDs to be used. Ingested documents IDs
66 | can be found using `/ingest/list` endpoint. If you want all ingested documents to
67 | be used, remove `context_filter` altogether.
68 |
69 | When using `'include_sources': true`, the API will return the source Chunks used
70 | to create the response, which come from the context provided.
71 |
72 | When using `'stream': true`, the API will return data chunks following [OpenAI's
73 | streaming model](https://platform.openai.com/docs/api-reference/chat/streaming):
74 | ```
75 | {"id":"12345","object":"completion.chunk","created":1694268190,
76 | "model":"private-gpt","choices":[{"index":0,"delta":{"content":"Hello"},
77 | "finish_reason":null}]}
78 | ```
79 | """
80 | messages = [OpenAIMessage(content=body.prompt, role="user")]
81 | # If system prompt is passed, create a fake message with the system prompt.
82 | if body.system_prompt:
83 | messages.insert(0, OpenAIMessage(content=body.system_prompt, role="system"))
84 |
85 | chat_body = ChatBody(
86 | messages=messages,
87 | use_context=body.use_context,
88 | stream=body.stream,
89 | include_sources=body.include_sources,
90 | context_filter=body.context_filter,
91 | )
92 | return chat_completion(request, chat_body)
93 |
--------------------------------------------------------------------------------
/fern/docs/pages/manual/settings.mdx:
--------------------------------------------------------------------------------
1 | # Settings and profiles for your private GPT
2 |
3 | The configuration of your private GPT server is done through `settings` files (more precisely, `settings.yaml`).
4 | These text files are written using the [YAML](https://en.wikipedia.org/wiki/YAML) syntax.
5 |
6 | While privateGPT ships with safe and universal configuration files, you might want to quickly customize your
7 | privateGPT, and this can be done using the `settings` files.
8 |
9 | This project defines the concept of **profiles** (or configuration profiles).
10 | This mechanism, driven by environment variables, gives you the ability to easily switch between
11 | the configurations you've made.
12 |
13 | A typical use case of profiles is to easily switch between LLMs and embedding models.
14 | To be a bit more precise, you can change the language (to French, Spanish, Italian, English, etc) by simply changing
15 | the profile you've selected; no code changes required!
16 |
17 | PrivateGPT is configured through *profiles* that are defined using yaml files, and selected through env variables.
18 | The full list of properties configurable can be found in `settings.yaml`.
19 |
20 | ## How to know which profiles exist
21 | Given that a profile `foo_bar` points to the file `settings-foo_bar.yaml` and vice-versa, you simply have to look
22 | at the files starting with `settings` and ending in `.yaml`.
23 |
24 | ## How to use an existing profile
25 | **Please note that the syntax to set the value of an environment variable depends on your OS**.
26 | You have to set environment variable `PGPT_PROFILES` to the name of the profile you want to use.
27 |
28 | For example, on **Linux and macOS**, this gives:
29 | ```bash
30 | export PGPT_PROFILES=my_profile_name_here
31 | ```
32 |
33 | Windows Command Prompt (cmd) has a different syntax:
34 | ```shell
35 | set PGPT_PROFILES=my_profile_name_here
36 | ```
37 |
38 | Windows Powershell has a different syntax:
39 | ```shell
40 | $env:PGPT_PROFILES="my_profile_name_here"
41 | ```
42 | If the above is not working, you might want to try other ways to set an env variable in your Windows terminal.
43 |
44 | ---
45 |
46 | Once you've set this environment variable to the desired profile, you can simply launch your privateGPT,
47 | and it will run using your profile on top of the default configuration.
48 |
49 | ## Reference
50 | Additional details on the profiles are described in this section.
51 |
52 | ### Environment variable `PGPT_SETTINGS_FOLDER`
53 |
54 | The location of the settings folder. Defaults to the root of the project.
55 | Should contain the default `settings.yaml` and any other `settings-{profile}.yaml`.
56 |
57 | ### Environment variable `PGPT_PROFILES`
58 |
59 | By default, the profile definition in `settings.yaml` is loaded.
60 | Using this env var you can load additional profiles; format is a comma separated list of profile names.
61 | This will merge `settings-{profile}.yaml` on top of the base settings file.
62 |
63 | For example:
64 | `PGPT_PROFILES=local,cuda` will load `settings-local.yaml`
65 | and `settings-cuda.yaml`; their contents will be merged, with
66 | later profiles' properties overriding values of earlier ones such as `settings.yaml`.
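
To illustrate how the merge behaves, here is a rough sketch using the same `deep_update` helper the settings loader relies on; the keys below are purely illustrative.

```python
from pydantic.v1.utils import deep_update

base = {"llm": {"mode": "local", "max_new_tokens": 256}}  # e.g. from settings.yaml
override = {"llm": {"mode": "ollama"}}                     # e.g. from a later profile

# Nested keys are merged; values from later profiles win over earlier ones
print(deep_update(base, override))
# {'llm': {'mode': 'ollama', 'max_new_tokens': 256}}
```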
67 |
68 | During testing, the `test` profile will be active along with the default, therefore the `settings-test.yaml`
69 | file is required.
70 |
71 | ### Environment variables expansion
72 |
73 | Configuration files can contain environment variables;
74 | they will be expanded at runtime.
75 |
76 | Expansion must follow the pattern `${VARIABLE_NAME:default_value}`.
77 |
78 | For example, the following configuration will use the value of the `PORT`
79 | environment variable or `8001` if it's not set.
80 | Missing variables with no default will produce an error.
81 |
82 | ```yaml
83 | server:
84 | port: ${PORT:8001}
85 | ```
--------------------------------------------------------------------------------
/private_gpt/components/vector_store/batched_chroma.py:
--------------------------------------------------------------------------------
1 | from collections.abc import Generator
2 | from typing import Any
3 |
4 | from llama_index.core.schema import BaseNode, MetadataMode
5 | from llama_index.core.vector_stores.utils import node_to_metadata_dict
6 | from llama_index.vector_stores.chroma import ChromaVectorStore # type: ignore
7 |
8 |
9 | def chunk_list(
10 | lst: list[BaseNode], max_chunk_size: int
11 | ) -> Generator[list[BaseNode], None, None]:
12 | """Yield successive max_chunk_size-sized chunks from lst.
13 |
14 | Args:
15 | lst (List[BaseNode]): list of nodes with embeddings
16 | max_chunk_size (int): max chunk size
17 |
18 | Yields:
19 | Generator[List[BaseNode], None, None]: list of nodes with embeddings
20 | """
21 | for i in range(0, len(lst), max_chunk_size):
22 | yield lst[i : i + max_chunk_size]
23 |
24 |
25 | class BatchedChromaVectorStore(ChromaVectorStore): # type: ignore
26 | """Chroma vector store, batching additions to avoid reaching the max batch limit.
27 |
28 | In this vector store, embeddings are stored within a ChromaDB collection.
29 |
30 | During query time, the index uses ChromaDB to query for the top
31 | k most similar nodes.
32 |
33 | Args:
34 | chroma_client (from chromadb.api.API):
35 | API instance
36 | chroma_collection (chromadb.api.models.Collection.Collection):
37 | ChromaDB collection instance
38 |
39 | """
40 |
41 | chroma_client: Any | None
42 |
43 | def __init__(
44 | self,
45 | chroma_client: Any,
46 | chroma_collection: Any,
47 | host: str | None = None,
48 | port: str | None = None,
49 | ssl: bool = False,
50 | headers: dict[str, str] | None = None,
51 | collection_kwargs: dict[Any, Any] | None = None,
52 | ) -> None:
53 | super().__init__(
54 | chroma_collection=chroma_collection,
55 | host=host,
56 | port=port,
57 | ssl=ssl,
58 | headers=headers,
59 | collection_kwargs=collection_kwargs or {},
60 | )
61 | self.chroma_client = chroma_client
62 |
63 | def add(self, nodes: list[BaseNode], **add_kwargs: Any) -> list[str]:
64 | """Add nodes to index, batching the insertion to avoid issues.
65 |
66 | Args:
67 | nodes: List[BaseNode]: list of nodes with embeddings
68 | add_kwargs: _
69 | """
70 | if not self.chroma_client:
71 | raise ValueError("Client not initialized")
72 |
73 | if not self._collection:
74 | raise ValueError("Collection not initialized")
75 |
76 | max_chunk_size = self.chroma_client.max_batch_size
77 | node_chunks = chunk_list(nodes, max_chunk_size)
78 |
79 | all_ids = []
80 | for node_chunk in node_chunks:
81 | embeddings = []
82 | metadatas = []
83 | ids = []
84 | documents = []
85 | for node in node_chunk:
86 | embeddings.append(node.get_embedding())
87 | metadatas.append(
88 | node_to_metadata_dict(
89 | node, remove_text=True, flat_metadata=self.flat_metadata
90 | )
91 | )
92 | ids.append(node.node_id)
93 | documents.append(node.get_content(metadata_mode=MetadataMode.NONE))
94 |
95 | self._collection.add(
96 | embeddings=embeddings,
97 | ids=ids,
98 | metadatas=metadatas,
99 | documents=documents,
100 | )
101 | all_ids.extend(ids)
102 |
103 | return all_ids
104 |
--------------------------------------------------------------------------------
/fern/docs.yml:
--------------------------------------------------------------------------------
1 | # Main Fern configuration file
2 | instances:
3 | - url: privategpt.docs.buildwithfern.com
4 | custom-domain: docs.privategpt.dev
5 |
6 | title: PrivateGPT | Docs
7 |
8 | # The tabs definition, in the top left corner
9 | tabs:
10 | overview:
11 | display-name: Overview
12 | icon: "fa-solid fa-home"
13 | installation:
14 | display-name: Installation
15 | icon: "fa-solid fa-download"
16 | manual:
17 | display-name: Manual
18 | icon: "fa-solid fa-book"
19 | recipes:
20 | display-name: Recipes
21 | icon: "fa-solid fa-flask"
22 | api-reference:
23 | display-name: API Reference
24 | icon: "fa-solid fa-file-contract"
25 |
26 | # Definition of the tabs' contents, displayed on the left side of the page, below all tabs
27 | navigation:
28 | # The default tab
29 | - tab: overview
30 | layout:
31 | - section: Welcome
32 | contents:
33 | - page: Introduction
34 | path: ./docs/pages/overview/welcome.mdx
35 | # How to install privateGPT, with FAQ and troubleshooting
36 | - tab: installation
37 | layout:
38 | - section: Getting started
39 | contents:
40 | - page: Main Concepts
41 | path: ./docs/pages/installation/concepts.mdx
42 | - page: Installation
43 | path: ./docs/pages/installation/installation.mdx
44 | # Manual of privateGPT: how to use it and configure it
45 | - tab: manual
46 | layout:
47 | - section: General configuration
48 | contents:
49 | - page: Configuration
50 | path: ./docs/pages/manual/settings.mdx
51 | - section: Document management
52 | contents:
53 | - page: Ingestion
54 | path: ./docs/pages/manual/ingestion.mdx
55 | - page: Deletion
56 | path: ./docs/pages/manual/ingestion-reset.mdx
57 | - section: Storage
58 | contents:
59 | - page: Vector Stores
60 | path: ./docs/pages/manual/vectordb.mdx
61 | - page: Node Stores
62 | path: ./docs/pages/manual/nodestore.mdx
63 | - section: Advanced Setup
64 | contents:
65 | - page: LLM Backends
66 | path: ./docs/pages/manual/llms.mdx
67 | - page: Reranking
68 | path: ./docs/pages/manual/reranker.mdx
69 | - section: User Interface
70 | contents:
71 | - page: User interface (Gradio) Manual
72 | path: ./docs/pages/manual/ui.mdx
73 | # Small code snippet or example of usage to help users
74 | - tab: recipes
75 | layout:
76 | - section: Choice of LLM
77 | contents:
78 | # TODO: add recipes
79 | - page: List of LLMs
80 | path: ./docs/pages/recipes/list-llm.mdx
81 | # More advanced usage of privateGPT, by API
82 | - tab: api-reference
83 | layout:
84 | - section: Overview
85 | contents:
86 |           - page: API Reference overview
87 | path: ./docs/pages/api-reference/api-reference.mdx
88 | - page: SDKs
89 | path: ./docs/pages/api-reference/sdks.mdx
90 | - api: API Reference
91 |
92 | # Definition of the navbar, displayed in the top right corner.
93 | # `type: primary` is always displayed at the far right of the navbar
94 | navbar-links:
95 | - type: secondary
96 | text: Contact us
97 | url: "mailto:hello@zylon.ai"
98 | - type: github
99 | value: "https://github.com/zylon-ai/private-gpt"
100 | - type: primary
101 | text: Join the Discord
102 | url: https://discord.com/invite/bK6mRVpErU
103 |
104 | colors:
105 | accentPrimary:
106 | dark: "#C6BBFF"
107 | light: "#756E98"
108 |
109 | logo:
110 | dark: ./docs/assets/logo_light.png
111 | light: ./docs/assets/logo_dark.png
112 | height: 50
113 |
114 | favicon: ./docs/assets/favicon.ico
115 |
--------------------------------------------------------------------------------
/fern/docs/pages/installation/concepts.mdx:
--------------------------------------------------------------------------------
1 | PrivateGPT is a service that wraps a set of AI RAG primitives in a comprehensive set of APIs, providing a private, secure, customizable and easy-to-use GenAI development framework.
2 | 
3 | It uses FastAPI and LlamaIndex as its core frameworks. These can be customized by changing the codebase itself.
4 | 
5 | It supports a variety of LLM providers, embeddings providers, and vector stores, both local and remote. These can be easily swapped without changing the codebase.
6 |
7 | # Different Setups support
8 |
9 | ## Setup configurations available
10 | You get to decide the setup for these 3 main components:
11 | - LLM: the large language model provider used for inference. It can be local or remote, or even OpenAI.
12 | - Embeddings: the embeddings provider used to encode the input, the documents and the users' queries. Like the LLM, it can be local or remote, or even OpenAI.
13 | - Vector store: the store used to index and retrieve the documents.
14 |
15 | There is an extra component that can be enabled or disabled: the UI. It is a Gradio UI that allows you to interact with the API in a more user-friendly way.
16 |
17 | ### Setups and Dependencies
18 | Your setup will be the combination of the different options available. You'll find recommended setups in the [installation](./installation) section.
19 | PrivateGPT uses poetry to manage its dependencies. You can install the dependencies for the different setups by running `poetry install --extras "..."`.
20 | Extras are the different options available for each component. For example, to install the dependencies for a local setup with UI and Qdrant as the vector database, Ollama as the LLM and HuggingFace as the local embeddings provider, you would run
21 |
22 | `poetry install --extras "ui vector-stores-qdrant llms-ollama embeddings-huggingface"`.
23 |
24 | Refer to the [installation](./installation) section for more details.
25 |
26 | ### Setups and Configuration
27 | PrivateGPT uses yaml to define its configuration in files named `settings-<profile>.yaml`.
28 | Different configuration files can be created in the root directory of the project.
29 | PrivateGPT will load the configuration at startup from the profile specified in the `PGPT_PROFILES` environment variable.
30 | For example, running:
31 | ```bash
32 | PGPT_PROFILES=ollama make run
33 | ```
34 | will load the configuration from `settings.yaml` and `settings-ollama.yaml`.
35 | - `settings.yaml` is always loaded and contains the default configuration.
36 | - `settings-ollama.yaml` is loaded if the `ollama` profile is specified in the `PGPT_PROFILES` environment variable. It can override configuration from the default `settings.yaml`, as shown in the sketch below.
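
As a minimal sketch (the exact keys depend on the setup you chose), a profile file such as `settings-ollama.yaml` could override just the LLM and embedding sections:

```yaml
llm:
  mode: ollama

embedding:
  mode: ollama

ollama:
  llm_model: llama2
  embedding_model: nomic-embed-text
  api_base: http://localhost:11434
```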
37 |
38 | ## About Fully Local Setups
39 | In order to run PrivateGPT in a fully local setup, you will need to run the LLM, Embeddings and Vector Store locally.
40 | ### Vector stores
41 | The vector stores supported (Qdrant, ChromaDB and Postgres) run locally by default.
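The vector store is selected via the `vectorstore.database` setting; for example, the default Qdrant setup (values taken from the default `settings.yaml`) keeps its data on local disk:

```yaml
vectorstore:
  database: qdrant

qdrant:
  path: local_data/private_gpt/qdrant
```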
42 | ### Embeddings
43 | For local Embeddings there are two options:
44 | * (Recommended) You can use the 'ollama' option in PrivateGPT, which will connect to your local Ollama instance. Ollama greatly simplifies the installation of local LLMs.
45 | * You can use the 'embeddings-huggingface' option in PrivateGPT, which will run a HuggingFace embeddings model locally.
46 | 
47 | In order for the HuggingFace embeddings (the second option) to work, you need to download the embeddings model to the `models` folder. You can do so by running the `setup` script:
48 | ```bash
49 | poetry run python scripts/setup
50 | ```
51 |
52 | ### LLM
53 | For local LLM there are two options:
54 | * (Recommended) You can use the 'ollama' option in PrivateGPT, which will connect to your local Ollama instance. Ollama greatly simplifies the installation of local LLMs.
55 | * You can use the 'llms-llama-cpp' option in PrivateGPT, which will use LlamaCPP. It works great on Mac with Metal most of the time (it leverages the Metal GPU), but it can be tricky on certain Linux and Windows distributions, depending on the GPU. In the installation document you'll find guides and troubleshooting.
56 |
57 | In order for the LlamaCPP-powered LLM to work (the second option), you need to download the LLM model to the `models` folder. You can do so by running the `setup` script:
58 | ```bash
59 | poetry run python scripts/setup
60 | ```
61 |
--------------------------------------------------------------------------------
/private_gpt/open_ai/openai_models.py:
--------------------------------------------------------------------------------
1 | import time
2 | import uuid
3 | from collections.abc import Iterator
4 | from typing import Literal
5 |
6 | from llama_index.core.llms import ChatResponse, CompletionResponse
7 | from pydantic import BaseModel, Field
8 |
9 | from private_gpt.server.chunks.chunks_service import Chunk
10 |
11 |
12 | class OpenAIDelta(BaseModel):
13 | """A piece of completion that needs to be concatenated to get the full message."""
14 |
15 | content: str | None
16 |
17 |
18 | class OpenAIMessage(BaseModel):
19 | """Inference result, with the source of the message.
20 |
21 | Role could be the assistant or system
22 | (providing a default response, not AI generated).
23 | """
24 |
25 | role: Literal["assistant", "system", "user"] = Field(default="user")
26 | content: str | None
27 |
28 |
29 | class OpenAIChoice(BaseModel):
30 | """Response from AI.
31 |
32 | Either the delta or the message will be present, but never both.
33 | Sources used will be returned in case context retrieval was enabled.
34 | """
35 |
36 | finish_reason: str | None = Field(examples=["stop"])
37 | delta: OpenAIDelta | None = None
38 | message: OpenAIMessage | None = None
39 | sources: list[Chunk] | None = None
40 | index: int = 0
41 |
42 |
43 | class OpenAICompletion(BaseModel):
44 | """Clone of OpenAI Completion model.
45 |
46 | For more information see: https://platform.openai.com/docs/api-reference/chat/object
47 | """
48 |
49 | id: str
50 | object: Literal["completion", "completion.chunk"] = Field(default="completion")
51 | created: int = Field(..., examples=[1623340000])
52 | model: Literal["private-gpt"]
53 | choices: list[OpenAIChoice]
54 |
55 | @classmethod
56 | def from_text(
57 | cls,
58 | text: str | None,
59 | finish_reason: str | None = None,
60 | sources: list[Chunk] | None = None,
61 | ) -> "OpenAICompletion":
62 | return OpenAICompletion(
63 | id=str(uuid.uuid4()),
64 | object="completion",
65 | created=int(time.time()),
66 | model="private-gpt",
67 | choices=[
68 | OpenAIChoice(
69 | message=OpenAIMessage(role="assistant", content=text),
70 | finish_reason=finish_reason,
71 | sources=sources,
72 | )
73 | ],
74 | )
75 |
76 | @classmethod
77 | def json_from_delta(
78 | cls,
79 | *,
80 | text: str | None,
81 | finish_reason: str | None = None,
82 | sources: list[Chunk] | None = None,
83 | ) -> str:
84 | chunk = OpenAICompletion(
85 | id=str(uuid.uuid4()),
86 | object="completion.chunk",
87 | created=int(time.time()),
88 | model="private-gpt",
89 | choices=[
90 | OpenAIChoice(
91 | delta=OpenAIDelta(content=text),
92 | finish_reason=finish_reason,
93 | sources=sources,
94 | )
95 | ],
96 | )
97 |
98 | return chunk.model_dump_json()
99 |
100 |
101 | def to_openai_response(
102 | response: str | ChatResponse, sources: list[Chunk] | None = None
103 | ) -> OpenAICompletion:
104 | if isinstance(response, ChatResponse):
105 | return OpenAICompletion.from_text(response.delta, finish_reason="stop")
106 | else:
107 | return OpenAICompletion.from_text(
108 | response, finish_reason="stop", sources=sources
109 | )
110 |
111 |
112 | def to_openai_sse_stream(
113 | response_generator: Iterator[str | CompletionResponse | ChatResponse],
114 | sources: list[Chunk] | None = None,
115 | ) -> Iterator[str]:
116 | for response in response_generator:
117 | if isinstance(response, CompletionResponse | ChatResponse):
118 | yield f"data: {OpenAICompletion.json_from_delta(text=response.delta)}\n\n"
119 | else:
120 | yield f"data: {OpenAICompletion.json_from_delta(text=response, sources=sources)}\n\n"
121 | yield f"data: {OpenAICompletion.json_from_delta(text='', finish_reason='stop')}\n\n"
122 | yield "data: [DONE]\n\n"
123 |
--------------------------------------------------------------------------------
/fern/docs/pages/recipes/list-llm.mdx:
--------------------------------------------------------------------------------
1 | # List of working LLMs
2 |
3 | **Do you have any working combination of LLM and embeddings?**
4 | Please open a PR to add it to the list, and come on our Discord to tell us about it!
5 |
6 | ## Prompt style
7 |
8 | LLMs might have been trained with different prompt styles.
9 | The prompt style is the way the prompt is written, and how the system message is injected into the prompt.
10 |
11 | For example, `llama2` looks like this:
12 | ```text
13 | [INST] <<SYS>>
14 | {{ system_prompt }}
15 | <</SYS>>
16 |
17 | {{ user_message }} [/INST]
18 | ```
19 |
20 | While `default` (the `llama_index` default) looks like this:
21 | ```text
22 | system: {{ system_prompt }}
23 | user: {{ user_message }}
24 | assistant: {{ assistant_message }}
25 | ```
26 |
27 | The "`tag`" style looks like this:
28 |
29 | ```text
30 | <|system|>: {{ system_prompt }}
31 | <|user|>: {{ user_message }}
32 | <|assistant|>: {{ assistant_message }}
33 | ```
34 |
35 | The "`mistral`" style looks like this:
36 |
37 | ```text
38 | [INST] You are an AI assistant. [/INST][INST] Hello, how are you doing? [/INST]
39 | ```
40 |
41 | The "`chatml`" style looks like this:
42 | ```text
43 | <|im_start|>system
44 | {{ system_prompt }}<|im_end|>
45 | <|im_start|>user
46 | {{ user_message }}<|im_end|>
47 | <|im_start|>assistant
48 | {{ assistant_message }}
49 | ```
50 |
51 | Some LLMs will not understand these prompt styles, and will not work (returning nothing).
52 | You can try changing the prompt style to `default` (or `tag`) in the settings; this will
53 | change the way the messages are formatted before being passed to the LLM, as in the sketch below.
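
For example, with the settings layout used by the default `settings.yaml`, the prompt style is set under the `llm` section (a minimal sketch):

```yaml
llm:
  prompt_style: "default"
```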
54 |
55 | ## Example of configuration
56 |
57 | You might want to change the prompt depending on the language and model you are using.
58 |
59 | ### English, with instructions
60 |
61 | `settings-en.yaml`:
62 | ```yml
63 | local:
64 | llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.1-GGUF
65 | llm_hf_model_file: mistral-7b-instruct-v0.1.Q4_K_M.gguf
66 | embedding_hf_model_name: BAAI/bge-small-en-v1.5
67 | prompt_style: "llama2"
68 | ```
69 |
70 | ### French, with instructions
71 |
72 | `settings-fr.yaml`:
73 | ```yml
74 | local:
75 | llm_hf_repo_id: TheBloke/Vigogne-2-7B-Instruct-GGUF
76 | llm_hf_model_file: vigogne-2-7b-instruct.Q4_K_M.gguf
77 | embedding_hf_model_name: dangvantuan/sentence-camembert-base
78 | prompt_style: "default"
79 | # prompt_style: "tag" # also works
80 |   # The default system prompt is injected only when the `prompt_style` != default, and there is no system message in the discussion
81 | # default_system_prompt: Vous êtes un assistant IA qui répond à la question posée à la fin en utilisant le contexte suivant. Si vous ne connaissez pas la réponse, dites simplement que vous ne savez pas, n'essayez pas d'inventer une réponse. Veuillez répondre exclusivement en français.
82 | ```
83 |
84 | You might want to change the prompt as the one above might not directly answer your question.
85 | You can read online about how to write a good prompt, but in a nutshell, make it (extremely) directive.
86 |
87 | You can troubleshoot your prompt by writing multiline requests in the UI, spelling out
88 | the interaction you expect from the model, for example:
89 |
90 | ```text
91 | Tu es un programmeur senior qui programme en python et utilise le framework fastapi. Ecrit moi un serveur qui retourne "hello world".
92 | ```
93 |
94 | Another example:
95 | ```text
96 | Context: None
97 | Situation: tu es au milieu d'un champ.
98 | Tache: va a la rivière, en bas du champ.
99 | Décrit comment aller a la rivière.
100 | ```
101 |
102 | ### Optimised Models
103 | GodziLLa2-70B LLM (English, rank 2 on HuggingFace OpenLLM Leaderboard), bge large Embedding Model (rank 1 on HuggingFace MTEB Leaderboard)
104 | `settings-optimised.yaml`:
105 | ```yml
106 | local:
107 | llm_hf_repo_id: TheBloke/GodziLLa2-70B-GGUF
108 | llm_hf_model_file: godzilla2-70b.Q4_K_M.gguf
109 | embedding_hf_model_name: BAAI/bge-large-en
110 | prompt_style: "llama2"
111 | ```
112 | ### German speaking model
113 | `settings-de.yaml`:
114 | ```yml
115 | local:
116 | llm_hf_repo_id: TheBloke/em_german_leo_mistral-GGUF
117 | llm_hf_model_file: em_german_leo_mistral.Q4_K_M.gguf
118 | embedding_hf_model_name: T-Systems-onsite/german-roberta-sentence-transformer-v2
119 |   # llama2, default or tag
120 | prompt_style: "default"
121 | ```
122 |
--------------------------------------------------------------------------------
/scripts/ingest_folder.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import argparse
4 | import logging
5 | from pathlib import Path
6 |
7 | from private_gpt.di import global_injector
8 | from private_gpt.server.ingest.ingest_service import IngestService
9 | from private_gpt.server.ingest.ingest_watcher import IngestWatcher
10 |
11 | logger = logging.getLogger(__name__)
12 |
13 |
14 | class LocalIngestWorker:
15 | def __init__(self, ingest_service: IngestService) -> None:
16 | self.ingest_service = ingest_service
17 |
18 | self.total_documents = 0
19 | self.current_document_count = 0
20 |
21 | self._files_under_root_folder: list[Path] = []
22 |
23 | def _find_all_files_in_folder(self, root_path: Path, ignored: list[str]) -> None:
24 | """Search all files under the root folder recursively.
25 |
26 | Count them at the same time
27 | """
28 | for file_path in root_path.iterdir():
29 | if file_path.is_file() and file_path.name not in ignored:
30 | self.total_documents += 1
31 | self._files_under_root_folder.append(file_path)
32 | elif file_path.is_dir() and file_path.name not in ignored:
33 | self._find_all_files_in_folder(file_path, ignored)
34 |
35 | def ingest_folder(self, folder_path: Path, ignored: list[str]) -> None:
36 | # Count total documents before ingestion
37 | self._find_all_files_in_folder(folder_path, ignored)
38 | self._ingest_all(self._files_under_root_folder)
39 |
40 | def _ingest_all(self, files_to_ingest: list[Path]) -> None:
41 | logger.info("Ingesting files=%s", [f.name for f in files_to_ingest])
42 | self.ingest_service.bulk_ingest([(str(p.name), p) for p in files_to_ingest])
43 |
44 | def ingest_on_watch(self, changed_path: Path) -> None:
45 |         logger.info("Detected change at path=%s, ingesting", changed_path)
46 | self._do_ingest_one(changed_path)
47 |
48 | def _do_ingest_one(self, changed_path: Path) -> None:
49 | try:
50 | if changed_path.exists():
51 | logger.info(f"Started ingesting file={changed_path}")
52 | self.ingest_service.ingest_file(changed_path.name, changed_path)
53 | logger.info(f"Completed ingesting file={changed_path}")
54 | except Exception:
55 | logger.exception(
56 | f"Failed to ingest document: {changed_path}, find the exception attached"
57 | )
58 |
59 |
60 | parser = argparse.ArgumentParser(prog="ingest_folder.py")
61 | parser.add_argument("folder", help="Folder to ingest")
62 | parser.add_argument(
63 | "--watch",
64 | help="Watch for changes",
65 | action=argparse.BooleanOptionalAction,
66 | default=False,
67 | )
68 | parser.add_argument(
69 | "--ignored",
70 | nargs="*",
71 | help="List of files/directories to ignore",
72 | default=[],
73 | )
74 | parser.add_argument(
75 | "--log-file",
76 | help="Optional path to a log file. If provided, logs will be written to this file.",
77 | type=str,
78 | default=None,
79 | )
80 |
81 | args = parser.parse_args()
82 |
83 | # Set up logging to a file if a path is provided
84 | if args.log_file:
85 | file_handler = logging.FileHandler(args.log_file, mode="a")
86 | file_handler.setFormatter(
87 | logging.Formatter(
88 | "[%(asctime)s.%(msecs)03d] [%(levelname)s] %(message)s",
89 | datefmt="%Y-%m-%d %H:%M:%S",
90 | )
91 | )
92 | logger.addHandler(file_handler)
93 |
94 | if __name__ == "__main__":
95 |
96 | root_path = Path(args.folder)
97 | if not root_path.exists():
98 | raise ValueError(f"Path {args.folder} does not exist")
99 |
100 | ingest_service = global_injector.get(IngestService)
101 | worker = LocalIngestWorker(ingest_service)
102 | worker.ingest_folder(root_path, args.ignored)
103 |
104 | if args.ignored:
105 | logger.info(f"Skipping following files and directories: {args.ignored}")
106 |
107 | if args.watch:
108 | logger.info(f"Watching {args.folder} for changes, press Ctrl+C to stop...")
109 | directories_to_watch = [
110 | dir
111 | for dir in root_path.iterdir()
112 | if dir.is_dir() and dir.name not in args.ignored
113 | ]
114 | watcher = IngestWatcher(args.folder, worker.ingest_on_watch)
115 | watcher.start()
116 |
--------------------------------------------------------------------------------
/private_gpt/components/ingest/ingest_helper.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from pathlib import Path
3 |
4 | from llama_index.core.readers import StringIterableReader
5 | from llama_index.core.readers.base import BaseReader
6 | from llama_index.core.readers.json import JSONReader
7 | from llama_index.core.schema import Document
8 |
9 | logger = logging.getLogger(__name__)
10 |
11 |
12 | # Inspired by the `llama_index.core.readers.file.base` module
13 | def _try_loading_included_file_formats() -> dict[str, type[BaseReader]]:
14 | try:
15 | from llama_index.readers.file.docs import ( # type: ignore
16 | DocxReader,
17 | HWPReader,
18 | PDFReader,
19 | )
20 | from llama_index.readers.file.epub import EpubReader # type: ignore
21 | from llama_index.readers.file.image import ImageReader # type: ignore
22 | from llama_index.readers.file.ipynb import IPYNBReader # type: ignore
23 | from llama_index.readers.file.markdown import MarkdownReader # type: ignore
24 | from llama_index.readers.file.mbox import MboxReader # type: ignore
25 | from llama_index.readers.file.slides import PptxReader # type: ignore
26 | from llama_index.readers.file.tabular import PandasCSVReader # type: ignore
27 | from llama_index.readers.file.video_audio import ( # type: ignore
28 | VideoAudioReader,
29 | )
30 | except ImportError as e:
31 | raise ImportError("`llama-index-readers-file` package not found") from e
32 |
33 | default_file_reader_cls: dict[str, type[BaseReader]] = {
34 | ".hwp": HWPReader,
35 | ".pdf": PDFReader,
36 | ".docx": DocxReader,
37 | ".pptx": PptxReader,
38 | ".ppt": PptxReader,
39 | ".pptm": PptxReader,
40 | ".jpg": ImageReader,
41 | ".png": ImageReader,
42 | ".jpeg": ImageReader,
43 | ".mp3": VideoAudioReader,
44 | ".mp4": VideoAudioReader,
45 | ".csv": PandasCSVReader,
46 | ".epub": EpubReader,
47 | ".md": MarkdownReader,
48 | ".mbox": MboxReader,
49 | ".ipynb": IPYNBReader,
50 | }
51 | return default_file_reader_cls
52 |
53 |
54 | # Patching the default file reader to support other file types
55 | FILE_READER_CLS = _try_loading_included_file_formats()
56 | FILE_READER_CLS.update(
57 | {
58 | ".json": JSONReader,
59 | }
60 | )
61 |
62 |
63 | class IngestionHelper:
64 | """Helper class to transform a file into a list of documents.
65 |
66 | This class should be used to transform a file into a list of documents.
67 | These methods are thread-safe (and multiprocessing-safe).
68 | """
69 |
70 | @staticmethod
71 | def transform_file_into_documents(
72 | file_name: str, file_data: Path
73 | ) -> list[Document]:
74 | documents = IngestionHelper._load_file_to_documents(file_name, file_data)
75 | for document in documents:
76 | document.metadata["file_name"] = file_name
77 | IngestionHelper._exclude_metadata(documents)
78 | return documents
79 |
80 | @staticmethod
81 | def _load_file_to_documents(file_name: str, file_data: Path) -> list[Document]:
82 | logger.debug("Transforming file_name=%s into documents", file_name)
83 | extension = Path(file_name).suffix
84 | reader_cls = FILE_READER_CLS.get(extension)
85 | if reader_cls is None:
86 | logger.debug(
87 | "No reader found for extension=%s, using default string reader",
88 | extension,
89 | )
90 | # Read as a plain text
91 | string_reader = StringIterableReader()
92 | return string_reader.load_data([file_data.read_text()])
93 |
94 | logger.debug("Specific reader found for extension=%s", extension)
95 | return reader_cls().load_data(file_data)
96 |
97 | @staticmethod
98 | def _exclude_metadata(documents: list[Document]) -> None:
99 | logger.debug("Excluding metadata from count=%s documents", len(documents))
100 | for document in documents:
101 | document.metadata["doc_id"] = document.doc_id
102 | # We don't want the Embeddings search to receive this metadata
103 | document.excluded_embed_metadata_keys = ["doc_id"]
104 | # We don't want the LLM to receive these metadata in the context
105 | document.excluded_llm_metadata_keys = ["file_name", "doc_id", "page_label"]
106 |
--------------------------------------------------------------------------------
/private_gpt/server/chat/chat_router.py:
--------------------------------------------------------------------------------
1 | from fastapi import APIRouter, Depends, Request
2 | from llama_index.core.llms import ChatMessage, MessageRole
3 | from pydantic import BaseModel
4 | from starlette.responses import StreamingResponse
5 |
6 | from private_gpt.open_ai.extensions.context_filter import ContextFilter
7 | from private_gpt.open_ai.openai_models import (
8 | OpenAICompletion,
9 | OpenAIMessage,
10 | to_openai_response,
11 | to_openai_sse_stream,
12 | )
13 | from private_gpt.server.chat.chat_service import ChatService
14 | from private_gpt.server.utils.auth import authenticated
15 |
16 | chat_router = APIRouter(prefix="/v1", dependencies=[Depends(authenticated)])
17 |
18 |
19 | class ChatBody(BaseModel):
20 | messages: list[OpenAIMessage]
21 | use_context: bool = False
22 | context_filter: ContextFilter | None = None
23 | include_sources: bool = True
24 | stream: bool = False
25 |
26 | model_config = {
27 | "json_schema_extra": {
28 | "examples": [
29 | {
30 | "messages": [
31 | {
32 | "role": "system",
33 | "content": "You are a rapper. Always answer with a rap.",
34 | },
35 | {
36 | "role": "user",
37 | "content": "How do you fry an egg?",
38 | },
39 | ],
40 | "stream": False,
41 | "use_context": True,
42 | "include_sources": True,
43 | "context_filter": {
44 | "docs_ids": ["c202d5e6-7b69-4869-81cc-dd574ee8ee11"]
45 | },
46 | }
47 | ]
48 | }
49 | }
50 |
51 |
52 | @chat_router.post(
53 | "/chat/completions",
54 | response_model=None,
55 | responses={200: {"model": OpenAICompletion}},
56 | tags=["Contextual Completions"],
57 | openapi_extra={
58 | "x-fern-streaming": {
59 | "stream-condition": "stream",
60 | "response": {"$ref": "#/components/schemas/OpenAICompletion"},
61 | "response-stream": {"$ref": "#/components/schemas/OpenAICompletion"},
62 | }
63 | },
64 | )
65 | def chat_completion(
66 | request: Request, body: ChatBody
67 | ) -> OpenAICompletion | StreamingResponse:
68 | """Given a list of messages comprising a conversation, return a response.
69 |
70 | Optionally include an initial `role: system` message to influence the way
71 | the LLM answers.
72 |
73 | If `use_context` is set to `true`, the model will use context coming
74 | from the ingested documents to create the response. The documents being used can
75 | be filtered using the `context_filter` and passing the document IDs to be used.
76 |     Ingested documents' IDs can be found using the `/ingest/list` endpoint. If you want
77 | all ingested documents to be used, remove `context_filter` altogether.
78 |
79 | When using `'include_sources': true`, the API will return the source Chunks used
80 | to create the response, which come from the context provided.
81 |
82 | When using `'stream': true`, the API will return data chunks following [OpenAI's
83 | streaming model](https://platform.openai.com/docs/api-reference/chat/streaming):
84 | ```
85 | {"id":"12345","object":"completion.chunk","created":1694268190,
86 | "model":"private-gpt","choices":[{"index":0,"delta":{"content":"Hello"},
87 | "finish_reason":null}]}
88 | ```
89 | """
90 | service = request.state.injector.get(ChatService)
91 | all_messages = [
92 | ChatMessage(content=m.content, role=MessageRole(m.role)) for m in body.messages
93 | ]
94 | if body.stream:
95 | completion_gen = service.stream_chat(
96 | messages=all_messages,
97 | use_context=body.use_context,
98 | context_filter=body.context_filter,
99 | )
100 | return StreamingResponse(
101 | to_openai_sse_stream(
102 | completion_gen.response,
103 | completion_gen.sources if body.include_sources else None,
104 | ),
105 | media_type="text/event-stream",
106 | )
107 | else:
108 | completion = service.chat(
109 | messages=all_messages,
110 | use_context=body.use_context,
111 | context_filter=body.context_filter,
112 | )
113 | return to_openai_response(
114 | completion.response, completion.sources if body.include_sources else None
115 | )
116 |
--------------------------------------------------------------------------------
/private_gpt/utils/eta.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | import logging
3 | import math
4 | import time
5 | from collections import deque
6 | from typing import Any
7 |
8 | logger = logging.getLogger(__name__)
9 |
10 |
11 | def human_time(*args: Any, **kwargs: Any) -> str:
12 | def timedelta_total_seconds(timedelta: datetime.timedelta) -> float:
13 | return (
14 | timedelta.microseconds
15 | + 0.0
16 | + (timedelta.seconds + timedelta.days * 24 * 3600) * 10**6
17 | ) / 10**6
18 |
19 | secs = float(timedelta_total_seconds(datetime.timedelta(*args, **kwargs)))
20 | # We want (ms) precision below 2 seconds
21 | if secs < 2:
22 | return f"{secs * 1000}ms"
23 | units = [("y", 86400 * 365), ("d", 86400), ("h", 3600), ("m", 60), ("s", 1)]
24 | parts = []
25 | for unit, mul in units:
26 | if secs / mul >= 1 or mul == 1:
27 | if mul > 1:
28 | n = int(math.floor(secs / mul))
29 | secs -= n * mul
30 | else:
31 | # >2s we drop the (ms) component.
32 | n = int(secs)
33 | if n:
34 | parts.append(f"{n}{unit}")
35 | return " ".join(parts)
36 |
37 |
38 | def eta(iterator: list[Any]) -> Any:
39 | """Report an ETA after 30s and every 60s thereafter."""
40 | total = len(iterator)
41 | _eta = ETA(total)
42 | _eta.needReport(30)
43 | for processed, data in enumerate(iterator, start=1):
44 | yield data
45 | _eta.update(processed)
46 | if _eta.needReport(60):
47 | logger.info(f"{processed}/{total} - ETA {_eta.human_time()}")
48 |
49 |
50 | class ETA:
51 | """Predict how long something will take to complete."""
52 |
53 | def __init__(self, total: int):
54 | self.total: int = total # Total expected records.
55 | self.rate: float = 0.0 # per second
56 | self._timing_data: deque[tuple[float, int]] = deque(maxlen=100)
57 | self.secondsLeft: float = 0.0
58 | self.nexttime: float = 0.0
59 |
60 | def human_time(self) -> str:
61 | if self._calc():
62 | return f"{human_time(seconds=self.secondsLeft)} @ {int(self.rate * 60)}/min"
63 | return "(computing)"
64 |
65 | def update(self, count: int) -> None:
66 |         # count should be in the range 1 to self.total
67 | assert count > 0
68 | assert count <= self.total
69 | self._timing_data.append((time.time(), count)) # (X,Y) for pearson
70 |
71 | def needReport(self, whenSecs: int) -> bool:
72 | now = time.time()
73 | if now > self.nexttime:
74 | self.nexttime = now + whenSecs
75 | return True
76 | return False
77 |
78 | def _calc(self) -> bool:
79 |         # Need a few samples before a prediction (at least 3 points to compute a stable slope).
80 | if len(self._timing_data) < 3:
81 | return False
82 |
83 | # http://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient
84 | # Calculate means and standard deviations.
85 | samples = len(self._timing_data)
86 | # column wise sum of the timing tuples to compute their mean.
87 | mean_x, mean_y = (
88 | sum(i) / samples for i in zip(*self._timing_data, strict=False)
89 | )
90 | std_x = math.sqrt(
91 | sum(pow(i[0] - mean_x, 2) for i in self._timing_data) / (samples - 1)
92 | )
93 | std_y = math.sqrt(
94 | sum(pow(i[1] - mean_y, 2) for i in self._timing_data) / (samples - 1)
95 | )
96 |
97 | # Calculate coefficient.
98 |         sum_xy, sum_sq_v_x, sum_sq_v_y = 0.0, 0.0, 0.0
99 | for x, y in self._timing_data:
100 | x -= mean_x
101 | y -= mean_y
102 | sum_xy += x * y
103 | sum_sq_v_x += pow(x, 2)
104 | sum_sq_v_y += pow(y, 2)
105 | pearson_r = sum_xy / math.sqrt(sum_sq_v_x * sum_sq_v_y)
106 |
107 | # Calculate regression line.
108 | # y = mx + b where m is the slope and b is the y-intercept.
109 | m = self.rate = pearson_r * (std_y / std_x)
110 | y = self.total
111 | b = mean_y - m * mean_x
112 | x = (y - b) / m
113 |
114 | # Calculate fitted line (transformed/shifted regression line horizontally).
115 | fitted_b = self._timing_data[-1][1] - (m * self._timing_data[-1][0])
116 | fitted_x = (y - fitted_b) / m
117 | _, count = self._timing_data[-1] # adjust last data point progress count
118 | adjusted_x = ((fitted_x - x) * (count / self.total)) + x
119 | eta_epoch = adjusted_x
120 |
121 | self.secondsLeft = max([eta_epoch - time.time(), 0])
122 | return True
123 |
--------------------------------------------------------------------------------
/private_gpt/server/ingest/ingest_router.py:
--------------------------------------------------------------------------------
1 | from typing import Literal
2 |
3 | from fastapi import APIRouter, Depends, HTTPException, Request, UploadFile
4 | from pydantic import BaseModel, Field
5 |
6 | from private_gpt.server.ingest.ingest_service import IngestService
7 | from private_gpt.server.ingest.model import IngestedDoc
8 | from private_gpt.server.utils.auth import authenticated
9 |
10 | ingest_router = APIRouter(prefix="/v1", dependencies=[Depends(authenticated)])
11 |
12 |
13 | class IngestTextBody(BaseModel):
14 | file_name: str = Field(examples=["Avatar: The Last Airbender"])
15 | text: str = Field(
16 | examples=[
17 | "Avatar is set in an Asian and Arctic-inspired world in which some "
18 | "people can telekinetically manipulate one of the four elements—water, "
19 | "earth, fire or air—through practices known as 'bending', inspired by "
20 | "Chinese martial arts."
21 | ]
22 | )
23 |
24 |
25 | class IngestResponse(BaseModel):
26 | object: Literal["list"]
27 | model: Literal["private-gpt"]
28 | data: list[IngestedDoc]
29 |
30 |
31 | @ingest_router.post("/ingest", tags=["Ingestion"], deprecated=True)
32 | def ingest(request: Request, file: UploadFile) -> IngestResponse:
33 | """Ingests and processes a file.
34 |
35 | Deprecated. Use ingest/file instead.
36 | """
37 | return ingest_file(request, file)
38 |
39 |
40 | @ingest_router.post("/ingest/file", tags=["Ingestion"])
41 | def ingest_file(request: Request, file: UploadFile) -> IngestResponse:
42 | """Ingests and processes a file, storing its chunks to be used as context.
43 |
44 | The context obtained from files is later used in
45 | `/chat/completions`, `/completions`, and `/chunks` APIs.
46 |
47 | Most common document
48 | formats are supported, but you may be prompted to install an extra dependency to
49 | manage a specific file type.
50 |
51 | A file can generate different Documents (for example a PDF generates one Document
52 | per page). All Documents IDs are returned in the response, together with the
53 | extracted Metadata (which is later used to improve context retrieval). Those IDs
54 | can be used to filter the context used to create responses in
55 | `/chat/completions`, `/completions`, and `/chunks` APIs.
56 | """
57 | service = request.state.injector.get(IngestService)
58 | if file.filename is None:
59 | raise HTTPException(400, "No file name provided")
60 | ingested_documents = service.ingest_bin_data(file.filename, file.file)
61 | return IngestResponse(object="list", model="private-gpt", data=ingested_documents)
62 |
63 |
64 | @ingest_router.post("/ingest/text", tags=["Ingestion"])
65 | def ingest_text(request: Request, body: IngestTextBody) -> IngestResponse:
66 | """Ingests and processes a text, storing its chunks to be used as context.
67 |
68 | The context obtained from files is later used in
69 | `/chat/completions`, `/completions`, and `/chunks` APIs.
70 |
71 | A Document will be generated with the given text. The Document
72 | ID is returned in the response, together with the
73 | extracted Metadata (which is later used to improve context retrieval). That ID
74 | can be used to filter the context used to create responses in
75 | `/chat/completions`, `/completions`, and `/chunks` APIs.
76 | """
77 | service = request.state.injector.get(IngestService)
78 | if len(body.file_name) == 0:
79 | raise HTTPException(400, "No file name provided")
80 | ingested_documents = service.ingest_text(body.file_name, body.text)
81 | return IngestResponse(object="list", model="private-gpt", data=ingested_documents)
82 |
83 |
84 | @ingest_router.get("/ingest/list", tags=["Ingestion"])
85 | def list_ingested(request: Request) -> IngestResponse:
86 | """Lists already ingested Documents including their Document ID and metadata.
87 |
88 | Those IDs can be used to filter the context used to create responses
89 | in `/chat/completions`, `/completions`, and `/chunks` APIs.
90 | """
91 | service = request.state.injector.get(IngestService)
92 | ingested_documents = service.list_ingested()
93 | return IngestResponse(object="list", model="private-gpt", data=ingested_documents)
94 |
95 |
96 | @ingest_router.delete("/ingest/{doc_id}", tags=["Ingestion"])
97 | def delete_ingested(request: Request, doc_id: str) -> None:
98 | """Delete the specified ingested Document.
99 |
100 | The `doc_id` can be obtained from the `GET /ingest/list` endpoint.
101 | The document will be effectively deleted from your storage context.
102 | """
103 | service = request.state.injector.get(IngestService)
104 | service.delete(doc_id)
105 |
--------------------------------------------------------------------------------
/private_gpt/ui/images.py:
--------------------------------------------------------------------------------
1 | logo_svg = "data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iODYxIiBoZWlnaHQ9Ijk4IiB2aWV3Qm94PSIwIDAgODYxIDk4IiBmaWxsPSJub25lIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciPgo8cGF0aCBkPSJNNDguMTM0NSAwLjE1NzkxMUMzNi44Mjk5IDEuMDM2NTQgMjYuMTIwNSA1LjU1MzI4IDE3LjYyNTYgMTMuMDI1QzkuMTMwNDYgMjAuNDk2NyAzLjMxMTcgMzAuNTE2OSAxLjA0OTUyIDQxLjU3MDVDLTEuMjEyNzMgNTIuNjIzOCAwLjIwNDQxOSA2NC4xMDk0IDUuMDg2MiA3NC4yOTA1QzkuOTY4NjggODQuNDcxNiAxOC4wNTAzIDkyLjc5NDMgMjguMTA5OCA5OEwzMy43MDI2IDgyLjU5MDdMMzUuNDU0MiA3Ny43NjU2QzI5LjgzODcgNzQuMTY5MiAyNS41NDQ0IDY4Ljg2MDcgMjMuMjE0IDYyLjYzNDRDMjAuODgyMiA1Ni40MDg2IDIwLjYzOSA0OS41OTkxIDIyLjUyMDQgNDMuMjI0M0MyNC40MDI5IDM2Ljg0OTUgMjguMzA5NiAzMS4yNTI1IDMzLjY1NjEgMjcuMjcwNkMzOS4wMDIgMjMuMjg4MyA0NS41MDAzIDIxLjEzNSA1Mi4xNzg5IDIxLjEzM0M1OC44NTczIDIxLjEzMDMgNjUuMzU3MSAyMy4yNzgzIDcwLjcwNjUgMjcuMjU1OEM3Ni4wNTU0IDMxLjIzNCA3OS45NjY0IDM2LjgyNzcgODEuODU0MyA0My4yMDA2QzgzLjc0MjkgNDkuNTczNiA4My41MDYyIDU2LjM4MzYgODEuMTgwMSA2Mi42MTE3Qzc4Ljg1NDUgNjguODM5NiA3NC41NjUgNzQuMTUxNCA2OC45NTI5IDc3Ljc1MjhMNzAuNzA3NCA4Mi41OTA3TDc2LjMwMDIgOTcuOTk3MUM4Ni45Nzg4IDkyLjQ3MDUgOTUuNDA4OCA4My40NDE5IDEwMC4xNjMgNzIuNDQwNEMxMDQuOTE3IDYxLjQzOTQgMTA1LjcwNCA0OS4xNDE3IDEwMi4zODkgMzcuNjNDOTkuMDc0NiAyNi4xMTc5IDkxLjg2MjcgMTYuMDk5MyA4MS45NzQzIDkuMjcwNzlDNzIuMDg2MSAyLjQ0MTkxIDYwLjEyOTEgLTAuNzc3MDg2IDQ4LjEyODYgMC4xNTg5MzRMNDguMTM0NSAwLjE1NzkxMVoiIGZpbGw9IiMxRjFGMjkiLz4KPGcgY2xpcC1wYXRoPSJ1cmwoI2NsaXAwXzVfMTkpIj4KPHBhdGggZD0iTTIyMC43NzIgMTIuNzUyNEgyNTIuNjM5QzI2Ny4yNjMgMTIuNzUyNCAyNzcuNzM5IDIxLjk2NzUgMjc3LjczOSAzNS40MDUyQzI3Ny43MzkgNDYuNzg3IDI2OS44ODEgNTUuMzUwOCAyNTguMzE0IDU3LjQxMDdMMjc4LjgzIDg1LjM3OTRIMjYxLjM3TDI0Mi4wNTQgNTcuOTUzM0gyMzUuNTA2Vjg1LjM3OTRIMjIwLjc3NEwyMjAuNzcyIDEyLjc1MjRaTTIzNS41MDQgMjYuMzAyOFY0NC40MDdIMjUyLjYzMkMyNTguOTYyIDQ0LjQwNyAyNjIuOTk5IDQwLjgyOTggMjYyLjk5OSAzNS40MTAyQzI2Mi45OTkgMjkuODgwOSAyNTguOTYyIDI2LjMwMjggMjUyLjYzMiAyNi4zMDI4SDIzNS41MDRaIiBmaWxsPSIjMUYxRjI5Ii8+CjxwYXRoIGQ9Ik0yOTUuMTc2IDg1LjM4NDRWMTIuNzUyNEgzMDkuOTA5Vjg1LjM4NDRIMjk1LjE3NloiIGZpbGw9IiMxRjFGMjkiLz4KPHBhdGggZD0iTTM2My43OTUgNjUuNzYzTDM4NS42MiAxMi43NTI0SDQwMS40NDRMMzcxLjIxNSA4NS4zODQ0SDM1Ni40ODNMMzI2LjI1NCAxMi43NTI0SDM0Mi4wNzhMMzYzLjc5NSA2NS43NjNaIiBmaWxsPSIjMUYxRjI5Ii8+CjxwYXRoIGQ9Ik00NDguMzI3IDcyLjA1MDRINDE1LjY5OEw0MTAuMjQxIDg1LjM4NDRIMzk0LjQxOEw0MjQuNjQ3IDEyLjc1MjRINDM5LjM3OUw0NjkuNjA4IDg1LjM4NDRINDUzLjc4M0w0NDguMzI3IDcyLjA1MDRaTTQ0Mi43NjEgNTguNUw0MzIuMDY2IDMyLjM3NDhMNDIxLjI2MiA1OC41SDQ0Mi43NjFaIiBmaWxsPSIjMUYxRjI5Ii8+CjxwYXRoIGQ9Ik00NjUuMjIxIDEyLjc1MjRINTMwLjU5MlYyNi4zMDI4SDUwNS4yNzVWODUuMzg0NEg0OTAuNTM5VjI2LjMwMjhINDY1LjIyMVYxMi43NTI0WiIgZmlsbD0iIzFGMUYyOSIvPgo8cGF0aCBkPSJNNTk1LjE5MyAxMi43NTI0VjI2LjMwMjhINTYyLjEyOFY0MS4xNTUxSDU5NS4xOTNWNTQuNzA2NUg1NjIuMTI4VjcxLjgzNEg1OTUuMTkzVjg1LjM4NDRINTQ3LjM5NVYxMi43NTI0SDU5NS4xOTNaIiBmaWxsPSIjMUYxRjI5Ii8+CjxwYXRoIGQ9Ik0xNjcuMjAxIDU3LjQxNThIMTg2LjUzNkMxOTAuODg2IDU3LjQ2NjIgMTk1LjE2OCA1Ni4zMzQ4IDE5OC45MTggNTQuMTQzN0MyMDIuMTc5IDUyLjIxOTkgMjA0Ljg2OSA0OS40NzM2IDIwNi43MTYgNDYuMTgzNUMyMDguNTYyIDQyLjg5MzQgMjA5LjUgMzkuMTc2NiAyMDkuNDMzIDM1LjQxMDJDMjA5LjQzMyAyMS45Njc1IDE5OC45NTggMTIuNzU3NCAxODQuMzM0IDEyLjc1NzRIMTUyLjQ2OFY4NS4zODk0SDE2Ny4yMDFWNTcuNDIwN1Y1Ny40MTU4Wk0xNjcuMjAxIDI2LjMwNThIMTg0LjMyOUMxOTAuNjU4IDI2LjMwNTggMTk0LjY5NiAyOS44ODQgMTk0LjY5NiAzNS40MTMzQzE5NC42OTYgNDAuODMyOSAxOTAuNjU4IDQ0LjQwOTkgMTg0LjMyOSA0NC40MDk5SDE2Ny4yMDFWMjYuMzA1OFoiIGZpbGw9IiMxRjFGMjkiLz4KPHBhdGggZD0iTTc5NC44MzUgMTIuNzUyNEg4NjAuMjA2VjI2LjMwMjhIODM0Ljg4OVY4NS4zODQ0SDgyMC4xNTZWMjYuMzAyOEg3OTQuODM1VjEyLjc1MjRaIiBmaWxsPSIjMUYxRjI5Ii8+CjxwYXRoIGQ9Ik03NDEuOTA3IDU3LjQxNThINzYxLjI0MUM3NjUuNTkyIDU3LjQ2NjEgNzY5Ljg3NCA1Ni4zMzQ3IDc3My42MjQgNTQuMTQzN0M3N
zYuODg0IDUyLjIxOTkgNzc5LjU3NSA0OS40NzM2IDc4MS40MjEgNDYuMTgzNUM3ODMuMjY4IDQyLjg5MzQgNzg0LjIwNiAzOS4xNzY2IDc4NC4xMzkgMzUuNDEwMkM3ODQuMTM5IDIxLjk2NzUgNzczLjY2NCAxMi43NTc0IDc1OS4wMzkgMTIuNzU3NEg3MjcuMTc1Vjg1LjM4OTRINzQxLjkwN1Y1Ny40MjA3VjU3LjQxNThaTTc0MS45MDcgMjYuMzA1OEg3NTkuMDM1Qzc2NS4zNjUgMjYuMzA1OCA3NjkuNDAzIDI5Ljg4NCA3NjkuNDAzIDM1LjQxMzNDNzY5LjQwMyA0MC44MzI5IDc2NS4zNjUgNDQuNDA5OSA3NTkuMDM1IDQ0LjQwOTlINzQxLjkwN1YyNi4zMDU4WiIgZmlsbD0iIzFGMUYyOSIvPgo8cGF0aCBkPSJNNjgxLjA2OSA0Ny4wMTE1VjU5LjAxMjVINjk1LjM3OVY3MS42NzE5QzY5Mi41MjYgNzMuNDM2OCA2ODguNTI0IDc0LjMzMTkgNjgzLjQ3NyA3NC4zMzE5QzY2Ni4wMDMgNzQuMzMxOSA2NTguMDQ1IDYxLjgxMjQgNjU4LjA0NSA1MC4xOEM2NTguMDQ1IDMzLjk2MDUgNjcxLjAwOCAyNS40NzMyIDY4My44MTIgMjUuNDczMkM2OTAuNDI1IDI1LjQ2MjggNjk2LjkwOSAyNy4yODA0IDcwMi41NDEgMzAuNzIyNkw3MDMuMTU3IDMxLjEyNTRMNzA1Ljk1OCAxOC4xODZMNzA1LjY2MyAxNy45OTc3QzcwMC4wNDYgMTQuNDAwNCA2OTEuMjkxIDEyLjI1OSA2ODIuMjUxIDEyLjI1OUM2NjMuMTk3IDEyLjI1OSA2NDIuOTQ5IDI1LjM5NjcgNjQyLjk0OSA0OS43NDVDNjQyLjk0OSA2MS4wODQ1IDY0Ny4yOTMgNzAuNzE3NCA2NTUuNTExIDc3LjYwMjlDNjYzLjIyNCA4My44MjQ1IDY3Mi44NzQgODcuMTg5IDY4Mi44MDkgODcuMTIwMUM2OTQuMzYzIDg3LjEyMDEgNzAzLjA2MSA4NC42NDk1IDcwOS40MDIgNzkuNTY5Mkw3MDkuNTg5IDc5LjQxODFWNDcuMDExNUg2ODEuMDY5WiIgZmlsbD0iIzFGMUYyOSIvPgo8L2c+CjxkZWZzPgo8Y2xpcFBhdGggaWQ9ImNsaXAwXzVfMTkiPgo8cmVjdCB3aWR0aD0iNzA3Ljc3OCIgaGVpZ2h0PSI3NC44NjExIiBmaWxsPSJ3aGl0ZSIgdHJhbnNmb3JtPSJ0cmFuc2xhdGUoMTUyLjQ0NCAxMi4yNSkiLz4KPC9jbGlwUGF0aD4KPC9kZWZzPgo8L3N2Zz4K"
2 |
--------------------------------------------------------------------------------
/tests/test_prompt_helper.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from llama_index.core.llms import ChatMessage, MessageRole
3 |
4 | from private_gpt.components.llm.prompt_helper import (
5 | ChatMLPromptStyle,
6 | DefaultPromptStyle,
7 | Llama2PromptStyle,
8 | MistralPromptStyle,
9 | TagPromptStyle,
10 | get_prompt_style,
11 | )
12 |
13 |
14 | @pytest.mark.parametrize(
15 | ("prompt_style", "expected_prompt_style"),
16 | [
17 | ("default", DefaultPromptStyle),
18 | ("llama2", Llama2PromptStyle),
19 | ("tag", TagPromptStyle),
20 | ("mistral", MistralPromptStyle),
21 | ("chatml", ChatMLPromptStyle),
22 | ],
23 | )
24 | def test_get_prompt_style_success(prompt_style, expected_prompt_style):
25 | assert isinstance(get_prompt_style(prompt_style), expected_prompt_style)
26 |
27 |
28 | def test_get_prompt_style_failure():
29 | prompt_style = "unknown"
30 | with pytest.raises(ValueError) as exc_info:
31 | get_prompt_style(prompt_style)
32 | assert str(exc_info.value) == f"Unknown prompt_style='{prompt_style}'"
33 |
34 |
35 | def test_tag_prompt_style_format():
36 | prompt_style = TagPromptStyle()
37 | messages = [
38 | ChatMessage(content="You are an AI assistant.", role=MessageRole.SYSTEM),
39 | ChatMessage(content="Hello, how are you doing?", role=MessageRole.USER),
40 | ]
41 |
42 | expected_prompt = (
43 | "<|system|>: You are an AI assistant.\n"
44 | "<|user|>: Hello, how are you doing?\n"
45 | "<|assistant|>: "
46 | )
47 |
48 | assert prompt_style.messages_to_prompt(messages) == expected_prompt
49 |
50 |
51 | def test_tag_prompt_style_format_with_system_prompt():
52 | prompt_style = TagPromptStyle()
53 | messages = [
54 | ChatMessage(
55 | content="FOO BAR Custom sys prompt from messages.", role=MessageRole.SYSTEM
56 | ),
57 | ChatMessage(content="Hello, how are you doing?", role=MessageRole.USER),
58 | ]
59 |
60 | expected_prompt = (
61 | "<|system|>: FOO BAR Custom sys prompt from messages.\n"
62 | "<|user|>: Hello, how are you doing?\n"
63 | "<|assistant|>: "
64 | )
65 |
66 | assert prompt_style.messages_to_prompt(messages) == expected_prompt
67 |
68 |
69 | def test_mistral_prompt_style_format():
70 | prompt_style = MistralPromptStyle()
71 | messages = [
72 | ChatMessage(content="A", role=MessageRole.SYSTEM),
73 | ChatMessage(content="B", role=MessageRole.USER),
74 | ]
75 | expected_prompt = "[INST] A\nB [/INST]"
76 | assert prompt_style.messages_to_prompt(messages) == expected_prompt
77 |
78 | messages2 = [
79 | ChatMessage(content="A", role=MessageRole.SYSTEM),
80 | ChatMessage(content="B", role=MessageRole.USER),
81 | ChatMessage(content="C", role=MessageRole.ASSISTANT),
82 | ChatMessage(content="D", role=MessageRole.USER),
83 | ]
84 | expected_prompt2 = "[INST] A\nB [/INST] C[INST] D [/INST]"
85 | assert prompt_style.messages_to_prompt(messages2) == expected_prompt2
86 |
87 |
88 | def test_chatml_prompt_style_format():
89 | prompt_style = ChatMLPromptStyle()
90 | messages = [
91 | ChatMessage(content="You are an AI assistant.", role=MessageRole.SYSTEM),
92 | ChatMessage(content="Hello, how are you doing?", role=MessageRole.USER),
93 | ]
94 |
95 | expected_prompt = (
96 | "<|im_start|>system\n"
97 | "You are an AI assistant.<|im_end|>\n"
98 | "<|im_start|>user\n"
99 | "Hello, how are you doing?<|im_end|>\n"
100 | "<|im_start|>assistant\n"
101 | )
102 |
103 | assert prompt_style.messages_to_prompt(messages) == expected_prompt
104 |
105 |
106 | def test_llama2_prompt_style_format():
107 | prompt_style = Llama2PromptStyle()
108 | messages = [
109 | ChatMessage(content="You are an AI assistant.", role=MessageRole.SYSTEM),
110 | ChatMessage(content="Hello, how are you doing?", role=MessageRole.USER),
111 | ]
112 |
113 | expected_prompt = (
114 |         " [INST] <<SYS>>\n"
115 | " You are an AI assistant. \n"
116 |         "<</SYS>>\n"
117 | "\n"
118 | " Hello, how are you doing? [/INST]"
119 | )
120 |
121 | assert prompt_style.messages_to_prompt(messages) == expected_prompt
122 |
123 |
124 | def test_llama2_prompt_style_with_system_prompt():
125 | prompt_style = Llama2PromptStyle()
126 | messages = [
127 | ChatMessage(
128 | content="FOO BAR Custom sys prompt from messages.", role=MessageRole.SYSTEM
129 | ),
130 | ChatMessage(content="Hello, how are you doing?", role=MessageRole.USER),
131 | ]
132 |
133 | expected_prompt = (
134 |         " [INST] <<SYS>>\n"
135 | " FOO BAR Custom sys prompt from messages. \n"
136 |         "<</SYS>>\n"
137 | "\n"
138 | " Hello, how are you doing? [/INST]"
139 | )
140 |
141 | assert prompt_style.messages_to_prompt(messages) == expected_prompt
142 |
--------------------------------------------------------------------------------
/private_gpt/server/chunks/chunks_service.py:
--------------------------------------------------------------------------------
1 | from typing import TYPE_CHECKING, Literal
2 |
3 | from injector import inject, singleton
4 | from llama_index.core.indices import VectorStoreIndex
5 | from llama_index.core.schema import NodeWithScore
6 | from llama_index.core.storage import StorageContext
7 | from pydantic import BaseModel, Field
8 |
9 | from private_gpt.components.embedding.embedding_component import EmbeddingComponent
10 | from private_gpt.components.llm.llm_component import LLMComponent
11 | from private_gpt.components.node_store.node_store_component import NodeStoreComponent
12 | from private_gpt.components.vector_store.vector_store_component import (
13 | VectorStoreComponent,
14 | )
15 | from private_gpt.open_ai.extensions.context_filter import ContextFilter
16 | from private_gpt.server.ingest.model import IngestedDoc
17 |
18 | if TYPE_CHECKING:
19 | from llama_index.core.schema import RelatedNodeInfo
20 |
21 |
22 | class Chunk(BaseModel):
23 | object: Literal["context.chunk"]
24 | score: float = Field(examples=[0.023])
25 | document: IngestedDoc
26 | text: str = Field(examples=["Outbound sales increased 20%, driven by new leads."])
27 | previous_texts: list[str] | None = Field(
28 | default=None,
29 | examples=[["SALES REPORT 2023", "Inbound didn't show major changes."]],
30 | )
31 | next_texts: list[str] | None = Field(
32 | default=None,
33 | examples=[
34 | [
35 | "New leads came from Google Ads campaign.",
36 | "The campaign was run by the Marketing Department",
37 | ]
38 | ],
39 | )
40 |
41 | @classmethod
42 | def from_node(cls: type["Chunk"], node: NodeWithScore) -> "Chunk":
43 | doc_id = node.node.ref_doc_id if node.node.ref_doc_id is not None else "-"
44 | return cls(
45 | object="context.chunk",
46 | score=node.score or 0.0,
47 | document=IngestedDoc(
48 | object="ingest.document",
49 | doc_id=doc_id,
50 | doc_metadata=node.metadata,
51 | ),
52 | text=node.get_content(),
53 | )
54 |
55 |
56 | @singleton
57 | class ChunksService:
58 | @inject
59 | def __init__(
60 | self,
61 | llm_component: LLMComponent,
62 | vector_store_component: VectorStoreComponent,
63 | embedding_component: EmbeddingComponent,
64 | node_store_component: NodeStoreComponent,
65 | ) -> None:
66 | self.vector_store_component = vector_store_component
67 | self.llm_component = llm_component
68 | self.embedding_component = embedding_component
69 | self.storage_context = StorageContext.from_defaults(
70 | vector_store=vector_store_component.vector_store,
71 | docstore=node_store_component.doc_store,
72 | index_store=node_store_component.index_store,
73 | )
74 |
75 | def _get_sibling_nodes_text(
76 | self, node_with_score: NodeWithScore, related_number: int, forward: bool = True
77 | ) -> list[str]:
78 | explored_nodes_texts = []
79 | current_node = node_with_score.node
80 | for _ in range(related_number):
81 | explored_node_info: RelatedNodeInfo | None = (
82 | current_node.next_node if forward else current_node.prev_node
83 | )
84 | if explored_node_info is None:
85 | break
86 |
87 | explored_node = self.storage_context.docstore.get_node(
88 | explored_node_info.node_id
89 | )
90 |
91 | explored_nodes_texts.append(explored_node.get_content())
92 | current_node = explored_node
93 |
94 | return explored_nodes_texts
95 |
96 | def retrieve_relevant(
97 | self,
98 | text: str,
99 | context_filter: ContextFilter | None = None,
100 | limit: int = 10,
101 | prev_next_chunks: int = 0,
102 | ) -> list[Chunk]:
103 | index = VectorStoreIndex.from_vector_store(
104 | self.vector_store_component.vector_store,
105 | storage_context=self.storage_context,
106 | llm=self.llm_component.llm,
107 | embed_model=self.embedding_component.embedding_model,
108 | show_progress=True,
109 | )
110 | vector_index_retriever = self.vector_store_component.get_retriever(
111 | index=index, context_filter=context_filter, similarity_top_k=limit
112 | )
113 | nodes = vector_index_retriever.retrieve(text)
114 | nodes.sort(key=lambda n: n.score or 0.0, reverse=True)
115 |
116 | retrieved_nodes = []
117 | for node in nodes:
118 | chunk = Chunk.from_node(node)
119 | chunk.previous_texts = self._get_sibling_nodes_text(
120 | node, prev_next_chunks, False
121 | )
122 | chunk.next_texts = self._get_sibling_nodes_text(node, prev_next_chunks)
123 | retrieved_nodes.append(chunk)
124 |
125 | return retrieved_nodes
126 |
--------------------------------------------------------------------------------
/settings.yaml:
--------------------------------------------------------------------------------
1 | # The default configuration file.
2 | # More information about configuration can be found in the documentation: https://docs.privategpt.dev/
3 | # Syntax in `private_gpt/settings/settings.py`
4 | server:
5 | env_name: ${APP_ENV:prod}
6 | port: ${PORT:8001}
7 | cors:
8 | enabled: true
9 | allow_origins: ["*"]
10 | allow_methods: ["*"]
11 | allow_headers: ["*"]
12 | auth:
13 | enabled: false
14 | # python -c 'import base64; print("Basic " + base64.b64encode("secret:key".encode()).decode())'
15 | # 'secret' is the username and 'key' is the password for basic auth by default
16 | # If the auth is enabled, this value must be set in the "Authorization" header of the request.
17 | secret: "Basic c2VjcmV0OmtleQ=="
18 |
19 | data:
20 | local_data_folder: local_data/private_gpt
21 |
22 | ui:
23 | enabled: true
24 | path: /
25 | default_chat_system_prompt: >
26 | You are a helpful, respectful and honest assistant.
27 | Always answer as helpfully as possible and follow ALL given instructions.
28 | Do not speculate or make up information.
29 | Do not reference any given instructions or context.
30 | default_query_system_prompt: >
31 | You can only answer questions about the provided context.
32 | If you know the answer but it is not based in the provided context, don't provide
33 | the answer, just state the answer is not in the context provided.
34 | delete_file_button_enabled: true
35 | delete_all_files_button_enabled: true
36 |
37 | llm:
38 | mode: llamacpp
39 | prompt_style: "mistral"
40 | # Should be matching the selected model
41 | max_new_tokens: 512
42 | context_window: 3900
43 | tokenizer: mistralai/Mistral-7B-Instruct-v0.2
44 | temperature: 0.1 # The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual. (Default: 0.1)
45 |
46 | rag:
47 | similarity_top_k: 2
48 |   # This value controls how many "top" documents the RAG returns to use in the context.
49 |   #similarity_value: 0.45
50 |   # This setting is disabled by default. If you enable it, the RAG will only use documents that meet a certain similarity score.
51 | rerank:
52 | enabled: false
53 | model: cross-encoder/ms-marco-MiniLM-L-2-v2
54 | top_n: 1
55 |
56 | clickhouse:
57 | host: localhost
58 | port: 8443
59 | username: admin
60 | password: clickhouse
61 | database: embeddings
62 |
63 | llamacpp:
64 | llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF
65 | llm_hf_model_file: mistral-7b-instruct-v0.2.Q4_K_M.gguf
66 | tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting
67 | top_k: 40 # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)
68 | top_p: 1.0 # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)
69 | repeat_penalty: 1.1 # Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)
70 |
71 | embedding:
72 | # Should be matching the value above in most cases
73 | mode: huggingface
74 | ingest_mode: simple
75 | embed_dim: 384 # 384 is for BAAI/bge-small-en-v1.5
76 |
77 | huggingface:
78 | embedding_hf_model_name: BAAI/bge-small-en-v1.5
79 | access_token: ${HUGGINGFACE_TOKEN:}
80 |
81 | vectorstore:
82 | database: qdrant
83 |
84 | nodestore:
85 | database: simple
86 |
87 | qdrant:
88 | path: local_data/private_gpt/qdrant
89 |
90 | postgres:
91 | host: localhost
92 | port: 5432
93 | database: postgres
94 | user: postgres
95 | password: postgres
96 | schema_name: private_gpt
97 |
98 | sagemaker:
99 | llm_endpoint_name: huggingface-pytorch-tgi-inference-2023-09-25-19-53-32-140
100 | embedding_endpoint_name: huggingface-pytorch-inference-2023-11-03-07-41-36-479
101 |
102 | openai:
103 | api_key: ${OPENAI_API_KEY:}
104 | model: gpt-3.5-turbo
105 | embedding_api_key: ${OPENAI_API_KEY:}
106 |
107 | ollama:
108 | llm_model: llama2
109 | embedding_model: nomic-embed-text
110 | api_base: http://localhost:11434
111 |   embedding_api_base: http://localhost:11434 # change this if your embedding model runs on another Ollama instance
112 | keep_alive: 5m
113 | request_timeout: 120.0
114 |
115 | azopenai:
116 | api_key: ${AZ_OPENAI_API_KEY:}
117 | azure_endpoint: ${AZ_OPENAI_ENDPOINT:}
118 | embedding_deployment_name: ${AZ_OPENAI_EMBEDDING_DEPLOYMENT_NAME:}
119 | llm_deployment_name: ${AZ_OPENAI_LLM_DEPLOYMENT_NAME:}
120 | api_version: "2023-05-15"
121 | embedding_model: text-embedding-ada-002
122 | llm_model: gpt-35-turbo
123 |
124 | gemini:
125 | api_key: ${GOOGLE_API_KEY:}
126 | model: models/gemini-pro
127 | embedding_model: models/embedding-001
128 |
--------------------------------------------------------------------------------
/fern/docs/pages/manual/ingestion.mdx:
--------------------------------------------------------------------------------
1 | # Ingesting & Managing Documents
2 |
3 | The ingestion of documents can be done in different ways:
4 |
5 | * Using the `/ingest` API (see the example below)
6 | * Using the Gradio UI
7 | * Using the Bulk Local Ingestion functionality (check next section)
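
For example, the `/ingest` API can be called from any HTTP client. The sketch below is a minimal illustration only: the port and endpoint path assume the default configuration, and `requests` is just one possible client; check the API reference for the exact contract.

```python
# Hedged example: upload a single file to a locally running PrivateGPT
# instance through its ingest API. Assumes the server listens on
# http://localhost:8001 and exposes the default file ingestion endpoint.
import requests  # any HTTP client works; requests is used for brevity

with open("my-document.pdf", "rb") as f:
    response = requests.post(
        "http://localhost:8001/v1/ingest/file",
        files={"file": ("my-document.pdf", f)},
    )

response.raise_for_status()
print(response.json())  # metadata of the ingested document(s)
```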
8 |
9 | ## Bulk Local Ingestion
10 |
11 | When you are running PrivateGPT in a fully local setup, you can ingest a complete folder for convenience (containing
12 | pdf, text files, etc.)
13 | and optionally watch it for changes with the command:
14 |
15 | ```bash
16 | make ingest /path/to/folder -- --watch
17 | ```
18 |
19 | To log the processed and failed files to an additional file, use:
20 |
21 | ```bash
22 | make ingest /path/to/folder -- --watch --log-file /path/to/log/file.log
23 | ```
24 |
25 | **Note for Windows Users:** Depending on your Windows version and whether you are using PowerShell to execute
26 | PrivateGPT API calls, you may need to include the parameter name explicitly when passing the folder path to be ingested:
27 |
28 | ```bash
29 | make ingest arg=/path/to/folder -- --watch --log-file /path/to/log/file.log
30 | ```
31 |
32 | After ingestion is complete, you should be able to chat with your documents
33 | by navigating to http://localhost:8001 and using the option `Query documents`,
34 | or using the completions / chat API.
35 |
36 | ## Ingestion troubleshooting
37 |
38 | ### Running out of memory
39 |
40 | To avoid running out of memory, you should ingest your documents without the LLM loaded in your (video) memory.
41 | To do so, change your configuration to set `llm.mode: mock`.
42 |
43 | You can also use the existing `PGPT_PROFILES=mock` profile, which will set the following configuration for you:
44 |
45 | ```yaml
46 | llm:
47 | mode: mock
48 | embedding:
49 | mode: local
50 | ```
51 |
52 | This configuration allows you to use hardware acceleration for creating embeddings while avoiding loading the full LLM into (video) memory.
53 |
54 | Once your documents are ingested, you can set the `llm.mode` value back to `local` (or your previous custom value).
55 |
56 | ### Ingestion speed
57 |
58 | The ingestion speed depends on the number of documents you are ingesting and the size of each document.
59 | To speed up ingestion, you can change the ingestion mode in the configuration.
60 |
61 | The following ingestion modes exist:
62 | * `simple`: historic behavior, ingest one document at a time, sequentially
63 | * `batch`: read, parse, and embed multiple documents using batches (batch read, then batch parse, then batch embed)
64 | * `parallel`: read, parse, and embed multiple documents in parallel. This is the fastest ingestion mode for a local setup.
65 | * `pipeline`: an alternative to `parallel`.
66 | To change the ingestion mode, you can use the `embedding.ingest_mode` configuration value. The default value is `simple`.
67 |
68 | To configure the number of workers used for parallel or batched ingestion, you can use
69 | the `embedding.count_workers` configuration value. If you set this value too high, you might run out of
70 | memory, so be mindful when setting this value. The default value is `2`.
71 | For `batch` mode, you can easily set this value to the number of threads available on your CPU without
72 | running out of memory. For `parallel` mode, you should be more careful and use a lower value.
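
For example, on a machine with 8 CPU threads, a `batch` configuration could look like the following (the worker count is purely illustrative; tune it to your own hardware):

```yaml
embedding:
  ingest_mode: batch
  count_workers: 8
```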
73 |
74 | The configuration below should be enough for users who want to stress their hardware more:
75 | ```yaml
76 | embedding:
77 | ingest_mode: parallel
78 | count_workers: 4
79 | ```
80 |
81 | If your hardware is powerful enough and you are ingesting heavy documents, you can increase the number of workers.
82 | It is recommended to do your own tests to find the optimal value for your hardware.
83 |
84 | If you have a `bash` shell, you can use this set of commands to run your own benchmark:
85 |
86 | ```bash
87 | # Wipe your local data, to put yourself in a clean state
88 | # This will delete all your ingested documents
89 | make wipe
90 |
91 | time PGPT_PROFILES=mock python ./scripts/ingest_folder.py ~/my-dir/to-ingest/
92 | ```
93 |
94 | ## Supported file formats
95 |
96 | privateGPT by default supports all file formats that contain plain text (for example, `.txt` files, `.html`, etc.).
97 | However, these text-based file formats are only treated as plain text files and are not pre-processed in any other way.
98 |
99 | It also supports the following file formats:
100 | * `.hwp`
101 | * `.pdf`
102 | * `.docx`
103 | * `.pptx`
104 | * `.ppt`
105 | * `.pptm`
106 | * `.jpg`
107 | * `.png`
108 | * `.jpeg`
109 | * `.mp3`
110 | * `.mp4`
111 | * `.csv`
112 | * `.epub`
113 | * `.md`
114 | * `.mbox`
115 | * `.ipynb`
116 | * `.json`
117 |
118 | **Please note the following nuance**: while `privateGPT` supports these file formats, it **might** require additional
119 | dependencies to be installed in your Python virtual environment.
120 | For example, if you try to ingest `.epub` files, `privateGPT` might fail to do so and instead display an
121 | explanatory error asking you to install the dependencies required for this file format.
122 |
123 |
124 | **Other file formats might work**, but they will be considered as plain text
125 | files (in other words, they will be ingested as `.txt` files).
--------------------------------------------------------------------------------
/private_gpt/components/embedding/embedding_component.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | from injector import inject, singleton
4 | from llama_index.core.embeddings import BaseEmbedding, MockEmbedding
5 |
6 | from private_gpt.paths import models_cache_path
7 | from private_gpt.settings.settings import Settings
8 |
9 | logger = logging.getLogger(__name__)
10 |
11 |
12 | @singleton
13 | class EmbeddingComponent:
14 | embedding_model: BaseEmbedding
15 |
16 | @inject
17 | def __init__(self, settings: Settings) -> None:
18 | embedding_mode = settings.embedding.mode
19 | logger.info("Initializing the embedding model in mode=%s", embedding_mode)
20 | match embedding_mode:
21 | case "huggingface":
22 | try:
23 | from llama_index.embeddings.huggingface import ( # type: ignore
24 | HuggingFaceEmbedding,
25 | )
26 | except ImportError as e:
27 | raise ImportError(
28 | "Local dependencies not found, install with `poetry install --extras embeddings-huggingface`"
29 | ) from e
30 |
31 | self.embedding_model = HuggingFaceEmbedding(
32 | model_name=settings.huggingface.embedding_hf_model_name,
33 | cache_folder=str(models_cache_path),
34 | )
35 | case "sagemaker":
36 | try:
37 | from private_gpt.components.embedding.custom.sagemaker import (
38 | SagemakerEmbedding,
39 | )
40 | except ImportError as e:
41 | raise ImportError(
42 | "Sagemaker dependencies not found, install with `poetry install --extras embeddings-sagemaker`"
43 | ) from e
44 |
45 | self.embedding_model = SagemakerEmbedding(
46 | endpoint_name=settings.sagemaker.embedding_endpoint_name,
47 | )
48 | case "openai":
49 | try:
50 | from llama_index.embeddings.openai import ( # type: ignore
51 | OpenAIEmbedding,
52 | )
53 | except ImportError as e:
54 | raise ImportError(
55 | "OpenAI dependencies not found, install with `poetry install --extras embeddings-openai`"
56 | ) from e
57 |
58 | api_base = (
59 | settings.openai.embedding_api_base or settings.openai.api_base
60 | )
61 | api_key = settings.openai.embedding_api_key or settings.openai.api_key
62 | model = settings.openai.embedding_model
63 |
64 | self.embedding_model = OpenAIEmbedding(
65 | api_base=api_base,
66 | api_key=api_key,
67 | model=model,
68 | )
69 | case "ollama":
70 | try:
71 | from llama_index.embeddings.ollama import ( # type: ignore
72 | OllamaEmbedding,
73 | )
74 | except ImportError as e:
75 | raise ImportError(
76 | "Local dependencies not found, install with `poetry install --extras embeddings-ollama`"
77 | ) from e
78 |
79 | ollama_settings = settings.ollama
80 | self.embedding_model = OllamaEmbedding(
81 | model_name=ollama_settings.embedding_model,
82 | base_url=ollama_settings.embedding_api_base,
83 | )
84 | case "azopenai":
85 | try:
86 | from llama_index.embeddings.azure_openai import ( # type: ignore
87 | AzureOpenAIEmbedding,
88 | )
89 | except ImportError as e:
90 | raise ImportError(
91 | "Azure OpenAI dependencies not found, install with `poetry install --extras embeddings-azopenai`"
92 | ) from e
93 |
94 | azopenai_settings = settings.azopenai
95 | self.embedding_model = AzureOpenAIEmbedding(
96 | model=azopenai_settings.embedding_model,
97 | deployment_name=azopenai_settings.embedding_deployment_name,
98 | api_key=azopenai_settings.api_key,
99 | azure_endpoint=azopenai_settings.azure_endpoint,
100 | api_version=azopenai_settings.api_version,
101 | )
102 | case "gemini":
103 | try:
104 | from llama_index.embeddings.gemini import ( # type: ignore
105 | GeminiEmbedding,
106 | )
107 | except ImportError as e:
108 | raise ImportError(
109 | "Gemini dependencies not found, install with `poetry install --extras embeddings-gemini`"
110 | ) from e
111 |
112 | self.embedding_model = GeminiEmbedding(
113 | api_key=settings.gemini.api_key,
114 | model_name=settings.gemini.embedding_model,
115 | )
116 | case "mock":
117 |                 # Not a random number: 384 is the dimensionality used by
118 |                 # the default embedding model
119 | self.embedding_model = MockEmbedding(384)
120 |
--------------------------------------------------------------------------------
/private_gpt/server/ingest/ingest_service.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import tempfile
3 | from pathlib import Path
4 | from typing import TYPE_CHECKING, AnyStr, BinaryIO
5 |
6 | from injector import inject, singleton
7 | from llama_index.core.node_parser import SentenceWindowNodeParser
8 | from llama_index.core.storage import StorageContext
9 |
10 | from private_gpt.components.embedding.embedding_component import EmbeddingComponent
11 | from private_gpt.components.ingest.ingest_component import get_ingestion_component
12 | from private_gpt.components.llm.llm_component import LLMComponent
13 | from private_gpt.components.node_store.node_store_component import NodeStoreComponent
14 | from private_gpt.components.vector_store.vector_store_component import (
15 | VectorStoreComponent,
16 | )
17 | from private_gpt.server.ingest.model import IngestedDoc
18 | from private_gpt.settings.settings import settings
19 |
20 | if TYPE_CHECKING:
21 | from llama_index.core.storage.docstore.types import RefDocInfo
22 |
23 | logger = logging.getLogger(__name__)
24 |
25 |
26 | @singleton
27 | class IngestService:
28 | @inject
29 | def __init__(
30 | self,
31 | llm_component: LLMComponent,
32 | vector_store_component: VectorStoreComponent,
33 | embedding_component: EmbeddingComponent,
34 | node_store_component: NodeStoreComponent,
35 | ) -> None:
36 | self.llm_service = llm_component
37 | self.storage_context = StorageContext.from_defaults(
38 | vector_store=vector_store_component.vector_store,
39 | docstore=node_store_component.doc_store,
40 | index_store=node_store_component.index_store,
41 | )
42 | node_parser = SentenceWindowNodeParser.from_defaults()
43 |
44 | self.ingest_component = get_ingestion_component(
45 | self.storage_context,
46 | embed_model=embedding_component.embedding_model,
47 | transformations=[node_parser, embedding_component.embedding_model],
48 | settings=settings(),
49 | )
50 |
51 | def _ingest_data(self, file_name: str, file_data: AnyStr) -> list[IngestedDoc]:
52 | logger.debug("Got file data of size=%s to ingest", len(file_data))
53 | # llama-index mainly supports reading from files, so
54 | # we have to create a tmp file to read for it to work
55 | # delete=False to avoid a Windows 11 permission error.
56 | with tempfile.NamedTemporaryFile(delete=False) as tmp:
57 | try:
58 | path_to_tmp = Path(tmp.name)
59 | if isinstance(file_data, bytes):
60 | path_to_tmp.write_bytes(file_data)
61 | else:
62 | path_to_tmp.write_text(str(file_data))
63 | return self.ingest_file(file_name, path_to_tmp)
64 | finally:
65 | tmp.close()
66 | path_to_tmp.unlink()
67 |
68 | def ingest_file(self, file_name: str, file_data: Path) -> list[IngestedDoc]:
69 | logger.info("Ingesting file_name=%s", file_name)
70 | documents = self.ingest_component.ingest(file_name, file_data)
71 | logger.info("Finished ingestion file_name=%s", file_name)
72 | return [IngestedDoc.from_document(document) for document in documents]
73 |
74 | def ingest_text(self, file_name: str, text: str) -> list[IngestedDoc]:
75 | logger.debug("Ingesting text data with file_name=%s", file_name)
76 | return self._ingest_data(file_name, text)
77 |
78 | def ingest_bin_data(
79 | self, file_name: str, raw_file_data: BinaryIO
80 | ) -> list[IngestedDoc]:
81 | logger.debug("Ingesting binary data with file_name=%s", file_name)
82 | file_data = raw_file_data.read()
83 | return self._ingest_data(file_name, file_data)
84 |
85 | def bulk_ingest(self, files: list[tuple[str, Path]]) -> list[IngestedDoc]:
86 | logger.info("Ingesting file_names=%s", [f[0] for f in files])
87 | documents = self.ingest_component.bulk_ingest(files)
88 |         logger.info("Finished ingestion file_names=%s", [f[0] for f in files])
89 | return [IngestedDoc.from_document(document) for document in documents]
90 |
91 | def list_ingested(self) -> list[IngestedDoc]:
92 | ingested_docs: list[IngestedDoc] = []
93 | try:
94 | docstore = self.storage_context.docstore
95 | ref_docs: dict[str, RefDocInfo] | None = docstore.get_all_ref_doc_info()
96 |
97 | if not ref_docs:
98 | return ingested_docs
99 |
100 | for doc_id, ref_doc_info in ref_docs.items():
101 | doc_metadata = None
102 | if ref_doc_info is not None and ref_doc_info.metadata is not None:
103 | doc_metadata = IngestedDoc.curate_metadata(ref_doc_info.metadata)
104 | ingested_docs.append(
105 | IngestedDoc(
106 | object="ingest.document",
107 | doc_id=doc_id,
108 | doc_metadata=doc_metadata,
109 | )
110 | )
111 | except ValueError:
112 | logger.warning("Got an exception when getting list of docs", exc_info=True)
113 | pass
114 | logger.debug("Found count=%s ingested documents", len(ingested_docs))
115 | return ingested_docs
116 |
117 | def delete(self, doc_id: str) -> None:
118 | """Delete an ingested document.
119 |
120 | :raises ValueError: if the document does not exist
121 | """
122 | logger.info(
123 | "Deleting the ingested document=%s in the doc and index store", doc_id
124 | )
125 | self.ingest_component.delete(doc_id)
126 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "private-gpt"
3 | version = "0.5.0"
4 | description = "Private GPT"
5 | authors = ["Zylon "]
6 |
7 | [tool.poetry.dependencies]
8 | python = ">=3.11,<3.12"
9 | # PrivateGPT
10 | fastapi = { extras = ["all"], version = "^0.111.0" }
11 | python-multipart = "^0.0.9"
12 | injector = "^0.21.0"
13 | pyyaml = "^6.0.1"
14 | watchdog = "^4.0.1"
15 | transformers = "^4.42.3"
16 | docx2txt = "^0.8"
17 | cryptography = "^3.1"
18 | # LlamaIndex core libs
19 | llama-index-core = "^0.10.52"
20 | llama-index-readers-file = "^0.1.27"
21 | # Optional LlamaIndex integration libs
22 | llama-index-llms-llama-cpp = {version = "^0.1.4", optional = true}
23 | llama-index-llms-openai = {version = "^0.1.25", optional = true}
24 | llama-index-llms-openai-like = {version ="^0.1.3", optional = true}
25 | llama-index-llms-ollama = {version ="^0.1.5", optional = true}
26 | llama-index-llms-azure-openai = {version ="^0.1.8", optional = true}
27 | llama-index-llms-gemini = {version ="^0.1.11", optional = true}
28 | llama-index-embeddings-ollama = {version ="^0.1.2", optional = true}
29 | llama-index-embeddings-huggingface = {version ="^0.2.2", optional = true}
30 | llama-index-embeddings-openai = {version ="^0.1.10", optional = true}
31 | llama-index-embeddings-azure-openai = {version ="^0.1.10", optional = true}
32 | llama-index-embeddings-gemini = {version ="^0.1.8", optional = true}
33 | llama-index-vector-stores-qdrant = {version ="^0.2.10", optional = true}
34 | llama-index-vector-stores-chroma = {version ="^0.1.10", optional = true}
35 | llama-index-vector-stores-postgres = {version ="^0.1.11", optional = true}
36 | llama-index-vector-stores-clickhouse = {version ="^0.1.3", optional = true}
37 | llama-index-storage-docstore-postgres = {version ="^0.1.3", optional = true}
38 | llama-index-storage-index-store-postgres = {version ="^0.1.4", optional = true}
39 | # Postgres
40 | psycopg2-binary = {version ="^2.9.9", optional = true}
41 | asyncpg = {version="^0.29.0", optional = true}
42 |
43 | # ClickHouse
44 | clickhouse-connect = {version = "^0.7.15", optional = true}
45 |
46 | # Optional Sagemaker dependency
47 | boto3 = {version ="^1.34.139", optional = true}
48 |
49 | # Optional Qdrant client
50 | qdrant-client = {version ="^1.9.0", optional = true}
51 |
52 | # Optional Reranker dependencies
53 | torch = {version ="^2.3.1", optional = true}
54 | sentence-transformers = {version ="^3.0.1", optional = true}
55 |
56 | # Optional UI
57 | gradio = {version ="^4.37.2", optional = true}
58 |
59 | # Optional Google Gemini dependency
60 | google-generativeai = {version ="^0.5.4", optional = true}
61 |
62 | [tool.poetry.extras]
63 | ui = ["gradio"]
64 | llms-llama-cpp = ["llama-index-llms-llama-cpp"]
65 | llms-openai = ["llama-index-llms-openai"]
66 | llms-openai-like = ["llama-index-llms-openai-like"]
67 | llms-ollama = ["llama-index-llms-ollama"]
68 | llms-sagemaker = ["boto3"]
69 | llms-azopenai = ["llama-index-llms-azure-openai"]
70 | llms-gemini = ["llama-index-llms-gemini", "google-generativeai"]
71 | embeddings-ollama = ["llama-index-embeddings-ollama"]
72 | embeddings-huggingface = ["llama-index-embeddings-huggingface"]
73 | embeddings-openai = ["llama-index-embeddings-openai"]
74 | embeddings-sagemaker = ["boto3"]
75 | embeddings-azopenai = ["llama-index-embeddings-azure-openai"]
76 | embeddings-gemini = ["llama-index-embeddings-gemini"]
77 | vector-stores-qdrant = ["llama-index-vector-stores-qdrant"]
78 | vector-stores-clickhouse = ["llama-index-vector-stores-clickhouse", "clickhouse_connect"]
79 | vector-stores-chroma = ["llama-index-vector-stores-chroma"]
80 | vector-stores-postgres = ["llama-index-vector-stores-postgres"]
81 | storage-nodestore-postgres = ["llama-index-storage-docstore-postgres","llama-index-storage-index-store-postgres","psycopg2-binary","asyncpg"]
82 | rerank-sentence-transformers = ["torch", "sentence-transformers"]
83 |
84 | [tool.poetry.group.dev.dependencies]
85 | black = "^22"
86 | mypy = "^1.2"
87 | pre-commit = "^2"
88 | pytest = "^7"
89 | pytest-cov = "^3"
90 | ruff = "^0"
91 | pytest-asyncio = "^0.21.1"
92 | types-pyyaml = "^6.0.12.12"
93 |
94 | [build-system]
95 | requires = ["poetry-core>=1.0.0"]
96 | build-backend = "poetry.core.masonry.api"
97 |
98 | # Packages configs
99 |
100 | ## coverage
101 |
102 | [tool.coverage.run]
103 | branch = true
104 |
105 | [tool.coverage.report]
106 | skip_empty = true
107 | precision = 2
108 |
109 | ## black
110 |
111 | [tool.black]
112 | target-version = ['py311']
113 |
114 | ## ruff
115 | # Recommended ruff config for now, to be updated as we go along.
116 | [tool.ruff]
117 | target-version = 'py311'
118 |
119 | # See all rules at https://beta.ruff.rs/docs/rules/
120 | select = [
121 | "E", # pycodestyle
122 | "W", # pycodestyle
123 | "F", # Pyflakes
124 | "B", # flake8-bugbear
125 | "C4", # flake8-comprehensions
126 | "D", # pydocstyle
127 | "I", # isort
128 | "SIM", # flake8-simplify
129 | "TCH", # flake8-type-checking
130 | "TID", # flake8-tidy-imports
131 | "Q", # flake8-quotes
132 | "UP", # pyupgrade
133 | "PT", # flake8-pytest-style
134 | "RUF", # Ruff-specific rules
135 | ]
136 |
137 | ignore = [
138 | "E501", # "Line too long"
139 | # -> line length already regulated by black
140 | "PT011", # "pytest.raises() should specify expected exception"
141 | # -> would imply to update tests every time you update exception message
142 | "SIM102", # "Use a single `if` statement instead of nested `if` statements"
143 | # -> too restrictive,
144 | "D100",
145 | "D101",
146 | "D102",
147 | "D103",
148 | "D104",
149 | "D105",
150 | "D106",
151 | "D107"
152 | # -> "Missing docstring in public function too restrictive"
153 | ]
154 |
155 | [tool.ruff.pydocstyle]
156 | # Automatically disable rules that are incompatible with Google docstring convention
157 | convention = "google"
158 |
159 | [tool.ruff.pycodestyle]
160 | max-doc-length = 88
161 |
162 | [tool.ruff.flake8-tidy-imports]
163 | ban-relative-imports = "all"
164 |
165 | [tool.ruff.flake8-type-checking]
166 | strict = true
167 | runtime-evaluated-base-classes = ["pydantic.BaseModel"]
168 | # Pydantic needs to be able to evaluate types at runtime
169 | # see https://pypi.org/project/flake8-type-checking/ for flake8-type-checking documentation
170 | # see https://beta.ruff.rs/docs/settings/#flake8-type-checking-runtime-evaluated-base-classes for ruff documentation
171 |
172 | [tool.ruff.per-file-ignores]
173 | # Allow missing docstrings for tests
174 | "tests/**/*.py" = ["D1"]
175 |
176 | ## mypy
177 |
178 | [tool.mypy]
179 | python_version = "3.11"
180 | strict = true
181 | check_untyped_defs = false
182 | explicit_package_bases = true
183 | warn_unused_ignores = false
184 | exclude = ["tests"]
185 |
186 | [tool.mypy-llama-index]
187 | ignore_missing_imports = true
188 |
189 | [tool.pytest.ini_options]
190 | asyncio_mode = "auto"
191 | testpaths = ["tests"]
192 | addopts = [
193 | "--import-mode=importlib",
194 | ]
195 |
--------------------------------------------------------------------------------
/scripts/utils.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 | import shutil
4 | from typing import Any, ClassVar
5 |
6 | from private_gpt.paths import local_data_path
7 | from private_gpt.settings.settings import settings
8 |
9 |
10 | def wipe_file(file: str) -> None:
11 | if os.path.isfile(file):
12 | os.remove(file)
13 | print(f" - Deleted {file}")
14 |
15 |
16 | def wipe_tree(path: str) -> None:
17 | if not os.path.exists(path):
18 | print(f"Warning: Path not found {path}")
19 | return
20 | print(f"Wiping {path}...")
21 | all_files = os.listdir(path)
22 |
23 | files_to_remove = [file for file in all_files if file != ".gitignore"]
24 | for file_name in files_to_remove:
25 | file_path = os.path.join(path, file_name)
26 | try:
27 | if os.path.isfile(file_path):
28 | os.remove(file_path)
29 | elif os.path.isdir(file_path):
30 | shutil.rmtree(file_path)
31 | print(f" - Deleted {file_path}")
32 | except PermissionError:
33 | print(
34 | f"PermissionError: Unable to remove {file_path}. It is in use by another process."
35 | )
36 | continue
37 |
38 |
39 | class Postgres:
40 | tables: ClassVar[dict[str, list[str]]] = {
41 | "nodestore": ["data_docstore", "data_indexstore"],
42 | "vectorstore": ["data_embeddings"],
43 | }
44 |
45 | def __init__(self) -> None:
46 | try:
47 | import psycopg2
48 | except ModuleNotFoundError:
49 | raise ModuleNotFoundError("Postgres dependencies not found") from None
50 |
51 | connection = settings().postgres.model_dump(exclude_none=True)
52 | self.schema = connection.pop("schema_name")
53 | self.conn = psycopg2.connect(**connection)
54 |
55 | def wipe(self, storetype: str) -> None:
56 | cur = self.conn.cursor()
57 | try:
58 | for table in self.tables[storetype]:
59 | sql = f"DROP TABLE IF EXISTS {self.schema}.{table}"
60 | cur.execute(sql)
61 | print(f"Table {self.schema}.{table} dropped.")
62 | self.conn.commit()
63 | finally:
64 | cur.close()
65 |
66 | def stats(self, store_type: str) -> None:
67 | template = "SELECT '{table}', COUNT(*), pg_size_pretty(pg_total_relation_size('{table}')) FROM {table}"
68 | sql = " UNION ALL ".join(
69 | template.format(table=tbl) for tbl in self.tables[store_type]
70 | )
71 |
72 | cur = self.conn.cursor()
73 | try:
74 | print(f"Storage for Postgres {store_type}.")
75 | print("{:<15} | {:>15} | {:>9}".format("Table", "Rows", "Size"))
76 | print("-" * 45) # Print a line separator
77 |
78 | cur.execute(sql)
79 | for row in cur.fetchall():
80 | formatted_row_count = f"{row[1]:,}"
81 | print(f"{row[0]:<15} | {formatted_row_count:>15} | {row[2]:>9}")
82 |
83 | print()
84 | finally:
85 | cur.close()
86 |
87 | def __del__(self):
88 | if hasattr(self, "conn") and self.conn:
89 | self.conn.close()
90 |
91 |
92 | class Simple:
93 | def wipe(self, store_type: str) -> None:
94 | assert store_type == "nodestore"
95 | from llama_index.core.storage.docstore.types import (
96 | DEFAULT_PERSIST_FNAME as DOCSTORE,
97 | )
98 | from llama_index.core.storage.index_store.types import (
99 | DEFAULT_PERSIST_FNAME as INDEXSTORE,
100 | )
101 |
102 | for store in (DOCSTORE, INDEXSTORE):
103 | wipe_file(str((local_data_path / store).absolute()))
104 |
105 |
106 | class Chroma:
107 | def wipe(self, store_type: str) -> None:
108 | assert store_type == "vectorstore"
109 | wipe_tree(str((local_data_path / "chroma_db").absolute()))
110 |
111 |
112 | class Qdrant:
113 | COLLECTION = (
114 | "make_this_parameterizable_per_api_call" # ?! see vector_store_component.py
115 | )
116 |
117 | def __init__(self) -> None:
118 | try:
119 | from qdrant_client import QdrantClient # type: ignore
120 | except ImportError:
121 | raise ImportError("Qdrant dependencies not found") from None
122 | self.client = QdrantClient(**settings().qdrant.model_dump(exclude_none=True))
123 |
124 | def wipe(self, store_type: str) -> None:
125 | assert store_type == "vectorstore"
126 | try:
127 | self.client.delete_collection(self.COLLECTION)
128 | print("Collection dropped successfully.")
129 | except Exception as e:
130 | print("Error dropping collection:", e)
131 |
132 | def stats(self, store_type: str) -> None:
133 | print(f"Storage for Qdrant {store_type}.")
134 | try:
135 | collection_data = self.client.get_collection(self.COLLECTION)
136 | if collection_data:
137 | # Collection Info
138 | # https://qdrant.tech/documentation/concepts/collections/
139 | print(f"\tPoints: {collection_data.points_count:,}")
140 | print(f"\tVectors: {collection_data.vectors_count:,}")
141 | print(f"\tIndex Vectors: {collection_data.indexed_vectors_count:,}")
142 | return
143 | except ValueError:
144 | pass
145 | print("\t- Qdrant collection not found or empty")
146 |
147 |
148 | class Command:
149 | DB_HANDLERS: ClassVar[dict[str, Any]] = {
150 | "simple": Simple, # node store
151 | "chroma": Chroma, # vector store
152 | "postgres": Postgres, # node, index and vector store
153 | "qdrant": Qdrant, # vector store
154 | }
155 |
156 |     def for_each_store(self, cmd: str) -> None:
157 | for store_type in ("nodestore", "vectorstore"):
158 | database = getattr(settings(), store_type).database
159 | handler_class = self.DB_HANDLERS.get(database)
160 | if handler_class is None:
161 | print(f"No handler found for database '{database}'")
162 | continue
163 | handler_instance = handler_class() # Instantiate the class
164 | # If the DB can handle this cmd dispatch it.
165 | if hasattr(handler_instance, cmd) and callable(
166 | func := getattr(handler_instance, cmd)
167 | ):
168 | func(store_type)
169 | else:
170 | print(
171 | f"Unable to execute command '{cmd}' on '{store_type}' in database '{database}'"
172 | )
173 |
174 | def execute(self, cmd: str) -> None:
175 | if cmd in ("wipe", "stats"):
176 | self.for_each_store(cmd)
177 |
178 |
179 | if __name__ == "__main__":
180 | parser = argparse.ArgumentParser()
181 | parser.add_argument("mode", help="select a mode to run", choices=["wipe", "stats"])
182 | args = parser.parse_args()
183 |
184 | Command().execute(args.mode.lower())
185 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # 🔒 PrivateGPT 📑
2 |
3 | [](https://github.com/zylon-ai/private-gpt/actions/workflows/tests.yml?query=branch%3Amain)
4 | [](https://docs.privategpt.dev/)
5 |
6 |
7 | 
8 |
9 | PrivateGPT is a production-ready AI project that allows you to ask questions about your documents using the power
10 | of Large Language Models (LLMs), even in scenarios without an Internet connection. 100% private, no data leaves your
11 | execution environment at any point.
12 |
13 | The project provides an API offering all the primitives required to build private, context-aware AI applications.
14 | It follows and extends the [OpenAI API standard](https://openai.com/blog/openai-api),
15 | and supports both normal and streaming responses.
16 |
17 | The API is divided into two logical blocks:
18 |
19 | **High-level API**, which abstracts all the complexity of a RAG (Retrieval Augmented Generation)
20 | pipeline implementation:
21 | - Ingestion of documents: internally managing document parsing,
22 | splitting, metadata extraction, embedding generation and storage.
23 | - Chat & Completions using context from ingested documents:
24 | abstracting the retrieval of context, the prompt engineering and the response generation.
25 |
26 | **Low-level API**, which allows advanced users to implement their own complex pipelines:
27 | - Embeddings generation: based on a piece of text.
28 | - Contextual chunks retrieval: given a query, returns the most relevant chunks of text from the ingested documents.
29 |
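As a quick illustration of the high-level API, a chat request grounded in your ingested documents could look roughly like the sketch below (the endpoint path, port and field names are assumptions based on the default setup; see the [documentation](https://docs.privategpt.dev/) for the authoritative API reference):

```python
# Hedged sketch: ask a question about previously ingested documents using the
# OpenAI-style chat endpoint of a local PrivateGPT server (default port 8001).
import requests

response = requests.post(
    "http://localhost:8001/v1/chat/completions",
    json={
        "messages": [{"role": "user", "content": "What is this document about?"}],
        "use_context": True,  # retrieve context from the ingested documents
    },
)
print(response.json()["choices"][0]["message"]["content"])
```
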
30 | In addition to this, a working [Gradio UI](https://www.gradio.app/)
31 | client is provided to test the API, together with a set of useful tools such as bulk model
32 | download script, ingestion script, documents folder watch, etc.
33 |
34 | > 👂 **Need help applying PrivateGPT to your specific use case?**
35 | > [Let us know more about it](https://forms.gle/4cSDmH13RZBHV9at7)
36 | > and we'll try to help! We are refining PrivateGPT through your feedback.
37 |
38 | ## 🎞️ Overview
39 | DISCLAIMER: This README is not updated as frequently as the [documentation](https://docs.privategpt.dev/).
40 | Please check it out for the latest updates!
41 |
42 | ### Motivation behind PrivateGPT
43 | Generative AI is a game changer for our society, but adoption in companies of all sizes and data-sensitive
44 | domains like healthcare or legal is limited by a clear concern: **privacy**.
45 | Not being able to ensure that your data is fully under your control when using third-party AI tools
46 | is a risk those industries cannot take.
47 |
48 | ### Primordial version
49 | The first version of PrivateGPT was launched in May 2023 as a novel approach to address the privacy
50 | concerns by using LLMs in a complete offline way.
51 |
52 | That version, which rapidly became a go-to project for privacy-sensitive setups and served as the seed
53 | for thousands of local-focused generative AI projects, was the foundation of what PrivateGPT is becoming nowadays;
54 | it was a simpler and more educational implementation for understanding the basic concepts required
55 | to build a fully local (and therefore private) chatGPT-like tool.
56 |
57 | > It is strongly recommended to do a clean clone and install of this new version of
58 | PrivateGPT if you come from the previous, primordial version.
59 |
60 | ### Present and Future of PrivateGPT
61 | PrivateGPT is now evolving towards becoming a gateway to generative AI models and primitives, including
62 | completions, document ingestion, RAG pipelines and other low-level building blocks.
63 | We want to make it easier for any developer to build AI applications and experiences, as well as provide
64 | a suitable extensive architecture for the community to keep contributing.
65 |
66 | ## 📄 Documentation
67 | Full documentation on installation, dependencies, configuration, running the server, deployment options,
68 | ingesting local documents, API details and UI features can be found here: https://docs.privategpt.dev/
69 |
70 | ## 🧩 Architecture
71 | Conceptually, PrivateGPT is an API that wraps a RAG pipeline and exposes its
72 | primitives.
73 | * The API is built using [FastAPI](https://fastapi.tiangolo.com/) and follows
74 | [OpenAI's API scheme](https://platform.openai.com/docs/api-reference).
75 | * The RAG pipeline is based on [LlamaIndex](https://www.llamaindex.ai/).
76 |
77 | The design of PrivateGPT makes it easy to extend and adapt both the API and the
78 | RAG implementation. Some key architectural decisions are:
79 | * Dependency Injection, decoupling the different components and layers.
80 | * Usage of LlamaIndex abstractions such as `LLM`, `BaseEmbedding` or `VectorStore`,
81 | making it straightforward to swap the actual implementations of those abstractions.
82 | * Simplicity, adding as few layers and new abstractions as possible.
83 | * Ready to use, providing a full implementation of the API and RAG
84 | pipeline.
85 |
86 | Main building blocks:
87 | * APIs are defined in `private_gpt:server:`. Each API package contains a
88 | `*_router.py` (FastAPI layer) and a `*_service.py` (the
89 | service implementation). Each *Service* uses LlamaIndex base abstractions instead
90 | of specific implementations,
91 | decoupling the actual implementation from its usage.
92 | * Components are placed in
93 | `private_gpt:components:`. Each *Component* is in charge of providing
94 | actual implementations to the base abstractions used in the Services - for example
95 | `LLMComponent` is in charge of providing an actual implementation of an `LLM`
96 | (for example `LlamaCPP` or `OpenAI`).
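
For illustration, a new service following this pattern could look like the hypothetical sketch below (the service itself is made up; only the `@singleton`/`@inject` wiring and the `LLMComponent` dependency mirror the real code):

```python
# Hypothetical example (not part of the codebase): a service that gets its
# dependencies injected and only relies on the LlamaIndex `LLM` abstraction.
from injector import inject, singleton

from private_gpt.components.llm.llm_component import LLMComponent


@singleton
class SummarizeService:
    @inject
    def __init__(self, llm_component: LLMComponent) -> None:
        # Swapping LlamaCPP for OpenAI (or any other LLM) requires no change here.
        self.llm = llm_component.llm

    def summarize(self, text: str) -> str:
        return self.llm.complete(f"Summarize the following text:\n{text}").text
```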
97 |
98 | ## 💡 Contributing
99 | Contributions are welcome! To ensure code quality, we have enabled several format and
100 | typing checks; just run `make check` before committing to make sure your code is OK.
101 | Remember to test your code! You'll find a tests folder with helpers, and you can run
102 | tests using the `make test` command.
103 |
104 | Don't know what to contribute? Here is the public
105 | [Project Board](https://github.com/users/imartinez/projects/3) with several ideas.
106 |
107 | Head over to the #contributors channel on our Discord
108 | and ask for write permissions on that GitHub project.
109 |
110 | ## 💬 Community
111 | Join the conversation around PrivateGPT on our:
112 | - [Twitter (aka X)](https://twitter.com/PrivateGPT_AI)
113 | - [Discord](https://discord.gg/bK6mRVpErU)
114 |
115 | ## 📖 Citation
116 | If you use PrivateGPT in a paper, check out the [Citation file](CITATION.cff) for the correct citation.
117 | You can also use the "Cite this repository" button in this repo to get the citation in different formats.
118 |
119 | Here are a couple of examples:
120 |
121 | ## 🤗 Partners & Supporters
122 | PrivateGPT is actively supported by the teams behind:
123 | * [Qdrant](https://qdrant.tech/), providing the default vector database
124 | * [Fern](https://buildwithfern.com/), providing Documentation and SDKs
125 | * [LlamaIndex](https://www.llamaindex.ai/), providing the base RAG framework and abstractions
126 |
127 | This project has been strongly influenced and supported by other amazing projects like
128 | [LangChain](https://github.com/hwchase17/langchain),
129 | [GPT4All](https://github.com/nomic-ai/gpt4all),
130 | [LlamaCpp](https://github.com/ggerganov/llama.cpp),
131 | [Chroma](https://www.trychroma.com/)
132 | and [SentenceTransformers](https://www.sbert.net/).
133 |
--------------------------------------------------------------------------------
/private_gpt/components/vector_store/vector_store_component.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import typing
3 |
4 | from injector import inject, singleton
5 | from llama_index.core.indices.vector_store import VectorIndexRetriever, VectorStoreIndex
6 | from llama_index.core.vector_stores.types import (
7 | BasePydanticVectorStore,
8 | FilterCondition,
9 | MetadataFilter,
10 | MetadataFilters,
11 | )
12 |
13 | from private_gpt.open_ai.extensions.context_filter import ContextFilter
14 | from private_gpt.paths import local_data_path
15 | from private_gpt.settings.settings import Settings
16 |
17 | logger = logging.getLogger(__name__)
18 |
19 |
20 | def _doc_id_metadata_filter(
21 | context_filter: ContextFilter | None,
22 | ) -> MetadataFilters:
23 | filters = MetadataFilters(filters=[], condition=FilterCondition.OR)
24 |
25 | if context_filter is not None and context_filter.docs_ids is not None:
26 | for doc_id in context_filter.docs_ids:
27 | filters.filters.append(MetadataFilter(key="doc_id", value=doc_id))
28 |
29 | return filters
30 |
31 |
32 | @singleton
33 | class VectorStoreComponent:
34 | settings: Settings
35 | vector_store: BasePydanticVectorStore
36 |
37 | @inject
38 | def __init__(self, settings: Settings) -> None:
39 | self.settings = settings
40 | match settings.vectorstore.database:
41 | case "postgres":
42 | try:
43 | from llama_index.vector_stores.postgres import ( # type: ignore
44 | PGVectorStore,
45 | )
46 | except ImportError as e:
47 | raise ImportError(
48 | "Postgres dependencies not found, install with `poetry install --extras vector-stores-postgres`"
49 | ) from e
50 |
51 | if settings.postgres is None:
52 | raise ValueError(
53 | "Postgres settings not found. Please provide settings."
54 | )
55 |
56 | self.vector_store = typing.cast(
57 | BasePydanticVectorStore,
58 | PGVectorStore.from_params(
59 | **settings.postgres.model_dump(exclude_none=True),
60 | table_name="embeddings",
61 | embed_dim=settings.embedding.embed_dim,
62 | ),
63 | )
64 |
65 | case "chroma":
66 | try:
67 | import chromadb # type: ignore
68 | from chromadb.config import ( # type: ignore
69 | Settings as ChromaSettings,
70 | )
71 |
72 | from private_gpt.components.vector_store.batched_chroma import (
73 | BatchedChromaVectorStore,
74 | )
75 | except ImportError as e:
76 | raise ImportError(
77 | "ChromaDB dependencies not found, install with `poetry install --extras vector-stores-chroma`"
78 | ) from e
79 |
80 | chroma_settings = ChromaSettings(anonymized_telemetry=False)
81 | chroma_client = chromadb.PersistentClient(
82 | path=str((local_data_path / "chroma_db").absolute()),
83 | settings=chroma_settings,
84 | )
85 | chroma_collection = chroma_client.get_or_create_collection(
86 | "make_this_parameterizable_per_api_call"
87 | ) # TODO
88 |
89 | self.vector_store = typing.cast(
90 | BasePydanticVectorStore,
91 | BatchedChromaVectorStore(
92 | chroma_client=chroma_client, chroma_collection=chroma_collection
93 | ),
94 | )
95 |
96 | case "qdrant":
97 | try:
98 | from llama_index.vector_stores.qdrant import ( # type: ignore
99 | QdrantVectorStore,
100 | )
101 | from qdrant_client import QdrantClient # type: ignore
102 | except ImportError as e:
103 | raise ImportError(
104 | "Qdrant dependencies not found, install with `poetry install --extras vector-stores-qdrant`"
105 | ) from e
106 |
107 | if settings.qdrant is None:
108 | logger.info(
109 | "Qdrant config not found. Using default settings."
110 | "Trying to connect to Qdrant at localhost:6333."
111 | )
112 | client = QdrantClient()
113 | else:
114 | client = QdrantClient(
115 | **settings.qdrant.model_dump(exclude_none=True)
116 | )
117 | self.vector_store = typing.cast(
118 | BasePydanticVectorStore,
119 | QdrantVectorStore(
120 | client=client,
121 | collection_name="make_this_parameterizable_per_api_call",
122 | ), # TODO
123 | )
124 | case "clickhouse":
125 | try:
126 | from clickhouse_connect import ( # type: ignore
127 | get_client,
128 | )
129 | from llama_index.vector_stores.clickhouse import ( # type: ignore
130 | ClickHouseVectorStore,
131 | )
132 | except ImportError as e:
133 | raise ImportError(
134 | "ClickHouse dependencies not found, install with `poetry install --extras vector-stores-clickhouse`"
135 | ) from e
136 |
137 | if settings.clickhouse is None:
138 | raise ValueError(
139 | "ClickHouse settings not found. Please provide settings."
140 | )
141 |
142 | clickhouse_client = get_client(
143 | host=settings.clickhouse.host,
144 | port=settings.clickhouse.port,
145 | username=settings.clickhouse.username,
146 | password=settings.clickhouse.password,
147 | )
148 | self.vector_store = ClickHouseVectorStore(
149 | clickhouse_client=clickhouse_client
150 | )
151 | case _:
152 | # Should be unreachable
153 | # The settings validator should have caught this
154 | raise ValueError(
155 | f"Vectorstore database {settings.vectorstore.database} not supported"
156 | )
157 |
158 | def get_retriever(
159 | self,
160 | index: VectorStoreIndex,
161 | context_filter: ContextFilter | None = None,
162 | similarity_top_k: int = 2,
163 | ) -> VectorIndexRetriever:
164 | # This way we support qdrant (using doc_ids) and the rest (using filters)
165 | return VectorIndexRetriever(
166 | index=index,
167 | similarity_top_k=similarity_top_k,
168 | doc_ids=context_filter.docs_ids if context_filter else None,
169 | filters=(
170 | _doc_id_metadata_filter(context_filter)
171 | if self.settings.vectorstore.database != "qdrant"
172 | else None
173 | ),
174 | )
175 |
176 | def close(self) -> None:
177 | if hasattr(self.vector_store.client, "close"):
178 | self.vector_store.client.close()
179 |
--------------------------------------------------------------------------------
/private_gpt/server/chat/chat_service.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 |
3 | from injector import inject, singleton
4 | from llama_index.core.chat_engine import ContextChatEngine, SimpleChatEngine
5 | from llama_index.core.chat_engine.types import (
6 | BaseChatEngine,
7 | )
8 | from llama_index.core.indices import VectorStoreIndex
9 | from llama_index.core.indices.postprocessor import MetadataReplacementPostProcessor
10 | from llama_index.core.llms import ChatMessage, MessageRole
11 | from llama_index.core.postprocessor import (
12 | SentenceTransformerRerank,
13 | SimilarityPostprocessor,
14 | )
15 | from llama_index.core.storage import StorageContext
16 | from llama_index.core.types import TokenGen
17 | from pydantic import BaseModel
18 |
19 | from private_gpt.components.embedding.embedding_component import EmbeddingComponent
20 | from private_gpt.components.llm.llm_component import LLMComponent
21 | from private_gpt.components.node_store.node_store_component import NodeStoreComponent
22 | from private_gpt.components.vector_store.vector_store_component import (
23 | VectorStoreComponent,
24 | )
25 | from private_gpt.open_ai.extensions.context_filter import ContextFilter
26 | from private_gpt.server.chunks.chunks_service import Chunk
27 | from private_gpt.settings.settings import Settings
28 |
29 |
30 | class Completion(BaseModel):
31 | response: str
32 | sources: list[Chunk] | None = None
33 |
34 |
35 | class CompletionGen(BaseModel):
36 | response: TokenGen
37 | sources: list[Chunk] | None = None
38 |
39 |
40 | @dataclass
41 | class ChatEngineInput:
42 | system_message: ChatMessage | None = None
43 | last_message: ChatMessage | None = None
44 | chat_history: list[ChatMessage] | None = None
45 |
46 | @classmethod
47 | def from_messages(cls, messages: list[ChatMessage]) -> "ChatEngineInput":
48 | # Detect if there is a system message, extract the last message and chat history
49 | system_message = (
50 | messages[0]
51 | if len(messages) > 0 and messages[0].role == MessageRole.SYSTEM
52 | else None
53 | )
54 | last_message = (
55 | messages[-1]
56 | if len(messages) > 0 and messages[-1].role == MessageRole.USER
57 | else None
58 | )
59 | # Remove from messages list the system message and last message,
60 | # if they exist. The rest is the chat history.
61 | if system_message:
62 | messages.pop(0)
63 | if last_message:
64 | messages.pop(-1)
65 | chat_history = messages if len(messages) > 0 else None
66 |
67 | return cls(
68 | system_message=system_message,
69 | last_message=last_message,
70 | chat_history=chat_history,
71 | )
72 |
73 |
74 | @singleton
75 | class ChatService:
76 | settings: Settings
77 |
78 | @inject
79 | def __init__(
80 | self,
81 | settings: Settings,
82 | llm_component: LLMComponent,
83 | vector_store_component: VectorStoreComponent,
84 | embedding_component: EmbeddingComponent,
85 | node_store_component: NodeStoreComponent,
86 | ) -> None:
87 | self.settings = settings
88 | self.llm_component = llm_component
89 | self.embedding_component = embedding_component
90 | self.vector_store_component = vector_store_component
91 | self.storage_context = StorageContext.from_defaults(
92 | vector_store=vector_store_component.vector_store,
93 | docstore=node_store_component.doc_store,
94 | index_store=node_store_component.index_store,
95 | )
96 | self.index = VectorStoreIndex.from_vector_store(
97 | vector_store_component.vector_store,
98 | storage_context=self.storage_context,
99 | llm=llm_component.llm,
100 | embed_model=embedding_component.embedding_model,
101 | show_progress=True,
102 | )
103 |
104 | def _chat_engine(
105 | self,
106 | system_prompt: str | None = None,
107 | use_context: bool = False,
108 | context_filter: ContextFilter | None = None,
109 | ) -> BaseChatEngine:
110 | settings = self.settings
111 | if use_context:
112 | vector_index_retriever = self.vector_store_component.get_retriever(
113 | index=self.index,
114 | context_filter=context_filter,
115 | similarity_top_k=self.settings.rag.similarity_top_k,
116 | )
117 | node_postprocessors = [
118 | MetadataReplacementPostProcessor(target_metadata_key="window"),
119 | SimilarityPostprocessor(
120 | similarity_cutoff=settings.rag.similarity_value
121 | ),
122 | ]
123 |
124 | if settings.rag.rerank.enabled:
125 | rerank_postprocessor = SentenceTransformerRerank(
126 | model=settings.rag.rerank.model, top_n=settings.rag.rerank.top_n
127 | )
128 | node_postprocessors.append(rerank_postprocessor)
129 |
130 | return ContextChatEngine.from_defaults(
131 | system_prompt=system_prompt,
132 | retriever=vector_index_retriever,
133 |                 llm=self.llm_component.llm,  # Has no effect at the moment
134 | node_postprocessors=node_postprocessors,
135 | )
136 | else:
137 | return SimpleChatEngine.from_defaults(
138 | system_prompt=system_prompt,
139 | llm=self.llm_component.llm,
140 | )
141 |
142 | def stream_chat(
143 | self,
144 | messages: list[ChatMessage],
145 | use_context: bool = False,
146 | context_filter: ContextFilter | None = None,
147 | ) -> CompletionGen:
148 | chat_engine_input = ChatEngineInput.from_messages(messages)
149 | last_message = (
150 | chat_engine_input.last_message.content
151 | if chat_engine_input.last_message
152 | else None
153 | )
154 | system_prompt = (
155 | chat_engine_input.system_message.content
156 | if chat_engine_input.system_message
157 | else None
158 | )
159 | chat_history = (
160 | chat_engine_input.chat_history if chat_engine_input.chat_history else None
161 | )
162 |
163 | chat_engine = self._chat_engine(
164 | system_prompt=system_prompt,
165 | use_context=use_context,
166 | context_filter=context_filter,
167 | )
168 | streaming_response = chat_engine.stream_chat(
169 | message=last_message if last_message is not None else "",
170 | chat_history=chat_history,
171 | )
172 | sources = [Chunk.from_node(node) for node in streaming_response.source_nodes]
173 | completion_gen = CompletionGen(
174 | response=streaming_response.response_gen, sources=sources
175 | )
176 | return completion_gen
177 |
178 | def chat(
179 | self,
180 | messages: list[ChatMessage],
181 | use_context: bool = False,
182 | context_filter: ContextFilter | None = None,
183 | ) -> Completion:
184 | chat_engine_input = ChatEngineInput.from_messages(messages)
185 | last_message = (
186 | chat_engine_input.last_message.content
187 | if chat_engine_input.last_message
188 | else None
189 | )
190 | system_prompt = (
191 | chat_engine_input.system_message.content
192 | if chat_engine_input.system_message
193 | else None
194 | )
195 | chat_history = (
196 | chat_engine_input.chat_history if chat_engine_input.chat_history else None
197 | )
198 |
199 | chat_engine = self._chat_engine(
200 | system_prompt=system_prompt,
201 | use_context=use_context,
202 | context_filter=context_filter,
203 | )
204 | wrapped_response = chat_engine.chat(
205 | message=last_message if last_message is not None else "",
206 | chat_history=chat_history,
207 | )
208 | sources = [Chunk.from_node(node) for node in wrapped_response.source_nodes]
209 | completion = Completion(response=wrapped_response.response, sources=sources)
210 | return completion
211 |
--------------------------------------------------------------------------------