├── version.txt ├── local_data └── .gitignore ├── models └── .gitignore ├── private_gpt ├── components │ ├── __init__.py │ ├── ingest │ │ ├── __init__.py │ │ └── ingest_helper.py │ ├── embedding │ │ ├── __init__.py │ │ ├── custom │ │ │ ├── __init__.py │ │ │ └── sagemaker.py │ │ └── embedding_component.py │ ├── llm │ │ ├── custom │ │ │ └── __init__.py │ │ └── __init__.py │ ├── node_store │ │ ├── __init__.py │ │ └── node_store_component.py │ └── vector_store │ │ ├── __init__.py │ │ ├── batched_chroma.py │ │ └── vector_store_component.py ├── server │ ├── chat │ │ ├── __init__.py │ │ ├── chat_router.py │ │ └── chat_service.py │ ├── chunks │ │ ├── __init__.py │ │ ├── chunks_router.py │ │ └── chunks_service.py │ ├── health │ │ ├── __init__.py │ │ └── health_router.py │ ├── ingest │ │ ├── __init__.py │ │ ├── model.py │ │ ├── ingest_watcher.py │ │ ├── ingest_router.py │ │ └── ingest_service.py │ ├── utils │ │ ├── __init__.py │ │ └── auth.py │ ├── embeddings │ │ ├── __init__.py │ │ ├── embeddings_service.py │ │ └── embeddings_router.py │ ├── __init__.py │ └── completions │ │ ├── __init__.py │ │ └── completions_router.py ├── settings │ ├── __init__.py │ ├── yaml.py │ └── settings_loader.py ├── ui │ ├── __init__.py │ ├── avatar-bot.ico │ └── images.py ├── utils │ ├── __init__.py │ ├── typing.py │ └── eta.py ├── open_ai │ ├── __init__.py │ ├── extensions │ │ ├── __init__.py │ │ └── context_filter.py │ └── openai_models.py ├── constants.py ├── main.py ├── __main__.py ├── paths.py ├── di.py ├── __init__.py └── launcher.py ├── tests ├── __init__.py ├── fixtures │ ├── __init__.py │ ├── fast_api_test_client.py │ ├── auto_close_qdrant.py │ ├── ingest_helper.py │ └── mock_injector.py ├── server │ ├── ingest │ │ ├── test.pdf │ │ ├── test_ingest_routes.py │ │ └── test.txt │ ├── chunks │ │ ├── chunk_test.txt │ │ └── test_chunk_routes.py │ ├── utils │ │ ├── test_auth.py │ │ └── test_simple_auth.py │ ├── embeddings │ │ └── test_embedding_routes.py │ └── chat │ │ └── test_chat_routes.py ├── ui │ └── test_ui.py ├── conftest.py ├── settings │ ├── test_settings.py │ └── test_settings_loader.py └── test_prompt_helper.py ├── tiktoken_cache └── .gitignore ├── scripts ├── __init__.py ├── extract_openapi.py ├── setup ├── ingest_folder.py └── utils.py ├── fern ├── fern.config.json ├── docs │ ├── assets │ │ ├── ui.png │ │ ├── favicon.ico │ │ ├── header.jpeg │ │ ├── logo_dark.png │ │ └── logo_light.png │ └── pages │ │ ├── manual │ │ ├── ingestion-reset.mdx │ │ ├── reranker.mdx │ │ ├── nodestore.mdx │ │ ├── ui.mdx │ │ ├── settings.mdx │ │ └── ingestion.mdx │ │ ├── api-reference │ │ ├── api-reference.mdx │ │ └── sdks.mdx │ │ ├── overview │ │ └── welcome.mdx │ │ ├── installation │ │ └── concepts.mdx │ │ └── recipes │ │ └── list-llm.mdx ├── generators.yml ├── README.md └── docs.yml ├── .dockerignore ├── settings-gemini.yaml ├── settings-openai.yaml ├── settings-mock.yaml ├── settings-sagemaker.yaml ├── settings-test.yaml ├── docker-compose.yaml ├── .github └── workflows │ ├── release-please.yml │ ├── fern-check.yml │ ├── publish-docs.yml │ ├── actions │ └── install_dependencies │ │ └── action.yml │ ├── stale.yml │ ├── docker.yml │ ├── preview-docs.yml │ └── tests.yml ├── .gitignore ├── settings-azopenai.yaml ├── settings-vllm.yaml ├── CITATION.cff ├── settings-local.yaml ├── settings-ollama-pg.yaml ├── Dockerfile.external ├── .pre-commit-config.yaml ├── settings-docker.yaml ├── Dockerfile.local ├── settings-ollama.yaml ├── Makefile ├── settings.yaml ├── pyproject.toml └── README.md /version.txt: 
-------------------------------------------------------------------------------- 1 | 0.5.0 2 | -------------------------------------------------------------------------------- /local_data/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore -------------------------------------------------------------------------------- /models/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore -------------------------------------------------------------------------------- /private_gpt/components/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /private_gpt/server/chat/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /private_gpt/server/chunks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /private_gpt/server/health/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /private_gpt/server/ingest/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /private_gpt/server/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Tests.""" 2 | -------------------------------------------------------------------------------- /private_gpt/components/ingest/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /private_gpt/server/embeddings/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tiktoken_cache/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore -------------------------------------------------------------------------------- /private_gpt/components/embedding/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /private_gpt/components/llm/custom/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /private_gpt/components/node_store/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /private_gpt/components/vector_store/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/private_gpt/components/embedding/custom/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /private_gpt/settings/__init__.py: -------------------------------------------------------------------------------- 1 | """Settings.""" 2 | -------------------------------------------------------------------------------- /private_gpt/ui/__init__.py: -------------------------------------------------------------------------------- 1 | """Gradio based UI.""" 2 | -------------------------------------------------------------------------------- /scripts/__init__.py: -------------------------------------------------------------------------------- 1 | """PrivateGPT scripts.""" 2 | -------------------------------------------------------------------------------- /tests/fixtures/__init__.py: -------------------------------------------------------------------------------- 1 | """Global fixtures.""" 2 | -------------------------------------------------------------------------------- /private_gpt/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """general utils.""" 2 | -------------------------------------------------------------------------------- /private_gpt/server/__init__.py: -------------------------------------------------------------------------------- 1 | """private-gpt server.""" 2 | -------------------------------------------------------------------------------- /private_gpt/components/llm/__init__.py: -------------------------------------------------------------------------------- 1 | """LLM implementations.""" 2 | -------------------------------------------------------------------------------- /private_gpt/open_ai/__init__.py: -------------------------------------------------------------------------------- 1 | """OpenAI compatibility utilities.""" 2 | -------------------------------------------------------------------------------- /private_gpt/open_ai/extensions/__init__.py: -------------------------------------------------------------------------------- 1 | """OpenAI API extensions.""" 2 | -------------------------------------------------------------------------------- /fern/fern.config.json: -------------------------------------------------------------------------------- 1 | { 2 | "organization": "privategpt", 3 | "version": "0.31.17" 4 | } -------------------------------------------------------------------------------- /private_gpt/server/completions/__init__.py: -------------------------------------------------------------------------------- 1 | """Deprecated Openai compatibility endpoint.""" 2 | -------------------------------------------------------------------------------- /fern/docs/assets/ui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frozen-dev71/private_gpt_medical/main/fern/docs/assets/ui.png -------------------------------------------------------------------------------- /private_gpt/constants.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | PROJECT_ROOT_PATH: Path = Path(__file__).parents[1] 4 | -------------------------------------------------------------------------------- /fern/docs/assets/favicon.ico: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/frozen-dev71/private_gpt_medical/main/fern/docs/assets/favicon.ico -------------------------------------------------------------------------------- /fern/docs/assets/header.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frozen-dev71/private_gpt_medical/main/fern/docs/assets/header.jpeg -------------------------------------------------------------------------------- /fern/docs/assets/logo_dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frozen-dev71/private_gpt_medical/main/fern/docs/assets/logo_dark.png -------------------------------------------------------------------------------- /private_gpt/ui/avatar-bot.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frozen-dev71/private_gpt_medical/main/private_gpt/ui/avatar-bot.ico -------------------------------------------------------------------------------- /private_gpt/utils/typing.py: -------------------------------------------------------------------------------- 1 | from typing import TypeVar 2 | 3 | T = TypeVar("T") 4 | K = TypeVar("K") 5 | V = TypeVar("V") 6 | -------------------------------------------------------------------------------- /tests/server/ingest/test.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frozen-dev71/private_gpt_medical/main/tests/server/ingest/test.pdf -------------------------------------------------------------------------------- /fern/docs/assets/logo_light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frozen-dev71/private_gpt_medical/main/fern/docs/assets/logo_light.png -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | .venv 2 | models 3 | .github 4 | .vscode 5 | .DS_Store 6 | .mypy_cache 7 | .ruff_cache 8 | local_data 9 | terraform 10 | tests 11 | Dockerfile 12 | Dockerfile.* -------------------------------------------------------------------------------- /tests/server/chunks/chunk_test.txt: -------------------------------------------------------------------------------- 1 | e88c1005-637d-4cb4-ae79-9b8eb58cab97 2 | 3 | b483dd15-78c4-4d67-b546-21a0d690bf43 4 | 5 | a8080238-b294-4598-ac9c-7abf4c8e0552 6 | 7 | 14208dac-c600-4a18-872b-5e45354cfff2 -------------------------------------------------------------------------------- /settings-gemini.yaml: -------------------------------------------------------------------------------- 1 | llm: 2 | mode: gemini 3 | 4 | embedding: 5 | mode: gemini 6 | 7 | gemini: 8 | api_key: ${GOOGLE_API_KEY:} 9 | model: models/gemini-pro 10 | embedding_model: models/embedding-001 11 | -------------------------------------------------------------------------------- /settings-openai.yaml: -------------------------------------------------------------------------------- 1 | server: 2 | env_name: ${APP_ENV:openai} 3 | 4 | llm: 5 | mode: openai 6 | 7 | embedding: 8 | mode: openai 9 | 10 | openai: 11 | api_key: ${OPENAI_API_KEY:} 12 | model: gpt-3.5-turbo 13 | -------------------------------------------------------------------------------- /fern/generators.yml: -------------------------------------------------------------------------------- 1 | groups: 2 | public: 3 | 
generators: 4 | - name: fernapi/fern-python-sdk 5 | version: 0.6.2 6 | output: 7 | location: local-file-system 8 | path: ../../pgpt-sdk/python 9 | -------------------------------------------------------------------------------- /private_gpt/main.py: -------------------------------------------------------------------------------- 1 | """FastAPI app creation, logger configuration and main API routes.""" 2 | 3 | from private_gpt.di import global_injector 4 | from private_gpt.launcher import create_app 5 | 6 | app = create_app(global_injector) 7 | -------------------------------------------------------------------------------- /private_gpt/open_ai/extensions/context_filter.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel, Field 2 | 3 | 4 | class ContextFilter(BaseModel): 5 | docs_ids: list[str] | None = Field( 6 | examples=[["c202d5e6-7b69-4869-81cc-dd574ee8ee11"]] 7 | ) 8 | -------------------------------------------------------------------------------- /settings-mock.yaml: -------------------------------------------------------------------------------- 1 | server: 2 | env_name: ${APP_ENV:mock} 3 | 4 | # This configuration allows you to use GPU for creating embeddings while avoiding loading LLM into vRAM 5 | llm: 6 | mode: mock 7 | 8 | embedding: 9 | mode: huggingface 10 | -------------------------------------------------------------------------------- /tests/server/utils/test_auth.py: -------------------------------------------------------------------------------- 1 | from fastapi.testclient import TestClient 2 | 3 | 4 | def test_default_does_not_require_auth(test_client: TestClient) -> None: 5 | response_before = test_client.get("/v1/ingest/list") 6 | assert response_before.status_code == 200 7 | -------------------------------------------------------------------------------- /settings-sagemaker.yaml: -------------------------------------------------------------------------------- 1 | server: 2 | env_name: ${APP_ENV:sagemaker} 3 | port: ${PORT:8001} 4 | 5 | ui: 6 | enabled: true 7 | path: / 8 | 9 | llm: 10 | mode: sagemaker 11 | 12 | embedding: 13 | mode: sagemaker 14 | 15 | sagemaker: 16 | llm_endpoint_name: llm 17 | embedding_endpoint_name: embedding -------------------------------------------------------------------------------- /settings-test.yaml: -------------------------------------------------------------------------------- 1 | server: 2 | env_name: test 3 | auth: 4 | enabled: false 5 | # Dummy secrets used for tests 6 | secret: "foo bar; dummy secret" 7 | 8 | data: 9 | local_data_folder: local_data/tests 10 | 11 | qdrant: 12 | path: local_data/tests 13 | 14 | llm: 15 | mode: mock 16 | 17 | embedding: 18 | mode: mock 19 | 20 | ui: 21 | enabled: false -------------------------------------------------------------------------------- /tests/ui/test_ui.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from fastapi.testclient import TestClient 3 | 4 | 5 | @pytest.mark.parametrize( 6 | "test_client", [{"ui": {"enabled": True, "path": "/ui"}}], indirect=True 7 | ) 8 | def test_ui_starts_in_the_given_endpoint(test_client: TestClient) -> None: 9 | response = test_client.get("/ui") 10 | assert response.status_code == 200 11 | -------------------------------------------------------------------------------- /docker-compose.yaml: -------------------------------------------------------------------------------- 1 | services: 2 | private-gpt: 3 | build: 4 | dockerfile: 
Dockerfile.external 5 | volumes: 6 | - ./local_data/:/home/worker/app/local_data 7 | ports: 8 | - 8001:8080 9 | environment: 10 | PORT: 8080 11 | PGPT_PROFILES: docker 12 | PGPT_MODE: ollama 13 | ollama: 14 | image: ollama/ollama:latest 15 | volumes: 16 | - ./models:/root/.ollama 17 | -------------------------------------------------------------------------------- /.github/workflows/release-please.yml: -------------------------------------------------------------------------------- 1 | name: release-please 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | permissions: 9 | contents: write 10 | pull-requests: write 11 | 12 | jobs: 13 | release-please: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: google-github-actions/release-please-action@v3 17 | with: 18 | release-type: simple 19 | version-file: version.txt -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .venv 2 | .env 3 | venv 4 | 5 | settings-me.yaml 6 | 7 | .ruff_cache 8 | .pytest_cache 9 | .mypy_cache 10 | 11 | # byte-compiled / optimized / DLL files 12 | __pycache__/ 13 | *.py[cod] 14 | 15 | # unit tests / coverage reports 16 | /tests-results.xml 17 | /.coverage 18 | /coverage.xml 19 | /htmlcov/ 20 | 21 | # pyenv 22 | /.python-version 23 | 24 | # IDE 25 | .idea/ 26 | .vscode/ 27 | /.run/ 28 | .fleet/ 29 | 30 | # macOS 31 | .DS_Store 32 | -------------------------------------------------------------------------------- /.github/workflows/fern-check.yml: -------------------------------------------------------------------------------- 1 | name: fern check 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - main 7 | paths: 8 | - "fern/**" 9 | 10 | jobs: 11 | fern-check: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Checkout repo 15 | uses: actions/checkout@v4 16 | 17 | - name: Install Fern 18 | run: npm install -g fern-api 19 | 20 | - name: Check Fern API is valid 21 | run: fern check -------------------------------------------------------------------------------- /fern/docs/pages/manual/ingestion-reset.mdx: -------------------------------------------------------------------------------- 1 | # Reset Local documents database 2 | 3 | When running in a local setup, you can remove all ingested documents by simply 4 | deleting all contents of the `local_data` folder (except `.gitignore`). 5 | 6 | To simplify this process, you can use the command: 7 | ```bash 8 | make wipe 9 | ``` 10 | 11 | # Advanced usage 12 | 13 | You can also delete individual documents from storage by using the 14 | `DELETE` endpoint of the Ingestion API.
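For example, you can first list the ingested documents, take the `doc_id` of the one you
want to remove, and delete it. The calls below are an illustrative sketch that assumes the
API is listening on the default `localhost:8001`; check the API reference for the exact routes:
```bash
# List ingested documents and note the doc_id of the one to remove
curl http://localhost:8001/v1/ingest/list

# Delete a single ingested document by its doc_id (example id shown)
curl -X DELETE "http://localhost:8001/v1/ingest/c202d5e6-7b69-4869-81cc-dd574ee8ee11"
```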
-------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pathlib 3 | from glob import glob 4 | 5 | root_path = pathlib.Path(__file__).parents[1] 6 | # This is to prevent a bug in IntelliJ that uses the wrong working directory 7 | os.chdir(root_path) 8 | 9 | 10 | def _as_module(fixture_path: str) -> str: 11 | return fixture_path.replace("/", ".").replace("\\", ".").replace(".py", "") 12 | 13 | 14 | pytest_plugins = [_as_module(fixture) for fixture in glob("tests/fixtures/[!_]*.py")] 15 | -------------------------------------------------------------------------------- /private_gpt/__main__.py: -------------------------------------------------------------------------------- 1 | # Start a FastAPI server with uvicorn 2 | 3 | import uvicorn 4 | 5 | from private_gpt.main import app 6 | from private_gpt.settings.settings import settings 7 | 8 | # Set log_config=None to avoid using the uvicorn logging configuration, and 9 | # use ours instead. For reference, see below: 10 | # https://github.com/tiangolo/fastapi/discussions/7457#discussioncomment-5141108 11 | uvicorn.run(app, host="0.0.0.0", port=settings().server.port, log_config=None) 12 | -------------------------------------------------------------------------------- /settings-azopenai.yaml: -------------------------------------------------------------------------------- 1 | server: 2 | env_name: ${APP_ENV:azopenai} 3 | 4 | llm: 5 | mode: azopenai 6 | 7 | embedding: 8 | mode: azopenai 9 | 10 | azopenai: 11 | api_key: ${AZ_OPENAI_API_KEY:} 12 | azure_endpoint: ${AZ_OPENAI_ENDPOINT:} 13 | embedding_deployment_name: ${AZ_OPENAI_EMBEDDING_DEPLOYMENT_NAME:} 14 | llm_deployment_name: ${AZ_OPENAI_LLM_DEPLOYMENT_NAME:} 15 | api_version: "2023-05-15" 16 | embedding_model: text-embedding-ada-002 17 | llm_model: gpt-35-turbo -------------------------------------------------------------------------------- /settings-vllm.yaml: -------------------------------------------------------------------------------- 1 | server: 2 | env_name: ${APP_ENV:vllm} 3 | 4 | llm: 5 | mode: openailike 6 | max_new_tokens: 512 7 | tokenizer: mistralai/Mistral-7B-Instruct-v0.2 8 | temperature: 0.1 9 | 10 | embedding: 11 | mode: huggingface 12 | ingest_mode: simple 13 | 14 | huggingface: 15 | embedding_hf_model_name: BAAI/bge-small-en-v1.5 16 | 17 | openai: 18 | api_base: http://localhost:8000/v1 19 | api_key: EMPTY 20 | model: facebook/opt-125m 21 | request_timeout: 600.0 -------------------------------------------------------------------------------- /tests/settings/test_settings.py: -------------------------------------------------------------------------------- 1 | from private_gpt.settings.settings import Settings, settings 2 | from tests.fixtures.mock_injector import MockInjector 3 | 4 | 5 | def test_settings_are_loaded_and_merged() -> None: 6 | assert settings().server.env_name == "test" 7 | 8 | 9 | def test_settings_can_be_overriden(injector: MockInjector) -> None: 10 | injector.bind_settings({"server": {"env_name": "overriden"}}) 11 | mocked_settings = injector.get(Settings) 12 | assert mocked_settings.server.env_name == "overriden" 13 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | # This CITATION.cff file was generated with cffinit. 2 | # Visit https://bit.ly/cffinit to generate yours today!
3 | 4 | cff-version: 1.2.0 5 | title: PrivateGPT 6 | message: >- 7 | If you use this software, please cite it using the 8 | metadata from this file. 9 | type: software 10 | authors: 11 | - name: Zylon by PrivateGPT 12 | address: hello@zylon.ai 13 | website: 'https://www.zylon.ai/' 14 | repository-code: 'https://github.com/zylon-ai/private-gpt' 15 | license: Apache-2.0 16 | date-released: '2023-05-02' 17 | -------------------------------------------------------------------------------- /private_gpt/server/health/health_router.py: -------------------------------------------------------------------------------- 1 | from typing import Literal 2 | 3 | from fastapi import APIRouter 4 | from pydantic import BaseModel, Field 5 | 6 | # No authentication or authorization is required to get the health status. 7 | health_router = APIRouter() 8 | 9 | 10 | class HealthResponse(BaseModel): 11 | status: Literal["ok"] = Field(default="ok") 12 | 13 | 14 | @health_router.get("/health", tags=["Health"]) 15 | def health() -> HealthResponse: 16 | """Return ok if the system is up.""" 17 | return HealthResponse(status="ok") 18 | -------------------------------------------------------------------------------- /tests/fixtures/fast_api_test_client.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from fastapi.testclient import TestClient 3 | 4 | from private_gpt.launcher import create_app 5 | from tests.fixtures.mock_injector import MockInjector 6 | 7 | 8 | @pytest.fixture() 9 | def test_client(request: pytest.FixtureRequest, injector: MockInjector) -> TestClient: 10 | if request is not None and hasattr(request, "param"): 11 | injector.bind_settings(request.param or {}) 12 | 13 | app_under_test = create_app(injector.test_injector) 14 | return TestClient(app_under_test) 15 | -------------------------------------------------------------------------------- /private_gpt/paths.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from private_gpt.constants import PROJECT_ROOT_PATH 4 | from private_gpt.settings.settings import settings 5 | 6 | 7 | def _absolute_or_from_project_root(path: str) -> Path: 8 | if path.startswith("/"): 9 | return Path(path) 10 | return PROJECT_ROOT_PATH / path 11 | 12 | 13 | models_path: Path = PROJECT_ROOT_PATH / "models" 14 | models_cache_path: Path = models_path / "cache" 15 | docs_path: Path = PROJECT_ROOT_PATH / "docs" 16 | local_data_path: Path = _absolute_or_from_project_root( 17 | settings().data.local_data_folder 18 | ) 19 | -------------------------------------------------------------------------------- /private_gpt/di.py: -------------------------------------------------------------------------------- 1 | from injector import Injector 2 | 3 | from private_gpt.settings.settings import Settings, unsafe_typed_settings 4 | 5 | 6 | def create_application_injector() -> Injector: 7 | _injector = Injector(auto_bind=True) 8 | _injector.binder.bind(Settings, to=unsafe_typed_settings) 9 | return _injector 10 | 11 | 12 | """ 13 | Global injector for the application. 14 | 15 | Avoid using this reference; it will make your code harder to test.
16 | 17 | Instead, use the `request.state.injector` reference, which is bound to every request. 18 | """ 19 | global_injector: Injector = create_application_injector() 20 | -------------------------------------------------------------------------------- /.github/workflows/publish-docs.yml: -------------------------------------------------------------------------------- 1 | name: publish docs 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | paths: 8 | - "fern/**" 9 | 10 | jobs: 11 | publish-docs: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Checkout repo 15 | uses: actions/checkout@v4 16 | 17 | - name: Setup node 18 | uses: actions/setup-node@v3 19 | 20 | - name: Download Fern 21 | run: npm install -g fern-api 22 | 23 | - name: Generate and Publish Docs 24 | env: 25 | FERN_TOKEN: ${{ secrets.FERN_TOKEN }} 26 | run: fern generate --docs --log-level debug 27 | -------------------------------------------------------------------------------- /tests/fixtures/auto_close_qdrant.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from private_gpt.components.vector_store.vector_store_component import ( 4 | VectorStoreComponent, 5 | ) 6 | from tests.fixtures.mock_injector import MockInjector 7 | 8 | 9 | @pytest.fixture(autouse=True) 10 | def _auto_close_vector_store_client(injector: MockInjector) -> None: 11 | """Auto close VectorStore client after each test. 12 | 13 | The VectorStore client (qdrant/chromadb) opens a connection to the 14 | database that causes issues when tests run too quickly, 15 | so close it explicitly after each test. 16 | """ 17 | yield 18 | injector.get(VectorStoreComponent).close() 19 | -------------------------------------------------------------------------------- /tests/server/embeddings/test_embedding_routes.py: -------------------------------------------------------------------------------- 1 | from fastapi.testclient import TestClient 2 | 3 | from private_gpt.server.embeddings.embeddings_router import ( 4 | EmbeddingsBody, 5 | EmbeddingsResponse, 6 | ) 7 | 8 | 9 | def test_embeddings_generation(test_client: TestClient) -> None: 10 | body = EmbeddingsBody(input="Embed me") 11 | response = test_client.post("/v1/embeddings", json=body.model_dump()) 12 | 13 | assert response.status_code == 200 14 | embedding_response = EmbeddingsResponse.model_validate(response.json()) 15 | assert len(embedding_response.data) > 0 16 | assert len(embedding_response.data[0].embedding) > 0 17 | -------------------------------------------------------------------------------- /settings-local.yaml: -------------------------------------------------------------------------------- 1 | # poetry install --extras "ui llms-llama-cpp vector-stores-qdrant embeddings-huggingface" 2 | server: 3 | env_name: ${APP_ENV:local} 4 | 5 | llm: 6 | mode: llamacpp 7 | # Should match the selected model 8 | max_new_tokens: 512 9 | context_window: 3900 10 | tokenizer: mistralai/Mistral-7B-Instruct-v0.2 11 | prompt_style: "mistral" 12 | 13 | llamacpp: 14 | llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF 15 | llm_hf_model_file: mistral-7b-instruct-v0.2.Q4_K_M.gguf 16 | 17 | embedding: 18 | mode: huggingface 19 | 20 | huggingface: 21 | embedding_hf_model_name: BAAI/bge-small-en-v1.5 22 | 23 | vectorstore: 24 | database: qdrant 25 | 26 | qdrant: 27 | path: local_data/private_gpt/qdrant 28 | -------------------------------------------------------------------------------- /fern/docs/pages/api-reference/api-reference.mdx:
-------------------------------------------------------------------------------- 1 | # API Reference 2 | 3 | The API is divided in two logical blocks: 4 | 5 | 1. High-level API, abstracting all the complexity of a RAG (Retrieval Augmented Generation) pipeline implementation: 6 | - Ingestion of documents: internally managing document parsing, splitting, metadata extraction, 7 | embedding generation and storage. 8 | - Chat & Completions using context from ingested documents: abstracting the retrieval of context, the prompt 9 | engineering and the response generation. 10 | 11 | 2. Low-level API, allowing advanced users to implement their own complex pipelines: 12 | - Embeddings generation: based on a piece of text. 13 | - Contextual chunks retrieval: given a query, returns the most relevant chunks of text from the ingested 14 | documents. -------------------------------------------------------------------------------- /tests/fixtures/ingest_helper.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | from fastapi.testclient import TestClient 5 | 6 | from private_gpt.server.ingest.ingest_router import IngestResponse 7 | 8 | 9 | class IngestHelper: 10 | def __init__(self, test_client: TestClient): 11 | self.test_client = test_client 12 | 13 | def ingest_file(self, path: Path) -> IngestResponse: 14 | files = {"file": (path.name, path.open("rb"))} 15 | 16 | response = self.test_client.post("/v1/ingest/file", files=files) 17 | assert response.status_code == 200 18 | ingest_result = IngestResponse.model_validate(response.json()) 19 | return ingest_result 20 | 21 | 22 | @pytest.fixture() 23 | def ingest_helper(test_client: TestClient) -> IngestHelper: 24 | return IngestHelper(test_client) 25 | -------------------------------------------------------------------------------- /tests/server/chunks/test_chunk_routes.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from fastapi.testclient import TestClient 4 | 5 | from private_gpt.server.chunks.chunks_router import ChunksBody, ChunksResponse 6 | from tests.fixtures.ingest_helper import IngestHelper 7 | 8 | 9 | def test_chunks_retrieval(test_client: TestClient, ingest_helper: IngestHelper) -> None: 10 | # Make sure there is at least some chunk to query in the database 11 | path = Path(__file__).parents[0] / "chunk_test.txt" 12 | ingest_helper.ingest_file(path) 13 | 14 | body = ChunksBody(text="b483dd15-78c4-4d67-b546-21a0d690bf43") 15 | response = test_client.post("/v1/chunks", json=body.model_dump()) 16 | assert response.status_code == 200 17 | chunk_response = ChunksResponse.model_validate(response.json()) 18 | assert len(chunk_response.data) > 0 19 | -------------------------------------------------------------------------------- /settings-ollama-pg.yaml: -------------------------------------------------------------------------------- 1 | # Using ollama and postgres for the vector, doc and index store. Ollama is also used for embeddings. 
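# Note: this profile assumes the Ollama models referenced below have already been pulled
# (e.g. `ollama pull mistral` and `ollama pull nomic-embed-text`) and that a PostgreSQL
# instance is reachable with the credentials configured under `postgres:`.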
2 | # To use install these extras: 3 | # poetry install --extras "llms-ollama ui vector-stores-postgres embeddings-ollama storage-nodestore-postgres" 4 | server: 5 | env_name: ${APP_ENV:ollama} 6 | 7 | llm: 8 | mode: ollama 9 | max_new_tokens: 512 10 | context_window: 3900 11 | 12 | embedding: 13 | mode: ollama 14 | embed_dim: 768 15 | 16 | ollama: 17 | llm_model: mistral 18 | embedding_model: nomic-embed-text 19 | api_base: http://localhost:11434 20 | 21 | nodestore: 22 | database: postgres 23 | 24 | vectorstore: 25 | database: postgres 26 | 27 | postgres: 28 | host: localhost 29 | port: 5432 30 | database: postgres 31 | user: postgres 32 | password: admin 33 | schema_name: private_gpt 34 | 35 | -------------------------------------------------------------------------------- /.github/workflows/actions/install_dependencies/action.yml: -------------------------------------------------------------------------------- 1 | name: "Install Dependencies" 2 | description: "Action to build the project dependencies from the main versions" 3 | inputs: 4 | python_version: 5 | required: true 6 | type: string 7 | default: "3.11.4" 8 | poetry_version: 9 | required: true 10 | type: string 11 | default: "1.5.1" 12 | 13 | runs: 14 | using: composite 15 | steps: 16 | - name: Install Poetry 17 | uses: snok/install-poetry@v1 18 | with: 19 | version: ${{ inputs.poetry_version }} 20 | virtualenvs-create: true 21 | virtualenvs-in-project: false 22 | installer-parallel: true 23 | - uses: actions/setup-python@v4 24 | with: 25 | python-version: ${{ inputs.python_version }} 26 | cache: "poetry" 27 | - name: Install Dependencies 28 | run: poetry install --extras "ui vector-stores-qdrant" --no-root 29 | shell: bash 30 | 31 | -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | # This workflow warns and then closes issues and PRs that have had no activity for a specified amount of time. 2 | # 3 | # You can adjust the behavior by modifying this file. 4 | # For more information, see: 5 | # https://github.com/actions/stale 6 | name: Mark stale issues and pull requests 7 | 8 | on: 9 | schedule: 10 | - cron: '42 5 * * *' 11 | 12 | jobs: 13 | stale: 14 | 15 | runs-on: ubuntu-latest 16 | permissions: 17 | issues: write 18 | pull-requests: write 19 | 20 | steps: 21 | - uses: actions/stale@v8 22 | with: 23 | repo-token: ${{ secrets.GITHUB_TOKEN }} 24 | days-before-stale: 15 25 | stale-issue-message: 'Stale issue' 26 | stale-pr-message: 'Stale pull request' 27 | stale-issue-label: 'stale' 28 | stale-pr-label: 'stale' 29 | exempt-issue-labels: 'autorelease: pending' 30 | exempt-pr-labels: 'autorelease: pending' 31 | -------------------------------------------------------------------------------- /fern/docs/pages/api-reference/sdks.mdx: -------------------------------------------------------------------------------- 1 | We use [Fern](www.buildwithfern.com) to offer API clients for Node.js, Python, Go, and Java. 2 | We recommend using these clients to interact with our endpoints. 3 | The clients are kept up to date automatically, so we encourage you to use the latest version. 4 | 5 | ## SDKs 6 | 7 | *Coming soon!* 8 | 9 | 10 | 15 | 20 |
39 | -------------------------------------------------------------------------------- /private_gpt/server/embeddings/embeddings_service.py: -------------------------------------------------------------------------------- 1 | from typing import Literal 2 | 3 | from injector import inject, singleton 4 | from pydantic import BaseModel, Field 5 | 6 | from private_gpt.components.embedding.embedding_component import EmbeddingComponent 7 | 8 | 9 | class Embedding(BaseModel): 10 | index: int 11 | object: Literal["embedding"] 12 | embedding: list[float] = Field(examples=[[0.0023064255, -0.009327292]]) 13 | 14 | 15 | @singleton 16 | class EmbeddingsService: 17 | @inject 18 | def __init__(self, embedding_component: EmbeddingComponent) -> None: 19 | self.embedding_model = embedding_component.embedding_model 20 | 21 | def texts_embeddings(self, texts: list[str]) -> list[Embedding]: 22 | texts_embeddings = self.embedding_model.get_text_embedding_batch(texts) 23 | return [ 24 | Embedding( 25 | index=texts_embeddings.index(embedding), 26 | object="embedding", 27 | embedding=embedding, 28 | ) 29 | for embedding in texts_embeddings 30 | ] 31 | -------------------------------------------------------------------------------- /private_gpt/__init__.py: -------------------------------------------------------------------------------- 1 | """private-gpt.""" 2 | 3 | import logging 4 | import os 5 | 6 | # Set to 'DEBUG' to have extensive logging turned on, even for libraries 7 | ROOT_LOG_LEVEL = "INFO" 8 | 9 | PRETTY_LOG_FORMAT = ( 10 | "%(asctime)s.%(msecs)03d [%(levelname)-8s] %(name)+25s - %(message)s" 11 | ) 12 | logging.basicConfig(level=ROOT_LOG_LEVEL, format=PRETTY_LOG_FORMAT, datefmt="%H:%M:%S") 13 | logging.captureWarnings(True) 14 | 15 | # Disable gradio analytics 16 | # This is done this way because gradio does not solely rely on what values are 17 | # passed to gr.Blocks(enable_analytics=...) but also on the environment 18 | # variable GRADIO_ANALYTICS_ENABLED. `gradio.strings` actually reads this env 19 | # directly, so to fully disable gradio analytics we need to set this env var. 20 | os.environ["GRADIO_ANALYTICS_ENABLED"] = "False" 21 | 22 | # Disable chromaDB telemetry 23 | # It is already disabled, see PR#1144 24 | # os.environ["ANONYMIZED_TELEMETRY"] = "False" 25 | 26 | # adding tiktoken cache path within repo to be able to run in offline environment. 
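# Note: for fully offline use, this cache directory is expected to already contain the
# encoding files that tiktoken would otherwise try to download on first use.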
27 | os.environ["TIKTOKEN_CACHE_DIR"] = "tiktoken_cache" 28 | -------------------------------------------------------------------------------- /private_gpt/server/ingest/model.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Literal 2 | 3 | from llama_index.core.schema import Document 4 | from pydantic import BaseModel, Field 5 | 6 | 7 | class IngestedDoc(BaseModel): 8 | object: Literal["ingest.document"] 9 | doc_id: str = Field(examples=["c202d5e6-7b69-4869-81cc-dd574ee8ee11"]) 10 | doc_metadata: dict[str, Any] | None = Field( 11 | examples=[ 12 | { 13 | "page_label": "2", 14 | "file_name": "Sales Report Q3 2023.pdf", 15 | } 16 | ] 17 | ) 18 | 19 | @staticmethod 20 | def curate_metadata(metadata: dict[str, Any]) -> dict[str, Any]: 21 | """Remove unwanted metadata keys.""" 22 | for key in ["doc_id", "window", "original_text"]: 23 | metadata.pop(key, None) 24 | return metadata 25 | 26 | @staticmethod 27 | def from_document(document: Document) -> "IngestedDoc": 28 | return IngestedDoc( 29 | object="ingest.document", 30 | doc_id=document.doc_id, 31 | doc_metadata=IngestedDoc.curate_metadata(document.metadata), 32 | ) 33 | -------------------------------------------------------------------------------- /Dockerfile.external: -------------------------------------------------------------------------------- 1 | FROM python:3.11.6-slim-bookworm as base 2 | 3 | # Install poetry 4 | RUN pip install pipx 5 | RUN python3 -m pipx ensurepath 6 | RUN pipx install poetry 7 | ENV PATH="/root/.local/bin:$PATH" 8 | ENV PATH=".venv/bin/:$PATH" 9 | 10 | # https://python-poetry.org/docs/configuration/#virtualenvsin-project 11 | ENV POETRY_VIRTUALENVS_IN_PROJECT=true 12 | 13 | FROM base as dependencies 14 | WORKDIR /home/worker/app 15 | COPY pyproject.toml poetry.lock ./ 16 | 17 | RUN poetry install --extras "ui vector-stores-qdrant llms-ollama embeddings-ollama" 18 | 19 | FROM base as app 20 | 21 | ENV PYTHONUNBUFFERED=1 22 | ENV PORT=8080 23 | EXPOSE 8080 24 | 25 | # Prepare a non-root user 26 | RUN adduser --system worker 27 | WORKDIR /home/worker/app 28 | 29 | RUN mkdir local_data; chown worker local_data 30 | RUN mkdir models; chown worker models 31 | COPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv 32 | COPY --chown=worker private_gpt/ private_gpt 33 | COPY --chown=worker fern/ fern 34 | COPY --chown=worker *.yaml *.md ./ 35 | COPY --chown=worker scripts/ scripts 36 | 37 | ENV PYTHONPATH="$PYTHONPATH:/private_gpt/" 38 | 39 | USER worker 40 | ENTRYPOINT python -m private_gpt -------------------------------------------------------------------------------- /scripts/extract_openapi.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import sys 4 | 5 | import yaml 6 | from uvicorn.importer import import_from_string 7 | 8 | parser = argparse.ArgumentParser(prog="extract_openapi.py") 9 | parser.add_argument("app", help='App import string. Eg. 
"main:app"', default="main:app") 10 | parser.add_argument("--app-dir", help="Directory containing the app", default=None) 11 | parser.add_argument( 12 | "--out", help="Output file ending in .json or .yaml", default="openapi.yaml" 13 | ) 14 | 15 | if __name__ == "__main__": 16 | args = parser.parse_args() 17 | 18 | if args.app_dir is not None: 19 | print(f"adding {args.app_dir} to sys.path") 20 | sys.path.insert(0, args.app_dir) 21 | 22 | print(f"importing app from {args.app}") 23 | app = import_from_string(args.app) 24 | openapi = app.openapi() 25 | version = openapi.get("openapi", "unknown version") 26 | 27 | print(f"writing openapi spec v{version}") 28 | with open(args.out, "w") as f: 29 | if args.out.endswith(".json"): 30 | json.dump(openapi, f, indent=2) 31 | else: 32 | yaml.dump(openapi, f, sort_keys=False) 33 | 34 | print(f"spec written to {args.out}") 35 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | default_install_hook_types: 2 | # Mandatory to install both pre-commit and pre-push hooks (see https://pre-commit.com/#top_level-default_install_hook_types) 3 | # Add new hook types here to ensure automatic installation when running `pre-commit install` 4 | - pre-commit 5 | - pre-push 6 | repos: 7 | - repo: https://github.com/pre-commit/pre-commit-hooks 8 | rev: v4.3.0 9 | hooks: 10 | - id: trailing-whitespace 11 | - id: end-of-file-fixer 12 | - id: check-yaml 13 | - id: check-json 14 | - id: check-added-large-files 15 | 16 | - repo: local 17 | hooks: 18 | - id: black 19 | name: Formatting (black) 20 | entry: black 21 | language: system 22 | types: [python] 23 | stages: [commit] 24 | - id: ruff 25 | name: Linter (ruff) 26 | entry: ruff 27 | language: system 28 | types: [python] 29 | stages: [commit] 30 | - id: mypy 31 | name: Type checking (mypy) 32 | entry: make mypy 33 | pass_filenames: false 34 | language: system 35 | types: [python] 36 | stages: [commit] 37 | - id: test 38 | name: Unit tests (pytest) 39 | entry: make test 40 | pass_filenames: false 41 | language: system 42 | types: [python] 43 | stages: [push] -------------------------------------------------------------------------------- /settings-docker.yaml: -------------------------------------------------------------------------------- 1 | server: 2 | env_name: ${APP_ENV:prod} 3 | port: ${PORT:8080} 4 | 5 | llm: 6 | mode: ${PGPT_MODE:mock} 7 | 8 | embedding: 9 | mode: ${PGPT_MODE:sagemaker} 10 | 11 | llamacpp: 12 | llm_hf_repo_id: ${PGPT_HF_REPO_ID:TheBloke/Mistral-7B-Instruct-v0.1-GGUF} 13 | llm_hf_model_file: ${PGPT_HF_MODEL_FILE:mistral-7b-instruct-v0.1.Q4_K_M.gguf} 14 | 15 | huggingface: 16 | embedding_hf_model_name: ${PGPT_EMBEDDING_HF_MODEL_NAME:BAAI/bge-small-en-v1.5} 17 | 18 | sagemaker: 19 | llm_endpoint_name: ${PGPT_SAGEMAKER_LLM_ENDPOINT_NAME:} 20 | embedding_endpoint_name: ${PGPT_SAGEMAKER_EMBEDDING_ENDPOINT_NAME:} 21 | 22 | ollama: 23 | llm_model: ${PGPT_OLLAMA_LLM_MODEL:mistral} 24 | embedding_model: ${PGPT_OLLAMA_EMBEDDING_MODEL:nomic-embed-text} 25 | api_base: ${PGPT_OLLAMA_API_BASE:http://ollama:11434} 26 | embedding_api_base: ${PGPT_OLLAMA_EMBEDDING_API_BASE:http://ollama:11434} 27 | tfs_z: ${PGPT_OLLAMA_TFS_Z:1.0} 28 | top_k: ${PGPT_OLLAMA_TOP_K:40} 29 | top_p: ${PGPT_OLLAMA_TOP_P:0.9} 30 | repeat_last_n: ${PGPT_OLLAMA_REPEAT_LAST_N:64} 31 | repeat_penalty: ${PGPT_OLLAMA_REPEAT_PENALTY:1.2} 32 | request_timeout: ${PGPT_OLLAMA_REQUEST_TIMEOUT:600.0} 33 | 34 | ui: 
35 | enabled: true 36 | path: / 37 | -------------------------------------------------------------------------------- /private_gpt/server/embeddings/embeddings_router.py: -------------------------------------------------------------------------------- 1 | from typing import Literal 2 | 3 | from fastapi import APIRouter, Depends, Request 4 | from pydantic import BaseModel 5 | 6 | from private_gpt.server.embeddings.embeddings_service import ( 7 | Embedding, 8 | EmbeddingsService, 9 | ) 10 | from private_gpt.server.utils.auth import authenticated 11 | 12 | embeddings_router = APIRouter(prefix="/v1", dependencies=[Depends(authenticated)]) 13 | 14 | 15 | class EmbeddingsBody(BaseModel): 16 | input: str | list[str] 17 | 18 | 19 | class EmbeddingsResponse(BaseModel): 20 | object: Literal["list"] 21 | model: Literal["private-gpt"] 22 | data: list[Embedding] 23 | 24 | 25 | @embeddings_router.post("/embeddings", tags=["Embeddings"]) 26 | def embeddings_generation(request: Request, body: EmbeddingsBody) -> EmbeddingsResponse: 27 | """Get a vector representation of a given input. 28 | 29 | That vector representation can be easily consumed 30 | by machine learning models and algorithms. 31 | """ 32 | service = request.state.injector.get(EmbeddingsService) 33 | input_texts = body.input if isinstance(body.input, list) else [body.input] 34 | embeddings = service.texts_embeddings(input_texts) 35 | return EmbeddingsResponse(object="list", model="private-gpt", data=embeddings) 36 | -------------------------------------------------------------------------------- /tests/server/chat/test_chat_routes.py: -------------------------------------------------------------------------------- 1 | from fastapi.testclient import TestClient 2 | 3 | from private_gpt.open_ai.openai_models import OpenAICompletion, OpenAIMessage 4 | from private_gpt.server.chat.chat_router import ChatBody 5 | 6 | 7 | def test_chat_route_produces_a_stream(test_client: TestClient) -> None: 8 | body = ChatBody( 9 | messages=[OpenAIMessage(content="test", role="user")], 10 | use_context=False, 11 | stream=True, 12 | ) 13 | response = test_client.post("/v1/chat/completions", json=body.model_dump()) 14 | 15 | raw_events = response.text.split("\n\n") 16 | events = [ 17 | item.removeprefix("data: ") for item in raw_events if item.startswith("data: ") 18 | ] 19 | assert response.status_code == 200 20 | assert "text/event-stream" in response.headers["content-type"] 21 | assert len(events) > 0 22 | assert events[-1] == "[DONE]" 23 | 24 | 25 | def test_chat_route_produces_a_single_value(test_client: TestClient) -> None: 26 | body = ChatBody( 27 | messages=[OpenAIMessage(content="test", role="user")], 28 | use_context=False, 29 | stream=False, 30 | ) 31 | response = test_client.post("/v1/chat/completions", json=body.model_dump()) 32 | 33 | # No asserts, if it validates it's good 34 | OpenAICompletion.model_validate(response.json()) 35 | assert response.status_code == 200 36 | -------------------------------------------------------------------------------- /tests/settings/test_settings_loader.py: -------------------------------------------------------------------------------- 1 | import io 2 | import os 3 | 4 | import pytest 5 | 6 | from private_gpt.settings.yaml import load_yaml_with_envvars 7 | 8 | 9 | def test_environment_variables_are_loaded() -> None: 10 | sample_yaml = """ 11 | replaced: ${TEST_REPLACE_ME} 12 | """ 13 | env = {"TEST_REPLACE_ME": "replaced"} 14 | loaded = load_yaml_with_envvars(io.StringIO(sample_yaml), env) 15 | os.environ.copy() 16 | 
assert loaded["replaced"] == "replaced" 17 | 18 | 19 | def test_environment_defaults_variables_are_loaded() -> None: 20 | sample_yaml = """ 21 | replaced: ${PGPT_EMBEDDING_HF_MODEL_NAME:BAAI/bge-small-en-v1.5} 22 | """ 23 | loaded = load_yaml_with_envvars(io.StringIO(sample_yaml), {}) 24 | assert loaded["replaced"] == "BAAI/bge-small-en-v1.5" 25 | 26 | 27 | def test_environment_defaults_variables_are_loaded_with_duplicated_delimiters() -> None: 28 | sample_yaml = """ 29 | replaced: ${PGPT_EMBEDDING_HF_MODEL_NAME::duped::} 30 | """ 31 | loaded = load_yaml_with_envvars(io.StringIO(sample_yaml), {}) 32 | assert loaded["replaced"] == ":duped::" 33 | 34 | 35 | def test_environment_without_defaults_fails() -> None: 36 | sample_yaml = """ 37 | replaced: ${TEST_REPLACE_ME} 38 | """ 39 | with pytest.raises(ValueError) as error: 40 | load_yaml_with_envvars(io.StringIO(sample_yaml), {}) 41 | assert error is not None 42 | -------------------------------------------------------------------------------- /.github/workflows/docker.yml: -------------------------------------------------------------------------------- 1 | name: docker 2 | 3 | on: 4 | release: 5 | types: [ published ] 6 | workflow_dispatch: 7 | 8 | env: 9 | REGISTRY: ghcr.io 10 | IMAGE_NAME: ${{ github.repository }} 11 | 12 | jobs: 13 | build-and-push-image: 14 | runs-on: ubuntu-latest 15 | permissions: 16 | contents: read 17 | packages: write 18 | steps: 19 | - name: Checkout repository 20 | uses: actions/checkout@v4 21 | - name: Log in to the Container registry 22 | uses: docker/login-action@v3 23 | with: 24 | registry: ${{ env.REGISTRY }} 25 | username: ${{ github.actor }} 26 | password: ${{ secrets.GITHUB_TOKEN }} 27 | - name: Extract metadata (tags, labels) for Docker 28 | id: meta 29 | uses: docker/metadata-action@v5 30 | with: 31 | images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} 32 | tags: | 33 | type=ref,event=branch 34 | type=ref,event=pr 35 | type=semver,pattern={{version}} 36 | type=semver,pattern={{major}}.{{minor}} 37 | type=sha 38 | - name: Build and push Docker image 39 | uses: docker/build-push-action@v5 40 | with: 41 | context: . 42 | file: Dockerfile.external 43 | push: true 44 | tags: ${{ steps.meta.outputs.tags }} 45 | labels: ${{ steps.meta.outputs.labels }} 46 | -------------------------------------------------------------------------------- /fern/docs/pages/overview/welcome.mdx: -------------------------------------------------------------------------------- 1 | PrivateGPT provides an **API** containing all the building blocks required to 2 | build **private, context-aware AI applications**. 3 | The API follows and extends OpenAI API standard, and supports both normal and streaming responses. 4 | That means that, if you can use OpenAI API in one of your tools, you can use your own PrivateGPT API instead, 5 | with no code changes, **and for free** if you are running privateGPT in a `local` setup. 6 | 7 | Get started by understanding the [Main Concepts and Installation](/installation) and then dive into the [API Reference](/api-reference). 8 | 9 | ## Frequently Visited Resources 10 | 11 | 12 | 17 | 22 | 27 | 32 | 33 | 34 |
35 | 36 | 37 | 38 | A working **Gradio UI client** is provided to test the API, together with a set of useful tools such as bulk 39 | model download script, ingestion script, documents folder watch, etc. 40 | -------------------------------------------------------------------------------- /tests/fixtures/mock_injector.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Callable 2 | from typing import Any 3 | from unittest.mock import MagicMock 4 | 5 | import pytest 6 | from injector import Provider, ScopeDecorator, singleton 7 | 8 | from private_gpt.di import create_application_injector 9 | from private_gpt.settings.settings import Settings, unsafe_settings 10 | from private_gpt.settings.settings_loader import merge_settings 11 | from private_gpt.utils.typing import T 12 | 13 | 14 | class MockInjector: 15 | def __init__(self) -> None: 16 | self.test_injector = create_application_injector() 17 | 18 | def bind_mock( 19 | self, 20 | interface: type[T], 21 | mock: (T | (Callable[..., T] | Provider[T])) | None = None, 22 | *, 23 | scope: ScopeDecorator = singleton, 24 | ) -> T: 25 | if mock is None: 26 | mock = MagicMock() 27 | self.test_injector.binder.bind(interface, to=mock, scope=scope) 28 | return mock # type: ignore 29 | 30 | def bind_settings(self, settings: dict[str, Any]) -> Settings: 31 | merged = merge_settings([unsafe_settings, settings]) 32 | new_settings = Settings(**merged) 33 | self.test_injector.binder.bind(Settings, new_settings) 34 | return new_settings 35 | 36 | def get(self, interface: type[T]) -> T: 37 | return self.test_injector.get(interface) 38 | 39 | 40 | @pytest.fixture() 41 | def injector() -> MockInjector: 42 | return MockInjector() 43 | -------------------------------------------------------------------------------- /private_gpt/settings/yaml.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import typing 4 | from typing import Any, TextIO 5 | 6 | from yaml import SafeLoader 7 | 8 | _env_replace_matcher = re.compile(r"\$\{(\w|_)+:?.*}") 9 | 10 | 11 | @typing.no_type_check # pyaml does not have good hints, everything is Any 12 | def load_yaml_with_envvars( 13 | stream: TextIO, environ: dict[str, Any] = os.environ 14 | ) -> dict[str, Any]: 15 | """Load yaml file with environment variable expansion. 16 | 17 | The pattern ${VAR} or ${VAR:default} will be replaced with 18 | the value of the environment variable. 
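    For example, with the patterns used in this project's settings files:
        port: ${PORT:8080}           -> the value of PORT, or "8080" if it is not set
        api_key: ${OPENAI_API_KEY:}  -> the value of the variable, or an empty string if unset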
19 | """ 20 | loader = SafeLoader(stream) 21 | 22 | def load_env_var(_, node) -> str: 23 | """Extract the matched value, expand env variable, and replace the match.""" 24 | value = str(node.value).removeprefix("${").removesuffix("}") 25 | split = value.split(":", 1) 26 | env_var = split[0] 27 | value = environ.get(env_var) 28 | default = None if len(split) == 1 else split[1] 29 | if value is None and default is None: 30 | raise ValueError( 31 | f"Environment variable {env_var} is not set and no default was provided" 32 | ) 33 | return value or default 34 | 35 | loader.add_implicit_resolver("env_var_replacer", _env_replace_matcher, None) 36 | loader.add_constructor("env_var_replacer", load_env_var) 37 | 38 | try: 39 | return loader.get_single_data() 40 | finally: 41 | loader.dispose() 42 | -------------------------------------------------------------------------------- /Dockerfile.local: -------------------------------------------------------------------------------- 1 | ### IMPORTANT, THIS IMAGE CAN ONLY BE RUN IN LINUX DOCKER 2 | ### You will run into a segfault on macOS 3 | FROM python:3.11.6-slim-bookworm as base 4 | 5 | # Install poetry 6 | RUN pip install pipx 7 | RUN python3 -m pipx ensurepath 8 | RUN pipx install poetry 9 | ENV PATH="/root/.local/bin:$PATH" 10 | ENV PATH=".venv/bin/:$PATH" 11 | 12 | # Dependencies to build llama-cpp 13 | RUN apt update && apt install -y \ 14 | libopenblas-dev\ 15 | ninja-build\ 16 | build-essential\ 17 | pkg-config\ 18 | wget 19 | 20 | # https://python-poetry.org/docs/configuration/#virtualenvsin-project 21 | ENV POETRY_VIRTUALENVS_IN_PROJECT=true 22 | 23 | FROM base as dependencies 24 | WORKDIR /home/worker/app 25 | COPY pyproject.toml poetry.lock ./ 26 | 27 | RUN poetry install --extras "ui embeddings-huggingface llms-llama-cpp vector-stores-qdrant" 28 | 29 | FROM base as app 30 | 31 | ENV PYTHONUNBUFFERED=1 32 | ENV PORT=8080 33 | EXPOSE 8080 34 | 35 | # Prepare a non-root user 36 | RUN adduser --group worker 37 | RUN adduser --system --ingroup worker worker 38 | WORKDIR /home/worker/app 39 | 40 | RUN mkdir local_data; chown worker local_data 41 | RUN mkdir models; chown worker models 42 | COPY --chown=worker --from=dependencies /home/worker/app/.venv/ .venv 43 | COPY --chown=worker private_gpt/ private_gpt 44 | COPY --chown=worker fern/ fern 45 | COPY --chown=worker *.yaml *.md ./ 46 | COPY --chown=worker scripts/ scripts 47 | 48 | ENV PYTHONPATH="$PYTHONPATH:/private_gpt/" 49 | 50 | USER worker 51 | ENTRYPOINT python -m private_gpt -------------------------------------------------------------------------------- /fern/README.md: -------------------------------------------------------------------------------- 1 | # Documentation of privateGPT 2 | 3 | The documentation of this project is rendered with [fern](https://github.com/fern-api/fern). 4 | 5 | Fern transforms your `.md` and `.mdx` files into a static website: your documentation. 6 | 7 | The configuration of your documentation is done in the `./docs.yml` file. 8 | There, you can configure the navbar, tabs, sections and pages being rendered. 9 | 10 | The documentation of fern (and the syntax of its `docs.yml` configuration) is 11 | available at [docs.buildwithfern.com](https://docs.buildwithfern.com/).
12 | 13 | ## How to run fern 14 | 15 | **You cannot render your documentation locally without fern credentials.** 16 | 17 | To see what your documentation looks like, you **have to** use the CI/CD of this 18 | repository (by opening a PR, a CI/CD job will be executed, and a preview of 19 | your PR's documentation will be deployed to Vercel automatically, through fern). 20 | 21 | The only thing you can do locally is to run `fern check`, which checks the syntax of 22 | your `docs.yml` file. 23 | 24 | ## How to add a new page 25 | Add a new `page` entry in `docs.yml`, with the following syntax: 26 | 27 | ```yml 28 | navigation: 29 | # ... 30 | - tab: my-existing-tab 31 | layout: 32 | # ... 33 | - section: My Existing Section 34 | contents: 35 | # ... 36 | - page: My new page display name 37 | # The path of the page, relative to `fern/` 38 | path: ./docs/pages/my-existing-tab/new-page-content.mdx 39 | ``` -------------------------------------------------------------------------------- /private_gpt/server/ingest/ingest_watcher.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Callable 2 | from pathlib import Path 3 | from typing import Any 4 | 5 | from watchdog.events import ( 6 | FileCreatedEvent, 7 | FileModifiedEvent, 8 | FileSystemEvent, 9 | FileSystemEventHandler, 10 | ) 11 | from watchdog.observers import Observer 12 | 13 | 14 | class IngestWatcher: 15 | def __init__( 16 | self, watch_path: Path, on_file_changed: Callable[[Path], None] 17 | ) -> None: 18 | self.watch_path = watch_path 19 | self.on_file_changed = on_file_changed 20 | 21 | class Handler(FileSystemEventHandler): 22 | def on_modified(self, event: FileSystemEvent) -> None: 23 | if isinstance(event, FileModifiedEvent): 24 | on_file_changed(Path(event.src_path)) 25 | 26 | def on_created(self, event: FileSystemEvent) -> None: 27 | if isinstance(event, FileCreatedEvent): 28 | on_file_changed(Path(event.src_path)) 29 | 30 | event_handler = Handler() 31 | observer: Any = Observer() 32 | self._observer = observer 33 | self._observer.schedule(event_handler, str(watch_path), recursive=True) 34 | 35 | def start(self) -> None: 36 | self._observer.start() 37 | while self._observer.is_alive(): 38 | try: 39 | self._observer.join(1) 40 | except KeyboardInterrupt: 41 | break 42 | 43 | def stop(self) -> None: 44 | self._observer.stop() 45 | self._observer.join() 46 | -------------------------------------------------------------------------------- /.github/workflows/preview-docs.yml: -------------------------------------------------------------------------------- 1 | name: deploy preview docs 2 | 3 | on: 4 | pull_request_target: 5 | branches: 6 | - main 7 | paths: 8 | - "fern/**" 9 | 10 | jobs: 11 | preview-docs: 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - name: Checkout repository 16 | uses: actions/checkout@v4 17 | with: 18 | ref: refs/pull/${{ github.event.pull_request.number }}/merge 19 | 20 | - name: Setup Node.js 21 | uses: actions/setup-node@v4 22 | with: 23 | node-version: "18" 24 | 25 | - name: Install Fern 26 | run: npm install -g fern-api 27 | 28 | - name: Generate Documentation Preview with Fern 29 | id: generate_docs 30 | env: 31 | FERN_TOKEN: ${{ secrets.FERN_TOKEN }} 32 | run: | 33 | output=$(fern generate --docs --preview --log-level debug) 34 | echo "$output" 35 | # Extract the URL 36 | preview_url=$(echo "$output" | grep -oP '(?<=Published docs to )https://[^\s]*') 37 | # Set the output for the step 38 | echo "::set-output name=preview_url::$preview_url" 39 | 
- name: Comment PR with URL using github-actions bot 40 | uses: actions/github-script@v4 41 | if: ${{ steps.generate_docs.outputs.preview_url }} 42 | with: 43 | script: | 44 | const preview_url = '${{ steps.generate_docs.outputs.preview_url }}'; 45 | const issue_number = context.issue.number; 46 | github.issues.createComment({ 47 | ...context.repo, 48 | issue_number: issue_number, 49 | body: `Published docs preview URL: ${preview_url}` 50 | }) 51 | -------------------------------------------------------------------------------- /settings-ollama.yaml: -------------------------------------------------------------------------------- 1 | server: 2 | env_name: ${APP_ENV:ollama} 3 | 4 | llm: 5 | mode: ollama 6 | max_new_tokens: 512 7 | context_window: 3900 8 | temperature: 0.1 #The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual. (Default: 0.1) 9 | 10 | embedding: 11 | mode: ollama 12 | 13 | ollama: 14 | llm_model: mistral 15 | embedding_model: nomic-embed-text 16 | api_base: http://localhost:11434 17 | embedding_api_base: http://localhost:11434 # change if your embedding model runs on another ollama 18 | keep_alive: 5m 19 | tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. 20 | top_k: 40 # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40) 21 | top_p: 0.9 # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9) 22 | repeat_last_n: 64 # Sets how far back for the model to look back to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx) 23 | repeat_penalty: 1.2 # Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1) 24 | request_timeout: 120.0 # Time elapsed until ollama times out the request. Default is 120s. Format is float. 
25 | 26 | vectorstore: 27 | database: qdrant 28 | 29 | qdrant: 30 | path: local_data/private_gpt/qdrant 31 | -------------------------------------------------------------------------------- /scripts/setup: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import argparse 4 | 5 | from huggingface_hub import hf_hub_download, snapshot_download 6 | from transformers import AutoTokenizer 7 | 8 | from private_gpt.paths import models_path, models_cache_path 9 | from private_gpt.settings.settings import settings 10 | 11 | resume_download = True 12 | if __name__ == '__main__': 13 | parser = argparse.ArgumentParser(prog='Setup: Download models from Hugging Face') 14 | parser.add_argument('--resume', default=True, action=argparse.BooleanOptionalAction, help='Enable/Disable resume_download options to restart the download progress interrupted') 15 | args = parser.parse_args() 16 | resume_download = args.resume 17 | 18 | os.makedirs(models_path, exist_ok=True) 19 | 20 | # Download Embedding model 21 | embedding_path = models_path / "embedding" 22 | print(f"Downloading embedding {settings().huggingface.embedding_hf_model_name}") 23 | snapshot_download( 24 | repo_id=settings().huggingface.embedding_hf_model_name, 25 | cache_dir=models_cache_path, 26 | local_dir=embedding_path, 27 | ) 28 | print("Embedding model downloaded!") 29 | 30 | # Download LLM and create a symlink to the model file 31 | print(f"Downloading LLM {settings().llamacpp.llm_hf_model_file}") 32 | hf_hub_download( 33 | repo_id=settings().llamacpp.llm_hf_repo_id, 34 | filename=settings().llamacpp.llm_hf_model_file, 35 | cache_dir=models_cache_path, 36 | local_dir=models_path, 37 | resume_download=resume_download, 38 | ) 39 | print("LLM model downloaded!") 40 | 41 | # Download Tokenizer 42 | print(f"Downloading tokenizer {settings().llm.tokenizer}") 43 | AutoTokenizer.from_pretrained( 44 | pretrained_model_name_or_path=settings().llm.tokenizer, 45 | cache_dir=models_cache_path, 46 | ) 47 | print("Tokenizer downloaded!") 48 | 49 | print("Setup done") 50 | -------------------------------------------------------------------------------- /fern/docs/pages/manual/reranker.mdx: -------------------------------------------------------------------------------- 1 | ## Enhancing Response Quality with Reranking 2 | 3 | PrivateGPT offers a reranking feature aimed at optimizing response generation by filtering out irrelevant documents, potentially leading to faster response times and enhanced relevance of answers generated by the LLM. 4 | 5 | ### Enabling Reranking 6 | 7 | Document reranking can significantly improve the efficiency and quality of the responses by pre-selecting the most relevant documents before generating an answer. To leverage this feature, ensure that it is enabled in the RAG settings and consider adjusting the parameters to best fit your use case. 8 | 9 | #### Additional Requirements 10 | 11 | Before enabling reranking, you must install additional dependencies: 12 | 13 | ```bash 14 | poetry install --extras rerank-sentence-transformers 15 | ``` 16 | 17 | This command installs dependencies for the cross-encoder reranker from sentence-transformers, which is currently the only supported method by PrivateGPT for document reranking. 18 | 19 | #### Configuration 20 | 21 | To enable and configure reranking, adjust the `rag` section within the `settings.yaml` file. 
Here are the key settings to consider: 22 | 23 | - `similarity_top_k`: Determines the number of documents to initially retrieve and consider for reranking. This value should be larger than `top_n`. 24 | - `rerank`: 25 | - `enabled`: Set to `true` to activate the reranking feature. 26 | - `top_n`: Specifies the number of documents to use in the final answer generation process, chosen from the top-ranked documents provided by `similarity_top_k`. 27 | 28 | Example configuration snippet: 29 | 30 | ```yaml 31 | rag: 32 | similarity_top_k: 10 # Number of documents to retrieve and consider for reranking 33 | rerank: 34 | enabled: true 35 | top_n: 3 # Number of top-ranked documents to use for generating the answer 36 | ``` -------------------------------------------------------------------------------- /private_gpt/settings/settings_loader.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import logging 3 | import os 4 | import sys 5 | from collections.abc import Iterable 6 | from pathlib import Path 7 | from typing import Any 8 | 9 | from pydantic.v1.utils import deep_update, unique_list 10 | 11 | from private_gpt.constants import PROJECT_ROOT_PATH 12 | from private_gpt.settings.yaml import load_yaml_with_envvars 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | _settings_folder = os.environ.get("PGPT_SETTINGS_FOLDER", PROJECT_ROOT_PATH) 17 | 18 | # if running in unittest, use the test profile 19 | _test_profile = ["test"] if "tests.fixtures" in sys.modules else [] 20 | 21 | active_profiles: list[str] = unique_list( 22 | ["default"] 23 | + [ 24 | item.strip() 25 | for item in os.environ.get("PGPT_PROFILES", "").split(",") 26 | if item.strip() 27 | ] 28 | + _test_profile 29 | ) 30 | 31 | 32 | def merge_settings(settings: Iterable[dict[str, Any]]) -> dict[str, Any]: 33 | return functools.reduce(deep_update, settings, {}) 34 | 35 | 36 | def load_settings_from_profile(profile: str) -> dict[str, Any]: 37 | if profile == "default": 38 | profile_file_name = "settings.yaml" 39 | else: 40 | profile_file_name = f"settings-{profile}.yaml" 41 | 42 | path = Path(_settings_folder) / profile_file_name 43 | with Path(path).open("r") as f: 44 | config = load_yaml_with_envvars(f) 45 | if not isinstance(config, dict): 46 | raise TypeError(f"Config file has no top-level mapping: {path}") 47 | return config 48 | 49 | 50 | def load_active_settings() -> dict[str, Any]: 51 | """Load active profiles and merge them.""" 52 | logger.info("Starting application with profiles=%s", active_profiles) 53 | loaded_profiles = [ 54 | load_settings_from_profile(profile) for profile in active_profiles 55 | ] 56 | merged: dict[str, Any] = merge_settings(loaded_profiles) 57 | return merged 58 | -------------------------------------------------------------------------------- /tests/server/ingest/test_ingest_routes.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | from pathlib import Path 3 | 4 | from fastapi.testclient import TestClient 5 | 6 | from private_gpt.server.ingest.ingest_router import IngestResponse 7 | from tests.fixtures.ingest_helper import IngestHelper 8 | 9 | 10 | def test_ingest_accepts_txt_files(ingest_helper: IngestHelper) -> None: 11 | path = Path(__file__).parents[0] / "test.txt" 12 | ingest_result = ingest_helper.ingest_file(path) 13 | assert len(ingest_result.data) == 1 14 | 15 | 16 | def test_ingest_accepts_pdf_files(ingest_helper: IngestHelper) -> None: 17 | path = Path(__file__).parents[0] 
/ "test.pdf" 18 | ingest_result = ingest_helper.ingest_file(path) 19 | assert len(ingest_result.data) == 1 20 | 21 | 22 | def test_ingest_list_returns_something_after_ingestion( 23 | test_client: TestClient, ingest_helper: IngestHelper 24 | ) -> None: 25 | response_before = test_client.get("/v1/ingest/list") 26 | count_ingest_before = len(response_before.json()["data"]) 27 | with tempfile.NamedTemporaryFile("w", suffix=".txt") as test_file: 28 | test_file.write("Foo bar; hello there!") 29 | test_file.flush() 30 | test_file.seek(0) 31 | ingest_result = ingest_helper.ingest_file(Path(test_file.name)) 32 | assert len(ingest_result.data) == 1, "The temp doc should have been ingested" 33 | response_after = test_client.get("/v1/ingest/list") 34 | count_ingest_after = len(response_after.json()["data"]) 35 | assert ( 36 | count_ingest_after == count_ingest_before + 1 37 | ), "The temp doc should be returned" 38 | 39 | 40 | def test_ingest_plain_text(test_client: TestClient) -> None: 41 | response = test_client.post( 42 | "/v1/ingest/text", json={"file_name": "file_name", "text": "text"} 43 | ) 44 | assert response.status_code == 200 45 | ingest_result = IngestResponse.model_validate(response.json()) 46 | assert len(ingest_result.data) == 1 47 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.head_ref || github.ref }} 11 | cancel-in-progress: ${{ github.event_name == 'pull_request' }} 12 | 13 | jobs: 14 | setup: 15 | runs-on: ubuntu-latest 16 | steps: 17 | - uses: actions/checkout@v3 18 | - uses: ./.github/workflows/actions/install_dependencies 19 | 20 | checks: 21 | needs: setup 22 | runs-on: ubuntu-latest 23 | name: ${{ matrix.quality-command }} 24 | strategy: 25 | matrix: 26 | quality-command: 27 | - black 28 | - ruff 29 | - mypy 30 | steps: 31 | - uses: actions/checkout@v3 32 | - uses: ./.github/workflows/actions/install_dependencies 33 | - name: run ${{ matrix.quality-command }} 34 | run: make ${{ matrix.quality-command }} 35 | 36 | test: 37 | needs: setup 38 | runs-on: ubuntu-latest 39 | name: test 40 | steps: 41 | - uses: actions/checkout@v3 42 | - uses: ./.github/workflows/actions/install_dependencies 43 | - name: run test 44 | run: make test-coverage 45 | # Run even if make test fails for coverage reports 46 | # TODO: select a better xml results displayer 47 | - name: Archive test results coverage results 48 | uses: actions/upload-artifact@v3 49 | if: always() 50 | with: 51 | name: test_results 52 | path: tests-results.xml 53 | - name: Archive code coverage results 54 | uses: actions/upload-artifact@v3 55 | if: always() 56 | with: 57 | name: code-coverage-report 58 | path: htmlcov/ 59 | 60 | all_checks_passed: 61 | # Used to easily force requirements checks in GitHub 62 | needs: 63 | - checks 64 | - test 65 | runs-on: ubuntu-latest 66 | steps: 67 | - run: echo "All checks passed" 68 | -------------------------------------------------------------------------------- /tests/server/utils/test_simple_auth.py: -------------------------------------------------------------------------------- 1 | """Tests to validate that the simple authentication mechanism is working. 2 | 3 | NOTE: We are not testing the switch based on the config in 4 | `private_gpt.server.utils.auth`. 
This is not done because of the way the code 5 | is currently architected (it is hard to patch the `settings` and the app while 6 | the tests are directly importing them). 7 | """ 8 | 9 | from typing import Annotated 10 | 11 | import pytest 12 | from fastapi import Depends 13 | from fastapi.testclient import TestClient 14 | 15 | from private_gpt.server.utils.auth import ( 16 | NOT_AUTHENTICATED, 17 | _simple_authentication, 18 | authenticated, 19 | ) 20 | from private_gpt.settings.settings import settings 21 | 22 | 23 | def _copy_simple_authenticated( 24 | _simple_authentication: Annotated[bool, Depends(_simple_authentication)] 25 | ) -> bool: 26 | """Check if the request is authenticated.""" 27 | if not _simple_authentication: 28 | raise NOT_AUTHENTICATED 29 | return True 30 | 31 | 32 | @pytest.fixture(autouse=True) 33 | def _patch_authenticated_dependency(test_client: TestClient): 34 | # Patch the server to use simple authentication 35 | 36 | test_client.app.dependency_overrides[authenticated] = _copy_simple_authenticated 37 | 38 | # Call the actual test 39 | yield 40 | 41 | # Remove the patch for other tests 42 | test_client.app.dependency_overrides = {} 43 | 44 | 45 | def test_default_auth_working_when_enabled_401(test_client: TestClient) -> None: 46 | response = test_client.get("/v1/ingest/list") 47 | assert response.status_code == 401 48 | 49 | 50 | def test_default_auth_working_when_enabled_200(test_client: TestClient) -> None: 51 | response_fail = test_client.get("/v1/ingest/list") 52 | assert response_fail.status_code == 401 53 | 54 | response_success = test_client.get( 55 | "/v1/ingest/list", headers={"Authorization": settings().server.auth.secret} 56 | ) 57 | assert response_success.status_code == 200 58 | -------------------------------------------------------------------------------- /private_gpt/server/chunks/chunks_router.py: -------------------------------------------------------------------------------- 1 | from typing import Literal 2 | 3 | from fastapi import APIRouter, Depends, Request 4 | from pydantic import BaseModel, Field 5 | 6 | from private_gpt.open_ai.extensions.context_filter import ContextFilter 7 | from private_gpt.server.chunks.chunks_service import Chunk, ChunksService 8 | from private_gpt.server.utils.auth import authenticated 9 | 10 | chunks_router = APIRouter(prefix="/v1", dependencies=[Depends(authenticated)]) 11 | 12 | 13 | class ChunksBody(BaseModel): 14 | text: str = Field(examples=["Q3 2023 sales"]) 15 | context_filter: ContextFilter | None = None 16 | limit: int = 10 17 | prev_next_chunks: int = Field(default=0, examples=[2]) 18 | 19 | 20 | class ChunksResponse(BaseModel): 21 | object: Literal["list"] 22 | model: Literal["private-gpt"] 23 | data: list[Chunk] 24 | 25 | 26 | @chunks_router.post("/chunks", tags=["Context Chunks"]) 27 | def chunks_retrieval(request: Request, body: ChunksBody) -> ChunksResponse: 28 | """Given a `text`, returns the most relevant chunks from the ingested documents. 29 | 30 | The returned information can be used to generate prompts that can be 31 | passed to `/completions` or `/chat/completions` APIs. Note: it is usually a very 32 | fast API, because only the Embeddings model is involved, not the LLM. The 33 | returned information contains the relevant chunk `text` together with the source 34 | `document` it is coming from. It also contains a score that can be used to 35 | compare different results. 36 | 37 | The max number of chunks to be returned is set using the `limit` param.
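    For illustration only, a request body using the fields defined above could look
    like this (the values are arbitrary examples, not defaults):

        {"text": "Q3 2023 sales", "limit": 5, "prev_next_chunks": 2}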
38 | 39 | Previous and next chunks (pieces of text that appear right before or after in the 40 | document) can be fetched by using the `prev_next_chunks` field. 41 | 42 | The documents being used can be filtered using the `context_filter` and passing 43 | the document IDs to be used. Ingested documents IDs can be found using 44 | `/ingest/list` endpoint. If you want all ingested documents to be used, 45 | remove `context_filter` altogether. 46 | """ 47 | service = request.state.injector.get(ChunksService) 48 | results = service.retrieve_relevant( 49 | body.text, body.context_filter, body.limit, body.prev_next_chunks 50 | ) 51 | return ChunksResponse( 52 | object="list", 53 | model="private-gpt", 54 | data=results, 55 | ) 56 | -------------------------------------------------------------------------------- /tests/server/ingest/test.txt: -------------------------------------------------------------------------------- 1 | Once upon a time, in a magical forest called Enchantia, lived a young and cheerful deer named Zumi. Zumi was no ordinary deer; she was bright-eyed, intelligent, and had a heart full of curiosity. One sunny morning, as the forest came alive with the sweet melodies of chirping birds and rustling leaves, Zumi eagerly pranced through the woods on her way to school. 2 | 3 | Enchantia Forest School was a unique place, where all the woodland creatures gathered to learn and grow together. The school was nestled in a clearing surrounded by tall, ancient trees. Zumi loved the feeling of anticipation as she approached the school, her hooves barely touching the ground in excitement. 4 | 5 | As she arrived at the school, her dear friend and classmate, Oliver the wise old owl, greeted her with a friendly hoot. "Good morning, Zumi! Are you ready for another day of adventure and learning?" 6 | 7 | Zumi's eyes sparkled with enthusiasm as she nodded, "Absolutely, Oliver! I can't wait to see what we'll discover today." 8 | 9 | In their classroom, Teacher Willow, a gentle and nurturing willow tree, welcomed the students. The classroom was adorned with vibrant leaves and twinkling fireflies, creating a magical and cozy atmosphere. Today's lesson was about the history of the forest and the importance of living harmoniously with nature. 10 | 11 | The students listened attentively as Teacher Willow recounted stories of ancient times when the forest thrived in unity and peace. Zumi was particularly enthralled by the tales of forest guardians and how they protected the magical balance of Enchantia. 12 | 13 | After the lesson, it was time for recess. Zumi joined her friends in a lively game of tag, where they darted and danced playfully among the trees. Zumi's speed and agility made her an excellent tagger, and laughter filled the air as they played. 14 | 15 | Later, they gathered for an art class, where they expressed themselves through painting and sculpting with clay. Zumi chose to paint a mural of the forest, portraying the beauty and magic they were surrounded by every day. 16 | 17 | As the day came to an end, the students sat in a circle to share stories and reflections. Zumi shared her excitement for the day and how she learned to appreciate the interconnectedness of all creatures in the forest. 18 | 19 | As the sun set, casting a golden glow across the forest, Zumi made her way back home, her heart brimming with happiness and newfound knowledge. 
Each day at Enchantia Forest School was an adventure, and Zumi couldn't wait to learn more and grow with her friends, for the magic of learning was as boundless as the forest itself. And so, under the canopy of stars and the watchful eyes of the forest, Zumi drifted into dreams filled with wonder and anticipation for the adventures that awaited her on the morrow. -------------------------------------------------------------------------------- /private_gpt/launcher.py: -------------------------------------------------------------------------------- 1 | """FastAPI app creation, logger configuration and main API routes.""" 2 | 3 | import logging 4 | 5 | from fastapi import Depends, FastAPI, Request 6 | from fastapi.middleware.cors import CORSMiddleware 7 | from injector import Injector 8 | from llama_index.core.callbacks import CallbackManager 9 | from llama_index.core.callbacks.global_handlers import create_global_handler 10 | from llama_index.core.settings import Settings as LlamaIndexSettings 11 | 12 | from private_gpt.server.chat.chat_router import chat_router 13 | from private_gpt.server.chunks.chunks_router import chunks_router 14 | from private_gpt.server.completions.completions_router import completions_router 15 | from private_gpt.server.embeddings.embeddings_router import embeddings_router 16 | from private_gpt.server.health.health_router import health_router 17 | from private_gpt.server.ingest.ingest_router import ingest_router 18 | from private_gpt.settings.settings import Settings 19 | 20 | logger = logging.getLogger(__name__) 21 | 22 | 23 | def create_app(root_injector: Injector) -> FastAPI: 24 | 25 | # Start the API 26 | async def bind_injector_to_request(request: Request) -> None: 27 | request.state.injector = root_injector 28 | 29 | app = FastAPI(dependencies=[Depends(bind_injector_to_request)]) 30 | 31 | app.include_router(completions_router) 32 | app.include_router(chat_router) 33 | app.include_router(chunks_router) 34 | app.include_router(ingest_router) 35 | app.include_router(embeddings_router) 36 | app.include_router(health_router) 37 | 38 | # Add LlamaIndex simple observability 39 | global_handler = create_global_handler("simple") 40 | LlamaIndexSettings.callback_manager = CallbackManager([global_handler]) 41 | 42 | settings = root_injector.get(Settings) 43 | if settings.server.cors.enabled: 44 | logger.debug("Setting up CORS middleware") 45 | app.add_middleware( 46 | CORSMiddleware, 47 | allow_credentials=settings.server.cors.allow_credentials, 48 | allow_origins=settings.server.cors.allow_origins, 49 | allow_origin_regex=settings.server.cors.allow_origin_regex, 50 | allow_methods=settings.server.cors.allow_methods, 51 | allow_headers=settings.server.cors.allow_headers, 52 | ) 53 | 54 | if settings.ui.enabled: 55 | logger.debug("Importing the UI module") 56 | try: 57 | from private_gpt.ui.ui import PrivateGptUi 58 | except ImportError as e: 59 | raise ImportError( 60 | "UI dependencies not found, install with `poetry install --extras ui`" 61 | ) from e 62 | 63 | ui = root_injector.get(PrivateGptUi) 64 | ui.mount_in_app(app, settings.ui.path) 65 | 66 | return app 67 | -------------------------------------------------------------------------------- /private_gpt/server/utils/auth.py: -------------------------------------------------------------------------------- 1 | """Authentication mechanism for the API. 2 | 3 | Define a simple mechanism to authenticate requests. 
4 | More complex authentication mechanisms can be defined here, and be placed in the 5 | `authenticated` method (being a 'bean' injected in fastapi routers). 6 | 7 | Authorization can also be made after the authentication, and depends on 8 | the authentication. Authorization should not be implemented in this file. 9 | 10 | Authorization can be done by following fastapi's guides: 11 | * https://fastapi.tiangolo.com/advanced/security/oauth2-scopes/ 12 | * https://fastapi.tiangolo.com/tutorial/security/ 13 | * https://fastapi.tiangolo.com/tutorial/dependencies/dependencies-in-path-operation-decorators/ 14 | """ 15 | 16 | # mypy: ignore-errors 17 | # Disabled mypy error: All conditional function variants must have identical signatures 18 | # We are changing the implementation of the authenticated method, based on 19 | # the config. If the auth is not enabled, we are not defining the complex method 20 | # with its dependencies. 21 | import logging 22 | import secrets 23 | from typing import Annotated 24 | 25 | from fastapi import Depends, Header, HTTPException 26 | 27 | from private_gpt.settings.settings import settings 28 | 29 | # 401 signify that the request requires authentication. 30 | # 403 signify that the authenticated user is not authorized to perform the operation. 31 | NOT_AUTHENTICATED = HTTPException( 32 | status_code=401, 33 | detail="Not authenticated", 34 | headers={"WWW-Authenticate": 'Basic realm="All the API", charset="UTF-8"'}, 35 | ) 36 | 37 | logger = logging.getLogger(__name__) 38 | 39 | 40 | def _simple_authentication(authorization: Annotated[str, Header()] = "") -> bool: 41 | """Check if the request is authenticated.""" 42 | if not secrets.compare_digest(authorization, settings().server.auth.secret): 43 | # If the "Authorization" header is not the expected one, raise an exception. 44 | raise NOT_AUTHENTICATED 45 | return True 46 | 47 | 48 | if not settings().server.auth.enabled: 49 | logger.debug( 50 | "Defining a dummy authentication mechanism for fastapi, always authenticating requests" 51 | ) 52 | 53 | # Define a dummy authentication method that always returns True. 54 | def authenticated() -> bool: 55 | """Check if the request is authenticated.""" 56 | return True 57 | 58 | else: 59 | logger.info("Defining the given authentication mechanism for the API") 60 | 61 | # Method to be used as a dependency to check if the request is authenticated. 
62 | def authenticated( 63 | _simple_authentication: Annotated[bool, Depends(_simple_authentication)] 64 | ) -> bool: 65 | """Check if the request is authenticated.""" 66 | assert settings().server.auth.enabled 67 | if not _simple_authentication: 68 | raise NOT_AUTHENTICATED 69 | return True 70 | -------------------------------------------------------------------------------- /private_gpt/components/node_store/node_store_component.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from injector import inject, singleton 4 | from llama_index.core.storage.docstore import BaseDocumentStore, SimpleDocumentStore 5 | from llama_index.core.storage.index_store import SimpleIndexStore 6 | from llama_index.core.storage.index_store.types import BaseIndexStore 7 | 8 | from private_gpt.paths import local_data_path 9 | from private_gpt.settings.settings import Settings 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | @singleton 15 | class NodeStoreComponent: 16 | index_store: BaseIndexStore 17 | doc_store: BaseDocumentStore 18 | 19 | @inject 20 | def __init__(self, settings: Settings) -> None: 21 | match settings.nodestore.database: 22 | case "simple": 23 | try: 24 | self.index_store = SimpleIndexStore.from_persist_dir( 25 | persist_dir=str(local_data_path) 26 | ) 27 | except FileNotFoundError: 28 | logger.debug("Local index store not found, creating a new one") 29 | self.index_store = SimpleIndexStore() 30 | 31 | try: 32 | self.doc_store = SimpleDocumentStore.from_persist_dir( 33 | persist_dir=str(local_data_path) 34 | ) 35 | except FileNotFoundError: 36 | logger.debug("Local document store not found, creating a new one") 37 | self.doc_store = SimpleDocumentStore() 38 | 39 | case "postgres": 40 | try: 41 | from llama_index.core.storage.docstore.postgres_docstore import ( 42 | PostgresDocumentStore, 43 | ) 44 | from llama_index.core.storage.index_store.postgres_index_store import ( 45 | PostgresIndexStore, 46 | ) 47 | except ImportError: 48 | raise ImportError( 49 | "Postgres dependencies not found, install with `poetry install --extras storage-nodestore-postgres`" 50 | ) from None 51 | 52 | if settings.postgres is None: 53 | raise ValueError("Postgres index/doc store settings not found.") 54 | 55 | self.index_store = PostgresIndexStore.from_params( 56 | **settings.postgres.model_dump(exclude_none=True) 57 | ) 58 | self.doc_store = PostgresDocumentStore.from_params( 59 | **settings.postgres.model_dump(exclude_none=True) 60 | ) 61 | 62 | case _: 63 | # Should be unreachable 64 | # The settings validator should have caught this 65 | raise ValueError( 66 | f"Database {settings.nodestore.database} not supported" 67 | ) 68 | -------------------------------------------------------------------------------- /fern/docs/pages/manual/nodestore.mdx: -------------------------------------------------------------------------------- 1 | ## NodeStores 2 | PrivateGPT supports **Simple** and [Postgres](https://www.postgresql.org/) providers. Simple being the default. 3 | 4 | In order to select one or the other, set the `nodestore.database` property in the `settings.yaml` file to `simple` or `postgres`. 5 | 6 | ```yaml 7 | nodestore: 8 | database: simple 9 | ``` 10 | 11 | ### Simple Document Store 12 | 13 | Setting up simple document store: Persist data with in-memory and disk storage. 
14 | 15 | Enabling the simple document store is an excellent choice for small projects or proofs of concept where you need to persist data while maintaining minimal setup complexity. To get started, set the nodestore.database property in your settings.yaml file as follows: 16 | 17 | ```yaml 18 | nodestore: 19 | database: simple 20 | ``` 21 | The beauty of the simple document store is its flexibility and ease of implementation. It provides a solid foundation for managing and retrieving data without the need for complex setup or configuration. The combination of in-memory processing and disk persistence ensures that you can efficiently handle small to medium-sized datasets while maintaining data consistency across runs. 22 | 23 | ### Postgres Document Store 24 | 25 | To enable Postgres, set the `nodestore.database` property in the `settings.yaml` file to `postgres` and install the `storage-nodestore-postgres` extra. Note: Vector Embeddings Storage in Postgres is configured separately 26 | 27 | ```bash 28 | poetry install --extras storage-nodestore-postgres 29 | ``` 30 | 31 | The available configuration options are: 32 | | Field | Description | 33 | |---------------|-----------------------------------------------------------| 34 | | **host** | The server hosting the Postgres database. Default is `localhost` | 35 | | **port** | The port on which the Postgres database is accessible. Default is `5432` | 36 | | **database** | The specific database to connect to. Default is `postgres` | 37 | | **user** | The username for database access. Default is `postgres` | 38 | | **password** | The password for database access. (Required) | 39 | | **schema_name** | The database schema to use. Default is `private_gpt` | 40 | 41 | For example: 42 | ```yaml 43 | nodestore: 44 | database: postgres 45 | 46 | postgres: 47 | host: localhost 48 | port: 5432 49 | database: postgres 50 | user: postgres 51 | password: 52 | schema_name: private_gpt 53 | ``` 54 | 55 | Given the above configuration, Two PostgreSQL tables will be created upon successful connection: one for storing metadata related to the index and another for document data itself. 56 | 57 | ``` 58 | postgres=# \dt private_gpt.* 59 | List of relations 60 | Schema | Name | Type | Owner 61 | -------------+-----------------+-------+-------------- 62 | private_gpt | data_docstore | table | postgres 63 | private_gpt | data_indexstore | table | postgres 64 | 65 | postgres=# 66 | ``` 67 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Any args passed to the make script, use with $(call args, default_value) 2 | args = `arg="$(filter-out $@,$(MAKECMDGOALS))" && echo $${arg:-${1}}` 3 | 4 | ######################################################################################################################## 5 | # Quality checks 6 | ######################################################################################################################## 7 | 8 | test: 9 | PYTHONPATH=. poetry run pytest tests 10 | 11 | test-coverage: 12 | PYTHONPATH=. poetry run pytest tests --cov private_gpt --cov-report term --cov-report=html --cov-report xml --junit-xml=tests-results.xml 13 | 14 | black: 15 | poetry run black . --check 16 | 17 | ruff: 18 | poetry run ruff check private_gpt tests 19 | 20 | format: 21 | poetry run black . 
22 | poetry run ruff check private_gpt tests --fix 23 | 24 | mypy: 25 | poetry run mypy private_gpt 26 | 27 | check: 28 | make format 29 | make mypy 30 | 31 | ######################################################################################################################## 32 | # Run 33 | ######################################################################################################################## 34 | 35 | run: 36 | poetry run python -m private_gpt 37 | 38 | dev-windows: 39 | (set PGPT_PROFILES=local & poetry run python -m uvicorn private_gpt.main:app --reload --port 8001) 40 | 41 | dev: 42 | PYTHONUNBUFFERED=1 PGPT_PROFILES=local poetry run python -m uvicorn private_gpt.main:app --reload --port 8001 43 | 44 | ######################################################################################################################## 45 | # Misc 46 | ######################################################################################################################## 47 | 48 | api-docs: 49 | PGPT_PROFILES=mock poetry run python scripts/extract_openapi.py private_gpt.main:app --out fern/openapi/openapi.json 50 | 51 | ingest: 52 | @poetry run python scripts/ingest_folder.py $(call args) 53 | 54 | stats: 55 | poetry run python scripts/utils.py stats 56 | 57 | wipe: 58 | poetry run python scripts/utils.py wipe 59 | 60 | setup: 61 | poetry run python scripts/setup 62 | 63 | list: 64 | @echo "Available commands:" 65 | @echo " test : Run tests using pytest" 66 | @echo " test-coverage : Run tests with coverage report" 67 | @echo " black : Check code format with black" 68 | @echo " ruff : Check code with ruff" 69 | @echo " format : Format code with black and ruff" 70 | @echo " mypy : Run mypy for type checking" 71 | @echo " check : Run format and mypy commands" 72 | @echo " run : Run the application" 73 | @echo " dev-windows : Run the application in development mode on Windows" 74 | @echo " dev : Run the application in development mode" 75 | @echo " api-docs : Generate API documentation" 76 | @echo " ingest : Ingest data using specified script" 77 | @echo " wipe : Wipe data using specified script" 78 | @echo " setup : Setup the application" 79 | -------------------------------------------------------------------------------- /private_gpt/components/embedding/custom/sagemaker.py: -------------------------------------------------------------------------------- 1 | # mypy: ignore-errors 2 | import json 3 | from typing import Any 4 | 5 | import boto3 6 | from llama_index.core.base.embeddings.base import BaseEmbedding 7 | from pydantic import Field, PrivateAttr 8 | 9 | 10 | class SagemakerEmbedding(BaseEmbedding): 11 | """Sagemaker Embedding Endpoint. 12 | 13 | To use, you must supply the endpoint name from your deployed 14 | Sagemaker embedding model & the region where it is deployed. 15 | 16 | To authenticate, the AWS client uses the following methods to 17 | automatically load credentials: 18 | https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html 19 | 20 | If a specific credential profile should be used, you must pass 21 | the name of the profile from the ~/.aws/credentials file that is to be used. 22 | 23 | Make sure the credentials / roles used have the required policies to 24 | access the Sagemaker endpoint. 
25 | See: https://docs.aws.amazon.com/IAM/latest/UserGuide/access_policies.html 26 | """ 27 | 28 | endpoint_name: str = Field(description="") 29 | 30 | _boto_client: Any = boto3.client( 31 | "sagemaker-runtime", 32 | ) # TODO make it an optional field 33 | 34 | _async_not_implemented_warned: bool = PrivateAttr(default=False) 35 | 36 | @classmethod 37 | def class_name(cls) -> str: 38 | return "SagemakerEmbedding" 39 | 40 | def _async_not_implemented_warn_once(self) -> None: 41 | if not self._async_not_implemented_warned: 42 | print("Async embedding not available, falling back to sync method.") 43 | self._async_not_implemented_warned = True 44 | 45 | def _embed(self, sentences: list[str]) -> list[list[float]]: 46 | request_params = { 47 | "inputs": sentences, 48 | } 49 | 50 | resp = self._boto_client.invoke_endpoint( 51 | EndpointName=self.endpoint_name, 52 | Body=json.dumps(request_params), 53 | ContentType="application/json", 54 | ) 55 | 56 | response_body = resp["Body"] 57 | response_str = response_body.read().decode("utf-8") 58 | response_json = json.loads(response_str) 59 | 60 | return response_json["vectors"] 61 | 62 | def _get_query_embedding(self, query: str) -> list[float]: 63 | """Get query embedding.""" 64 | return self._embed([query])[0] 65 | 66 | async def _aget_query_embedding(self, query: str) -> list[float]: 67 | # Warn the user that sync is being used 68 | self._async_not_implemented_warn_once() 69 | return self._get_query_embedding(query) 70 | 71 | async def _aget_text_embedding(self, text: str) -> list[float]: 72 | # Warn the user that sync is being used 73 | self._async_not_implemented_warn_once() 74 | return self._get_text_embedding(text) 75 | 76 | def _get_text_embedding(self, text: str) -> list[float]: 77 | """Get text embedding.""" 78 | return self._embed([text])[0] 79 | 80 | def _get_text_embeddings(self, texts: list[str]) -> list[list[float]]: 81 | """Get text embeddings.""" 82 | return self._embed(texts) 83 | -------------------------------------------------------------------------------- /fern/docs/pages/manual/ui.mdx: -------------------------------------------------------------------------------- 1 | ## Gradio UI user manual 2 | 3 | Gradio UI is a ready to use way of testing most of PrivateGPT API functionalities. 4 | 5 | ![Gradio PrivateGPT](https://lh3.googleusercontent.com/drive-viewer/AK7aPaD_Hc-A8A9ooMe-hPgm_eImgsbxAjb__8nFYj8b_WwzvL1Gy90oAnp1DfhPaN6yGiEHCOXs0r77W1bYHtPzlVwbV7fMsA=s1600) 6 | 7 | ### Execution Modes 8 | 9 | It has 3 modes of execution (you can select in the top-left): 10 | 11 | * Query Docs: uses the context from the 12 | ingested documents to answer the questions posted in the chat. It also takes 13 | into account previous chat messages as context. 14 | * Makes use of `/chat/completions` API with `use_context=true` and no 15 | `context_filter`. 16 | * Search in Docs: fast search that returns the 4 most related text 17 | chunks, together with their source document and page. 18 | * Makes use of `/chunks` API with no `context_filter`, `limit=4` and 19 | `prev_next_chunks=0`. 20 | * LLM Chat: simple, non-contextual chat with the LLM. The ingested documents won't 21 | be taken into account, only the previous messages. 22 | * Makes use of `/chat/completions` API with `use_context=false`. 23 | 24 | ### Document Ingestion 25 | 26 | Ingest documents by using the `Upload a File` button. You can check the progress of 27 | the ingestion in the console logs of the server. 28 | 29 | The list of ingested files is shown below the button. 
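
If you prefer to script ingestion instead of using the button, the same result can be obtained
through the API. The sketch below is illustrative only: it uses Python's `requests` package, the
`/v1/ingest/text` and `/v1/ingest/list` endpoints exercised by the project's tests, and assumes the
default local server address and port (adjust the base URL, and add an `Authorization` header if
you have enabled auth in your settings).

```python
import requests

BASE_URL = "http://localhost:8001"  # assumption: default local server port

# Ingest a short piece of text, equivalent to uploading a small .txt file in the UI.
resp = requests.post(
    f"{BASE_URL}/v1/ingest/text",
    json={"file_name": "notes.txt", "text": "PrivateGPT keeps your data local."},
)
resp.raise_for_status()

# The list shown below the `Upload a File` button is backed by the same ingested data.
ingested = requests.get(f"{BASE_URL}/v1/ingest/list").json()["data"]
print(f"{len(ingested)} document(s) ingested")
```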
30 | 31 | If you want to delete the ingested documents, refer to the *Reset Local documents 32 | database* section in the documentation. 33 | 34 | ### Chat 35 | 36 | Normal chat interface, self-explanatory ;) 37 | 38 | #### System Prompt 39 | You can view and change the system prompt being passed to the LLM by clicking "Additional Inputs" 40 | in the chat interface. The system prompt is also logged on the server. 41 | 42 | By default, the `Query Docs` mode uses the setting value `ui.default_query_system_prompt`. 43 | 44 | The `LLM Chat` mode attempts to use the optional settings value `ui.default_chat_system_prompt`. 45 | 46 | If no system prompt is entered, the UI will display the default system prompt being used 47 | for the active mode. 48 | 49 | ##### System Prompt Examples: 50 | 51 | The system prompt can effectively give your chat bot specialized roles, and produce results tailored to the prompt 52 | you have given the model. Examples of system prompts can be found 53 | [here](https://www.w3schools.com/gen_ai/chatgpt-3-5/chatgpt-3-5_roles.php). 54 | 55 | Some interesting examples to try include: 56 | 57 | * You are -X-. You have all the knowledge and personality of -X-. Answer as if you were -X- using 58 | their manner of speaking and vocabulary. 59 | * Example: You are Shakespeare. You have all the knowledge and personality of Shakespeare. 60 | Answer as if you were Shakespeare using their manner of speaking and vocabulary. 61 | * You are an expert (at) -role-. Answer all questions using your expertise on -specific domain topic-. 62 | * Example: You are an expert software engineer. Answer all questions using your expertise on Python. 63 | * You are a -role- bot, respond with -response criteria needed-. If no -response criteria- is needed, 64 | respond with -alternate response-. 65 | * Example: You are a grammar checking bot, respond with any grammatical corrections needed. If no corrections 66 | are needed, respond with "verified". -------------------------------------------------------------------------------- /private_gpt/server/completions/completions_router.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, Depends, Request 2 | from pydantic import BaseModel 3 | from starlette.responses import StreamingResponse 4 | 5 | from private_gpt.open_ai.extensions.context_filter import ContextFilter 6 | from private_gpt.open_ai.openai_models import ( 7 | OpenAICompletion, 8 | OpenAIMessage, 9 | ) 10 | from private_gpt.server.chat.chat_router import ChatBody, chat_completion 11 | from private_gpt.server.utils.auth import authenticated 12 | 13 | completions_router = APIRouter(prefix="/v1", dependencies=[Depends(authenticated)]) 14 | 15 | 16 | class CompletionsBody(BaseModel): 17 | prompt: str 18 | system_prompt: str | None = None 19 | use_context: bool = False 20 | context_filter: ContextFilter | None = None 21 | include_sources: bool = True 22 | stream: bool = False 23 | 24 | model_config = { 25 | "json_schema_extra": { 26 | "examples": [ 27 | { 28 | "prompt": "How do you fry an egg?", 29 | "system_prompt": "You are a rapper.
Always answer with a rap.", 30 | "stream": False, 31 | "use_context": False, 32 | "include_sources": False, 33 | } 34 | ] 35 | } 36 | } 37 | 38 | 39 | @completions_router.post( 40 | "/completions", 41 | response_model=None, 42 | summary="Completion", 43 | responses={200: {"model": OpenAICompletion}}, 44 | tags=["Contextual Completions"], 45 | openapi_extra={ 46 | "x-fern-streaming": { 47 | "stream-condition": "stream", 48 | "response": {"$ref": "#/components/schemas/OpenAICompletion"}, 49 | "response-stream": {"$ref": "#/components/schemas/OpenAICompletion"}, 50 | } 51 | }, 52 | ) 53 | def prompt_completion( 54 | request: Request, body: CompletionsBody 55 | ) -> OpenAICompletion | StreamingResponse: 56 | """We recommend most users use our Chat completions API. 57 | 58 | Given a prompt, the model will return one predicted completion. 59 | 60 | Optionally include a `system_prompt` to influence the way the LLM answers. 61 | 62 | If `use_context` 63 | is set to `true`, the model will use context coming from the ingested documents 64 | to create the response. The documents being used can be filtered using the 65 | `context_filter` and passing the document IDs to be used. Ingested documents IDs 66 | can be found using `/ingest/list` endpoint. If you want all ingested documents to 67 | be used, remove `context_filter` altogether. 68 | 69 | When using `'include_sources': true`, the API will return the source Chunks used 70 | to create the response, which come from the context provided. 71 | 72 | When using `'stream': true`, the API will return data chunks following [OpenAI's 73 | streaming model](https://platform.openai.com/docs/api-reference/chat/streaming): 74 | ``` 75 | {"id":"12345","object":"completion.chunk","created":1694268190, 76 | "model":"private-gpt","choices":[{"index":0,"delta":{"content":"Hello"}, 77 | "finish_reason":null}]} 78 | ``` 79 | """ 80 | messages = [OpenAIMessage(content=body.prompt, role="user")] 81 | # If system prompt is passed, create a fake message with the system prompt. 82 | if body.system_prompt: 83 | messages.insert(0, OpenAIMessage(content=body.system_prompt, role="system")) 84 | 85 | chat_body = ChatBody( 86 | messages=messages, 87 | use_context=body.use_context, 88 | stream=body.stream, 89 | include_sources=body.include_sources, 90 | context_filter=body.context_filter, 91 | ) 92 | return chat_completion(request, chat_body) 93 | -------------------------------------------------------------------------------- /fern/docs/pages/manual/settings.mdx: -------------------------------------------------------------------------------- 1 | # Settings and profiles for your private GPT 2 | 3 | The configuration of your private GPT server is done through `settings` files (more precisely `settings.yaml`). 4 | These text files are written using the [YAML](https://en.wikipedia.org/wiki/YAML) syntax. 5 | 6 | While privateGPT ships with safe and universal configuration files, you might want to quickly customize your 7 | privateGPT, and this can be done using the `settings` files. 8 | 9 | This project defines the concept of **profiles** (or configuration profiles). 10 | This mechanism, driven by environment variables, gives you the ability to easily switch between 11 | the configurations you've made. 12 | 13 | A typical use case of profiles is to easily switch between LLMs and embeddings. 14 | To be a bit more precise, you can change the language (to French, Spanish, Italian, English, etc) by simply changing 15 | the profile you've selected; no code changes required!
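
As an illustration, a profile is nothing more than an extra settings file layered on top of
`settings.yaml`. A minimal, hypothetical `settings-myprofile.yaml` that switches both the LLM and
the embeddings provider to Ollama could look like this (the keys mirror the ones used in the
`settings-*.yaml` files shipped with the project):

```yaml
# Hypothetical profile file: settings-myprofile.yaml
# Activate it with PGPT_PROFILES=myprofile; only the keys you want to override are needed.
llm:
  mode: ollama

embedding:
  mode: ollama

ollama:
  llm_model: mistral
  embedding_model: nomic-embed-text
```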
16 | 17 | PrivateGPT is configured through *profiles* that are defined using yaml files, and selected through env variables. 18 | The full list of configurable properties can be found in `settings.yaml`. 19 | 20 | ## How to know which profiles exist 21 | Given that a profile `foo_bar` points to the file `settings-foo_bar.yaml` and vice-versa, you simply have to look 22 | at the files starting with `settings` and ending in `.yaml`. 23 | 24 | ## How to use an existing profile 25 | **Please note that the syntax to set the value of an environment variable depends on your OS**. 26 | You have to set the environment variable `PGPT_PROFILES` to the name of the profile you want to use. 27 | 28 | For example, on **Linux and macOS**, this gives: 29 | ```bash 30 | export PGPT_PROFILES=my_profile_name_here 31 | ``` 32 | 33 | Windows Command Prompt (cmd) has a different syntax: 34 | ```shell 35 | set PGPT_PROFILES=my_profile_name_here 36 | ``` 37 | 38 | Windows PowerShell has a different syntax: 39 | ```shell 40 | $env:PGPT_PROFILES="my_profile_name_here" 41 | ``` 42 | If the above is not working, you might want to try other ways to set an env variable in your Windows terminal. 43 | 44 | --- 45 | 46 | Once you've set this environment variable to the desired profile, you can simply launch your privateGPT, 47 | and it will run using your profile on top of the default configuration. 48 | 49 | ## Reference 50 | Additional details on the profiles are described in this section. 51 | 52 | ### Environment variable `PGPT_SETTINGS_FOLDER` 53 | 54 | The location of the settings folder. Defaults to the root of the project. 55 | Should contain the default `settings.yaml` and any other `settings-{profile}.yaml`. 56 | 57 | ### Environment variable `PGPT_PROFILES` 58 | 59 | By default, the profile definition in `settings.yaml` is loaded. 60 | Using this env var you can load additional profiles; the format is a comma-separated list of profile names. 61 | This will merge `settings-{profile}.yaml` on top of the base settings file. 62 | 63 | For example: 64 | `PGPT_PROFILES=local,cuda` will load `settings-local.yaml` 65 | and `settings-cuda.yaml`; their contents will be merged, with 66 | later profiles' properties overriding values of earlier ones like `settings.yaml`. 67 | 68 | During testing, the `test` profile will be active along with the default, therefore the `settings-test.yaml` 69 | file is required. 70 | 71 | ### Environment variables expansion 72 | 73 | Configuration files can contain environment variables; 74 | they will be expanded at runtime. 75 | 76 | Expansion must follow the pattern `${VARIABLE_NAME:default_value}`. 77 | 78 | For example, the following configuration will use the value of the `PORT` 79 | environment variable or `8001` if it's not set. 80 | Missing variables with no default will produce an error.
81 | 82 | ```yaml 83 | server: 84 | port: ${PORT:8001} 85 | ``` -------------------------------------------------------------------------------- /private_gpt/components/vector_store/batched_chroma.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Generator 2 | from typing import Any 3 | 4 | from llama_index.core.schema import BaseNode, MetadataMode 5 | from llama_index.core.vector_stores.utils import node_to_metadata_dict 6 | from llama_index.vector_stores.chroma import ChromaVectorStore # type: ignore 7 | 8 | 9 | def chunk_list( 10 | lst: list[BaseNode], max_chunk_size: int 11 | ) -> Generator[list[BaseNode], None, None]: 12 | """Yield successive max_chunk_size-sized chunks from lst. 13 | 14 | Args: 15 | lst (List[BaseNode]): list of nodes with embeddings 16 | max_chunk_size (int): max chunk size 17 | 18 | Yields: 19 | Generator[List[BaseNode], None, None]: list of nodes with embeddings 20 | """ 21 | for i in range(0, len(lst), max_chunk_size): 22 | yield lst[i : i + max_chunk_size] 23 | 24 | 25 | class BatchedChromaVectorStore(ChromaVectorStore): # type: ignore 26 | """Chroma vector store, batching additions to avoid reaching the max batch limit. 27 | 28 | In this vector store, embeddings are stored within a ChromaDB collection. 29 | 30 | During query time, the index uses ChromaDB to query for the top 31 | k most similar nodes. 32 | 33 | Args: 34 | chroma_client (from chromadb.api.API): 35 | API instance 36 | chroma_collection (chromadb.api.models.Collection.Collection): 37 | ChromaDB collection instance 38 | 39 | """ 40 | 41 | chroma_client: Any | None 42 | 43 | def __init__( 44 | self, 45 | chroma_client: Any, 46 | chroma_collection: Any, 47 | host: str | None = None, 48 | port: str | None = None, 49 | ssl: bool = False, 50 | headers: dict[str, str] | None = None, 51 | collection_kwargs: dict[Any, Any] | None = None, 52 | ) -> None: 53 | super().__init__( 54 | chroma_collection=chroma_collection, 55 | host=host, 56 | port=port, 57 | ssl=ssl, 58 | headers=headers, 59 | collection_kwargs=collection_kwargs or {}, 60 | ) 61 | self.chroma_client = chroma_client 62 | 63 | def add(self, nodes: list[BaseNode], **add_kwargs: Any) -> list[str]: 64 | """Add nodes to index, batching the insertion to avoid issues. 
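        Nodes are split into chunks of at most ``chroma_client.max_batch_size`` elements,
        and each chunk is inserted with a separate call to the underlying collection's
        ``add`` method; the ids of all inserted nodes are collected and returned.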
65 | 66 | Args: 67 | nodes: List[BaseNode]: list of nodes with embeddings 68 | add_kwargs: _ 69 | """ 70 | if not self.chroma_client: 71 | raise ValueError("Client not initialized") 72 | 73 | if not self._collection: 74 | raise ValueError("Collection not initialized") 75 | 76 | max_chunk_size = self.chroma_client.max_batch_size 77 | node_chunks = chunk_list(nodes, max_chunk_size) 78 | 79 | all_ids = [] 80 | for node_chunk in node_chunks: 81 | embeddings = [] 82 | metadatas = [] 83 | ids = [] 84 | documents = [] 85 | for node in node_chunk: 86 | embeddings.append(node.get_embedding()) 87 | metadatas.append( 88 | node_to_metadata_dict( 89 | node, remove_text=True, flat_metadata=self.flat_metadata 90 | ) 91 | ) 92 | ids.append(node.node_id) 93 | documents.append(node.get_content(metadata_mode=MetadataMode.NONE)) 94 | 95 | self._collection.add( 96 | embeddings=embeddings, 97 | ids=ids, 98 | metadatas=metadatas, 99 | documents=documents, 100 | ) 101 | all_ids.extend(ids) 102 | 103 | return all_ids 104 | -------------------------------------------------------------------------------- /fern/docs.yml: -------------------------------------------------------------------------------- 1 | # Main Fern configuration file 2 | instances: 3 | - url: privategpt.docs.buildwithfern.com 4 | custom-domain: docs.privategpt.dev 5 | 6 | title: PrivateGPT | Docs 7 | 8 | # The tabs definition, in the top left corner 9 | tabs: 10 | overview: 11 | display-name: Overview 12 | icon: "fa-solid fa-home" 13 | installation: 14 | display-name: Installation 15 | icon: "fa-solid fa-download" 16 | manual: 17 | display-name: Manual 18 | icon: "fa-solid fa-book" 19 | recipes: 20 | display-name: Recipes 21 | icon: "fa-solid fa-flask" 22 | api-reference: 23 | display-name: API Reference 24 | icon: "fa-solid fa-file-contract" 25 | 26 | # Definition of tabs contents, will be displayed on the left side of the page, below all tabs 27 | navigation: 28 | # The default tab 29 | - tab: overview 30 | layout: 31 | - section: Welcome 32 | contents: 33 | - page: Introduction 34 | path: ./docs/pages/overview/welcome.mdx 35 | # How to install privateGPT, with FAQ and troubleshooting 36 | - tab: installation 37 | layout: 38 | - section: Getting started 39 | contents: 40 | - page: Main Concepts 41 | path: ./docs/pages/installation/concepts.mdx 42 | - page: Installation 43 | path: ./docs/pages/installation/installation.mdx 44 | # Manual of privateGPT: how to use it and configure it 45 | - tab: manual 46 | layout: 47 | - section: General configuration 48 | contents: 49 | - page: Configuration 50 | path: ./docs/pages/manual/settings.mdx 51 | - section: Document management 52 | contents: 53 | - page: Ingestion 54 | path: ./docs/pages/manual/ingestion.mdx 55 | - page: Deletion 56 | path: ./docs/pages/manual/ingestion-reset.mdx 57 | - section: Storage 58 | contents: 59 | - page: Vector Stores 60 | path: ./docs/pages/manual/vectordb.mdx 61 | - page: Node Stores 62 | path: ./docs/pages/manual/nodestore.mdx 63 | - section: Advanced Setup 64 | contents: 65 | - page: LLM Backends 66 | path: ./docs/pages/manual/llms.mdx 67 | - page: Reranking 68 | path: ./docs/pages/manual/reranker.mdx 69 | - section: User Interface 70 | contents: 71 | - page: User interface (Gradio) Manual 72 | path: ./docs/pages/manual/ui.mdx 73 | # Small code snippet or example of usage to help users 74 | - tab: recipes 75 | layout: 76 | - section: Choice of LLM 77 | contents: 78 | # TODO: add recipes 79 | - page: List of LLMs 80 | path: ./docs/pages/recipes/list-llm.mdx 81 | # More 
advanced usage of privateGPT, by API 82 | - tab: api-reference 83 | layout: 84 | - section: Overview 85 | contents: 86 | - page: API Reference overview 87 | path: ./docs/pages/api-reference/api-reference.mdx 88 | - page: SDKs 89 | path: ./docs/pages/api-reference/sdks.mdx 90 | - api: API Reference 91 | 92 | # Definition of the navbar, will be displayed in the top right corner. 93 | # `type:primary` is always displayed at the most right side of the navbar 94 | navbar-links: 95 | - type: secondary 96 | text: Contact us 97 | url: "mailto:hello@zylon.ai" 98 | - type: github 99 | value: "https://github.com/zylon-ai/private-gpt" 100 | - type: primary 101 | text: Join the Discord 102 | url: https://discord.com/invite/bK6mRVpErU 103 | 104 | colors: 105 | accentPrimary: 106 | dark: "#C6BBFF" 107 | light: "#756E98" 108 | 109 | logo: 110 | dark: ./docs/assets/logo_light.png 111 | light: ./docs/assets/logo_dark.png 112 | height: 50 113 | 114 | favicon: ./docs/assets/favicon.ico 115 | -------------------------------------------------------------------------------- /fern/docs/pages/installation/concepts.mdx: -------------------------------------------------------------------------------- 1 | PrivateGPT is a service that wraps a set of AI RAG primitives in a comprehensive set of APIs providing a private, secure, customizable and easy-to-use GenAI development framework. 2 | 3 | It uses FastAPI and LlamaIndex as its core frameworks. Those can be customized by changing the codebase itself. 4 | 5 | It supports a variety of LLM providers, embeddings providers, and vector stores, both local and remote. Those can be easily changed without changing the codebase. 6 | 7 | # Different Setups support 8 | 9 | ## Setup configurations available 10 | You get to decide the setup for these 3 main components: 11 | - LLM: the large language model provider used for inference. It can be local, or remote, or even OpenAI. 12 | - Embeddings: the embeddings provider used to encode the input, the documents and the users' queries. Same as the LLM, it can be local, or remote, or even OpenAI. 13 | - Vector store: the store used to index and retrieve the documents. 14 | 15 | There is an extra component that can be enabled or disabled: the UI. It is a Gradio UI that allows you to interact with the API in a more user-friendly way. 16 | 17 | ### Setups and Dependencies 18 | Your setup will be the combination of the different options available. You'll find recommended setups in the [installation](./installation) section. 19 | PrivateGPT uses poetry to manage its dependencies. You can install the dependencies for the different setups by running `poetry install --extras "<extra1> <extra2>..."`. 20 | Extras are the different options available for each component. For example, to install the dependencies for a local setup with UI and Qdrant as vector database, Ollama as LLM and HuggingFace as local embeddings, you would run 21 | 22 | `poetry install --extras "ui vector-stores-qdrant llms-ollama embeddings-huggingface"`. 23 | 24 | Refer to the [installation](./installation) section for more details. 25 | 26 | ### Setups and Configuration 27 | PrivateGPT uses yaml to define its configuration in files named `settings-<profile>.yaml`. 28 | Different configuration files can be created in the root directory of the project. 29 | PrivateGPT will load the configuration at startup from the profile specified in the `PGPT_PROFILES` environment variable.
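The merge behaviour described here can be pictured with a short sketch (a simplified illustration only; it assumes PyYAML is available and that multiple profiles are comma-separated, while the real logic lives in `private_gpt/settings/settings_loader.py` and may differ in details):

```python
import os
from pathlib import Path
from typing import Any

import yaml  # assumption: PyYAML is available in the environment


def _deep_merge(base: dict[str, Any], override: dict[str, Any]) -> dict[str, Any]:
    """Recursively apply `override` on top of `base` (override wins)."""
    merged = dict(base)
    for key, value in override.items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            merged[key] = _deep_merge(merged[key], value)
        else:
            merged[key] = value
    return merged


def load_active_settings(root: Path = Path(".")) -> dict[str, Any]:
    """Load settings.yaml, then layer each profile from PGPT_PROFILES on top."""
    settings = yaml.safe_load((root / "settings.yaml").read_text())
    # assumption: multiple profiles are given as a comma-separated list
    for profile in filter(None, os.environ.get("PGPT_PROFILES", "").split(",")):
        profile_settings = yaml.safe_load((root / f"settings-{profile}.yaml").read_text())
        settings = _deep_merge(settings, profile_settings)
    return settings
```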
30 | For example, running: 31 | ```bash 32 | PGPT_PROFILES=ollama make run 33 | ``` 34 | will load the configuration from `settings.yaml` and `settings-ollama.yaml`. 35 | - `settings.yaml` is always loaded and contains the default configuration. 36 | - `settings-ollama.yaml` is loaded if the `ollama` profile is specified in the `PGPT_PROFILES` environment variable. It can override configuration from the default `settings.yaml`. 37 | 38 | ## About Fully Local Setups 39 | In order to run PrivateGPT in a fully local setup, you will need to run the LLM, Embeddings and Vector Store locally. 40 | ### Vector stores 41 | The vector stores supported (Qdrant, ChromaDB and Postgres) run locally by default. 42 | ### Embeddings 43 | For local Embeddings there are two options: 44 | * (Recommended) You can use the 'ollama' option in PrivateGPT, which will connect to your local Ollama instance. Ollama greatly simplifies the installation of local models. 45 | * You can use the 'embeddings-huggingface' option in PrivateGPT, which will use HuggingFace. 46 | 47 | In order for the HuggingFace embeddings (the second option) to work, you need to download the embeddings model to the `models` folder. You can do so by running the `setup` script: 48 | ```bash 49 | poetry run python scripts/setup 50 | ``` 51 | 52 | ### LLM 53 | For a local LLM there are two options: 54 | * (Recommended) You can use the 'ollama' option in PrivateGPT, which will connect to your local Ollama instance. Ollama greatly simplifies the installation of local LLMs. 55 | * You can use the 'llms-llama-cpp' option in PrivateGPT, which will use LlamaCPP. It works great on Mac with Metal most of the time (it leverages the Metal GPU), but it can be tricky on certain Linux distributions and on Windows, depending on the GPU. In the installation document you'll find guides and troubleshooting. 56 | 57 | In order for the LlamaCPP-powered LLM (the second option) to work, you need to download the LLM model to the `models` folder. You can do so by running the `setup` script: 58 | ```bash 59 | poetry run python scripts/setup 60 | ``` 61 | -------------------------------------------------------------------------------- /private_gpt/open_ai/openai_models.py: -------------------------------------------------------------------------------- 1 | import time 2 | import uuid 3 | from collections.abc import Iterator 4 | from typing import Literal 5 | 6 | from llama_index.core.llms import ChatResponse, CompletionResponse 7 | from pydantic import BaseModel, Field 8 | 9 | from private_gpt.server.chunks.chunks_service import Chunk 10 | 11 | 12 | class OpenAIDelta(BaseModel): 13 | """A piece of completion that needs to be concatenated to get the full message.""" 14 | 15 | content: str | None 16 | 17 | 18 | class OpenAIMessage(BaseModel): 19 | """Inference result, with the source of the message. 20 | 21 | Role could be the assistant or system 22 | (providing a default response, not AI generated). 23 | """ 24 | 25 | role: Literal["assistant", "system", "user"] = Field(default="user") 26 | content: str | None 27 | 28 | 29 | class OpenAIChoice(BaseModel): 30 | """Response from AI. 31 | 32 | Either the delta or the message will be present, but never both. 33 | Sources used will be returned in case context retrieval was enabled.
34 | """ 35 | 36 | finish_reason: str | None = Field(examples=["stop"]) 37 | delta: OpenAIDelta | None = None 38 | message: OpenAIMessage | None = None 39 | sources: list[Chunk] | None = None 40 | index: int = 0 41 | 42 | 43 | class OpenAICompletion(BaseModel): 44 | """Clone of OpenAI Completion model. 45 | 46 | For more information see: https://platform.openai.com/docs/api-reference/chat/object 47 | """ 48 | 49 | id: str 50 | object: Literal["completion", "completion.chunk"] = Field(default="completion") 51 | created: int = Field(..., examples=[1623340000]) 52 | model: Literal["private-gpt"] 53 | choices: list[OpenAIChoice] 54 | 55 | @classmethod 56 | def from_text( 57 | cls, 58 | text: str | None, 59 | finish_reason: str | None = None, 60 | sources: list[Chunk] | None = None, 61 | ) -> "OpenAICompletion": 62 | return OpenAICompletion( 63 | id=str(uuid.uuid4()), 64 | object="completion", 65 | created=int(time.time()), 66 | model="private-gpt", 67 | choices=[ 68 | OpenAIChoice( 69 | message=OpenAIMessage(role="assistant", content=text), 70 | finish_reason=finish_reason, 71 | sources=sources, 72 | ) 73 | ], 74 | ) 75 | 76 | @classmethod 77 | def json_from_delta( 78 | cls, 79 | *, 80 | text: str | None, 81 | finish_reason: str | None = None, 82 | sources: list[Chunk] | None = None, 83 | ) -> str: 84 | chunk = OpenAICompletion( 85 | id=str(uuid.uuid4()), 86 | object="completion.chunk", 87 | created=int(time.time()), 88 | model="private-gpt", 89 | choices=[ 90 | OpenAIChoice( 91 | delta=OpenAIDelta(content=text), 92 | finish_reason=finish_reason, 93 | sources=sources, 94 | ) 95 | ], 96 | ) 97 | 98 | return chunk.model_dump_json() 99 | 100 | 101 | def to_openai_response( 102 | response: str | ChatResponse, sources: list[Chunk] | None = None 103 | ) -> OpenAICompletion: 104 | if isinstance(response, ChatResponse): 105 | return OpenAICompletion.from_text(response.delta, finish_reason="stop") 106 | else: 107 | return OpenAICompletion.from_text( 108 | response, finish_reason="stop", sources=sources 109 | ) 110 | 111 | 112 | def to_openai_sse_stream( 113 | response_generator: Iterator[str | CompletionResponse | ChatResponse], 114 | sources: list[Chunk] | None = None, 115 | ) -> Iterator[str]: 116 | for response in response_generator: 117 | if isinstance(response, CompletionResponse | ChatResponse): 118 | yield f"data: {OpenAICompletion.json_from_delta(text=response.delta)}\n\n" 119 | else: 120 | yield f"data: {OpenAICompletion.json_from_delta(text=response, sources=sources)}\n\n" 121 | yield f"data: {OpenAICompletion.json_from_delta(text='', finish_reason='stop')}\n\n" 122 | yield "data: [DONE]\n\n" 123 | -------------------------------------------------------------------------------- /fern/docs/pages/recipes/list-llm.mdx: -------------------------------------------------------------------------------- 1 | # List of working LLM 2 | 3 | **Do you have any working combination of LLM and embeddings?** 4 | Please open a PR to add it to the list, and come on our Discord to tell us about it! 5 | 6 | ## Prompt style 7 | 8 | LLMs might have been trained with different prompt styles. 9 | The prompt style is the way the prompt is written, and how the system message is injected in the prompt. 
10 | 11 | For example, `llama2` looks like this: 12 | ```text 13 | [INST] <<SYS>> 14 | {{ system_prompt }} 15 | <</SYS>> 16 | 17 | {{ user_message }} [/INST] 18 | ``` 19 | 20 | While `default` (the `llama_index` default) looks like this: 21 | ```text 22 | system: {{ system_prompt }} 23 | user: {{ user_message }} 24 | assistant: {{ assistant_message }} 25 | ``` 26 | 27 | The "`tag`" style looks like this: 28 | 29 | ```text 30 | <|system|>: {{ system_prompt }} 31 | <|user|>: {{ user_message }} 32 | <|assistant|>: {{ assistant_message }} 33 | ``` 34 | 35 | The "`mistral`" style looks like this: 36 | 37 | ```text 38 | [INST] You are an AI assistant. [/INST][INST] Hello, how are you doing? [/INST] 39 | ``` 40 | 41 | The "`chatml`" style looks like this: 42 | ```text 43 | <|im_start|>system 44 | {{ system_prompt }}<|im_end|> 45 | <|im_start|>user 46 | {{ user_message }}<|im_end|> 47 | <|im_start|>assistant 48 | {{ assistant_message }} 49 | ``` 50 | 51 | Some LLMs will not understand these prompt styles, and will not work (they return nothing). 52 | You can try to change the prompt style to `default` (or `tag`) in the settings, and it will 53 | change the way the messages are formatted before they are passed to the LLM. 54 | 55 | ## Example of configuration 56 | 57 | You might want to change the prompt depending on the language and model you are using. 58 | 59 | ### English, with instructions 60 | 61 | `settings-en.yaml`: 62 | ```yml 63 | local: 64 | llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.1-GGUF 65 | llm_hf_model_file: mistral-7b-instruct-v0.1.Q4_K_M.gguf 66 | embedding_hf_model_name: BAAI/bge-small-en-v1.5 67 | prompt_style: "llama2" 68 | ``` 69 | 70 | ### French, with instructions 71 | 72 | `settings-fr.yaml`: 73 | ```yml 74 | local: 75 | llm_hf_repo_id: TheBloke/Vigogne-2-7B-Instruct-GGUF 76 | llm_hf_model_file: vigogne-2-7b-instruct.Q4_K_M.gguf 77 | embedding_hf_model_name: dangvantuan/sentence-camembert-base 78 | prompt_style: "default" 79 | # prompt_style: "tag" # also works 80 | # The default system prompt is injected only when the `prompt_style` != default, and there is no system message in the discussion 81 | # default_system_prompt: Vous êtes un assistant IA qui répond à la question posée à la fin en utilisant le contexte suivant. Si vous ne connaissez pas la réponse, dites simplement que vous ne savez pas, n'essayez pas d'inventer une réponse. Veuillez répondre exclusivement en français. 82 | ``` 83 | 84 | You might want to change the prompt, as the one above might not directly answer your question. 85 | You can read online about how to write a good prompt, but in a nutshell, make it (extremely) directive. 86 | 87 | You can try to troubleshoot your prompt by writing multiline requests in the UI, spelling out 88 | the interaction you expect from the model, for example: 89 | 90 | ```text 91 | Tu es un programmeur senior qui programme en python et utilise le framework fastapi. Ecrit moi un serveur qui retourne "hello world". 92 | ``` 93 | 94 | Another example: 95 | ```text 96 | Context: None 97 | Situation: tu es au milieu d'un champ. 98 | Tache: va a la rivière, en bas du champ. 99 | Décrit comment aller a la rivière.
100 | ``` 101 | 102 | ### Optimised Models 103 | GodziLLa2-70B LLM (English, rank 2 on HuggingFace OpenLLM Leaderboard), bge large Embedding Model (rank 1 on HuggingFace MTEB Leaderboard) 104 | `settings-optimised.yaml`: 105 | ```yml 106 | local: 107 | llm_hf_repo_id: TheBloke/GodziLLa2-70B-GGUF 108 | llm_hf_model_file: godzilla2-70b.Q4_K_M.gguf 109 | embedding_hf_model_name: BAAI/bge-large-en 110 | prompt_style: "llama2" 111 | ``` 112 | ### German speaking model 113 | `settings-de.yaml`: 114 | ```yml 115 | local: 116 | llm_hf_repo_id: TheBloke/em_german_leo_mistral-GGUF 117 | llm_hf_model_file: em_german_leo_mistral.Q4_K_M.gguf 118 | embedding_hf_model_name: T-Systems-onsite/german-roberta-sentence-transformer-v2 119 | #llama, default or tag 120 | prompt_style: "default" 121 | ``` 122 | -------------------------------------------------------------------------------- /scripts/ingest_folder.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | import logging 5 | from pathlib import Path 6 | 7 | from private_gpt.di import global_injector 8 | from private_gpt.server.ingest.ingest_service import IngestService 9 | from private_gpt.server.ingest.ingest_watcher import IngestWatcher 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | class LocalIngestWorker: 15 | def __init__(self, ingest_service: IngestService) -> None: 16 | self.ingest_service = ingest_service 17 | 18 | self.total_documents = 0 19 | self.current_document_count = 0 20 | 21 | self._files_under_root_folder: list[Path] = [] 22 | 23 | def _find_all_files_in_folder(self, root_path: Path, ignored: list[str]) -> None: 24 | """Search all files under the root folder recursively. 25 | 26 | Count them at the same time 27 | """ 28 | for file_path in root_path.iterdir(): 29 | if file_path.is_file() and file_path.name not in ignored: 30 | self.total_documents += 1 31 | self._files_under_root_folder.append(file_path) 32 | elif file_path.is_dir() and file_path.name not in ignored: 33 | self._find_all_files_in_folder(file_path, ignored) 34 | 35 | def ingest_folder(self, folder_path: Path, ignored: list[str]) -> None: 36 | # Count total documents before ingestion 37 | self._find_all_files_in_folder(folder_path, ignored) 38 | self._ingest_all(self._files_under_root_folder) 39 | 40 | def _ingest_all(self, files_to_ingest: list[Path]) -> None: 41 | logger.info("Ingesting files=%s", [f.name for f in files_to_ingest]) 42 | self.ingest_service.bulk_ingest([(str(p.name), p) for p in files_to_ingest]) 43 | 44 | def ingest_on_watch(self, changed_path: Path) -> None: 45 | logger.info("Detected change in at path=%s, ingesting", changed_path) 46 | self._do_ingest_one(changed_path) 47 | 48 | def _do_ingest_one(self, changed_path: Path) -> None: 49 | try: 50 | if changed_path.exists(): 51 | logger.info(f"Started ingesting file={changed_path}") 52 | self.ingest_service.ingest_file(changed_path.name, changed_path) 53 | logger.info(f"Completed ingesting file={changed_path}") 54 | except Exception: 55 | logger.exception( 56 | f"Failed to ingest document: {changed_path}, find the exception attached" 57 | ) 58 | 59 | 60 | parser = argparse.ArgumentParser(prog="ingest_folder.py") 61 | parser.add_argument("folder", help="Folder to ingest") 62 | parser.add_argument( 63 | "--watch", 64 | help="Watch for changes", 65 | action=argparse.BooleanOptionalAction, 66 | default=False, 67 | ) 68 | parser.add_argument( 69 | "--ignored", 70 | nargs="*", 71 | help="List of 
files/directories to ignore", 72 | default=[], 73 | ) 74 | parser.add_argument( 75 | "--log-file", 76 | help="Optional path to a log file. If provided, logs will be written to this file.", 77 | type=str, 78 | default=None, 79 | ) 80 | 81 | args = parser.parse_args() 82 | 83 | # Set up logging to a file if a path is provided 84 | if args.log_file: 85 | file_handler = logging.FileHandler(args.log_file, mode="a") 86 | file_handler.setFormatter( 87 | logging.Formatter( 88 | "[%(asctime)s.%(msecs)03d] [%(levelname)s] %(message)s", 89 | datefmt="%Y-%m-%d %H:%M:%S", 90 | ) 91 | ) 92 | logger.addHandler(file_handler) 93 | 94 | if __name__ == "__main__": 95 | 96 | root_path = Path(args.folder) 97 | if not root_path.exists(): 98 | raise ValueError(f"Path {args.folder} does not exist") 99 | 100 | ingest_service = global_injector.get(IngestService) 101 | worker = LocalIngestWorker(ingest_service) 102 | worker.ingest_folder(root_path, args.ignored) 103 | 104 | if args.ignored: 105 | logger.info(f"Skipping following files and directories: {args.ignored}") 106 | 107 | if args.watch: 108 | logger.info(f"Watching {args.folder} for changes, press Ctrl+C to stop...") 109 | directories_to_watch = [ 110 | dir 111 | for dir in root_path.iterdir() 112 | if dir.is_dir() and dir.name not in args.ignored 113 | ] 114 | watcher = IngestWatcher(args.folder, worker.ingest_on_watch) 115 | watcher.start() 116 | -------------------------------------------------------------------------------- /private_gpt/components/ingest/ingest_helper.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | 4 | from llama_index.core.readers import StringIterableReader 5 | from llama_index.core.readers.base import BaseReader 6 | from llama_index.core.readers.json import JSONReader 7 | from llama_index.core.schema import Document 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | # Inspired by the `llama_index.core.readers.file.base` module 13 | def _try_loading_included_file_formats() -> dict[str, type[BaseReader]]: 14 | try: 15 | from llama_index.readers.file.docs import ( # type: ignore 16 | DocxReader, 17 | HWPReader, 18 | PDFReader, 19 | ) 20 | from llama_index.readers.file.epub import EpubReader # type: ignore 21 | from llama_index.readers.file.image import ImageReader # type: ignore 22 | from llama_index.readers.file.ipynb import IPYNBReader # type: ignore 23 | from llama_index.readers.file.markdown import MarkdownReader # type: ignore 24 | from llama_index.readers.file.mbox import MboxReader # type: ignore 25 | from llama_index.readers.file.slides import PptxReader # type: ignore 26 | from llama_index.readers.file.tabular import PandasCSVReader # type: ignore 27 | from llama_index.readers.file.video_audio import ( # type: ignore 28 | VideoAudioReader, 29 | ) 30 | except ImportError as e: 31 | raise ImportError("`llama-index-readers-file` package not found") from e 32 | 33 | default_file_reader_cls: dict[str, type[BaseReader]] = { 34 | ".hwp": HWPReader, 35 | ".pdf": PDFReader, 36 | ".docx": DocxReader, 37 | ".pptx": PptxReader, 38 | ".ppt": PptxReader, 39 | ".pptm": PptxReader, 40 | ".jpg": ImageReader, 41 | ".png": ImageReader, 42 | ".jpeg": ImageReader, 43 | ".mp3": VideoAudioReader, 44 | ".mp4": VideoAudioReader, 45 | ".csv": PandasCSVReader, 46 | ".epub": EpubReader, 47 | ".md": MarkdownReader, 48 | ".mbox": MboxReader, 49 | ".ipynb": IPYNBReader, 50 | } 51 | return default_file_reader_cls 52 | 53 | 54 | # Patching the default file reader 
to support other file types 55 | FILE_READER_CLS = _try_loading_included_file_formats() 56 | FILE_READER_CLS.update( 57 | { 58 | ".json": JSONReader, 59 | } 60 | ) 61 | 62 | 63 | class IngestionHelper: 64 | """Helper class to transform a file into a list of documents. 65 | 66 | This class should be used to transform a file into a list of documents. 67 | These methods are thread-safe (and multiprocessing-safe). 68 | """ 69 | 70 | @staticmethod 71 | def transform_file_into_documents( 72 | file_name: str, file_data: Path 73 | ) -> list[Document]: 74 | documents = IngestionHelper._load_file_to_documents(file_name, file_data) 75 | for document in documents: 76 | document.metadata["file_name"] = file_name 77 | IngestionHelper._exclude_metadata(documents) 78 | return documents 79 | 80 | @staticmethod 81 | def _load_file_to_documents(file_name: str, file_data: Path) -> list[Document]: 82 | logger.debug("Transforming file_name=%s into documents", file_name) 83 | extension = Path(file_name).suffix 84 | reader_cls = FILE_READER_CLS.get(extension) 85 | if reader_cls is None: 86 | logger.debug( 87 | "No reader found for extension=%s, using default string reader", 88 | extension, 89 | ) 90 | # Read as a plain text 91 | string_reader = StringIterableReader() 92 | return string_reader.load_data([file_data.read_text()]) 93 | 94 | logger.debug("Specific reader found for extension=%s", extension) 95 | return reader_cls().load_data(file_data) 96 | 97 | @staticmethod 98 | def _exclude_metadata(documents: list[Document]) -> None: 99 | logger.debug("Excluding metadata from count=%s documents", len(documents)) 100 | for document in documents: 101 | document.metadata["doc_id"] = document.doc_id 102 | # We don't want the Embeddings search to receive this metadata 103 | document.excluded_embed_metadata_keys = ["doc_id"] 104 | # We don't want the LLM to receive these metadata in the context 105 | document.excluded_llm_metadata_keys = ["file_name", "doc_id", "page_label"] 106 | -------------------------------------------------------------------------------- /private_gpt/server/chat/chat_router.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter, Depends, Request 2 | from llama_index.core.llms import ChatMessage, MessageRole 3 | from pydantic import BaseModel 4 | from starlette.responses import StreamingResponse 5 | 6 | from private_gpt.open_ai.extensions.context_filter import ContextFilter 7 | from private_gpt.open_ai.openai_models import ( 8 | OpenAICompletion, 9 | OpenAIMessage, 10 | to_openai_response, 11 | to_openai_sse_stream, 12 | ) 13 | from private_gpt.server.chat.chat_service import ChatService 14 | from private_gpt.server.utils.auth import authenticated 15 | 16 | chat_router = APIRouter(prefix="/v1", dependencies=[Depends(authenticated)]) 17 | 18 | 19 | class ChatBody(BaseModel): 20 | messages: list[OpenAIMessage] 21 | use_context: bool = False 22 | context_filter: ContextFilter | None = None 23 | include_sources: bool = True 24 | stream: bool = False 25 | 26 | model_config = { 27 | "json_schema_extra": { 28 | "examples": [ 29 | { 30 | "messages": [ 31 | { 32 | "role": "system", 33 | "content": "You are a rapper. 
Always answer with a rap.", 34 | }, 35 | { 36 | "role": "user", 37 | "content": "How do you fry an egg?", 38 | }, 39 | ], 40 | "stream": False, 41 | "use_context": True, 42 | "include_sources": True, 43 | "context_filter": { 44 | "docs_ids": ["c202d5e6-7b69-4869-81cc-dd574ee8ee11"] 45 | }, 46 | } 47 | ] 48 | } 49 | } 50 | 51 | 52 | @chat_router.post( 53 | "/chat/completions", 54 | response_model=None, 55 | responses={200: {"model": OpenAICompletion}}, 56 | tags=["Contextual Completions"], 57 | openapi_extra={ 58 | "x-fern-streaming": { 59 | "stream-condition": "stream", 60 | "response": {"$ref": "#/components/schemas/OpenAICompletion"}, 61 | "response-stream": {"$ref": "#/components/schemas/OpenAICompletion"}, 62 | } 63 | }, 64 | ) 65 | def chat_completion( 66 | request: Request, body: ChatBody 67 | ) -> OpenAICompletion | StreamingResponse: 68 | """Given a list of messages comprising a conversation, return a response. 69 | 70 | Optionally include an initial `role: system` message to influence the way 71 | the LLM answers. 72 | 73 | If `use_context` is set to `true`, the model will use context coming 74 | from the ingested documents to create the response. The documents being used can 75 | be filtered using the `context_filter` and passing the document IDs to be used. 76 | Ingested documents IDs can be found using `/ingest/list` endpoint. If you want 77 | all ingested documents to be used, remove `context_filter` altogether. 78 | 79 | When using `'include_sources': true`, the API will return the source Chunks used 80 | to create the response, which come from the context provided. 81 | 82 | When using `'stream': true`, the API will return data chunks following [OpenAI's 83 | streaming model](https://platform.openai.com/docs/api-reference/chat/streaming): 84 | ``` 85 | {"id":"12345","object":"completion.chunk","created":1694268190, 86 | "model":"private-gpt","choices":[{"index":0,"delta":{"content":"Hello"}, 87 | "finish_reason":null}]} 88 | ``` 89 | """ 90 | service = request.state.injector.get(ChatService) 91 | all_messages = [ 92 | ChatMessage(content=m.content, role=MessageRole(m.role)) for m in body.messages 93 | ] 94 | if body.stream: 95 | completion_gen = service.stream_chat( 96 | messages=all_messages, 97 | use_context=body.use_context, 98 | context_filter=body.context_filter, 99 | ) 100 | return StreamingResponse( 101 | to_openai_sse_stream( 102 | completion_gen.response, 103 | completion_gen.sources if body.include_sources else None, 104 | ), 105 | media_type="text/event-stream", 106 | ) 107 | else: 108 | completion = service.chat( 109 | messages=all_messages, 110 | use_context=body.use_context, 111 | context_filter=body.context_filter, 112 | ) 113 | return to_openai_response( 114 | completion.response, completion.sources if body.include_sources else None 115 | ) 116 | -------------------------------------------------------------------------------- /private_gpt/utils/eta.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import logging 3 | import math 4 | import time 5 | from collections import deque 6 | from typing import Any 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | def human_time(*args: Any, **kwargs: Any) -> str: 12 | def timedelta_total_seconds(timedelta: datetime.timedelta) -> float: 13 | return ( 14 | timedelta.microseconds 15 | + 0.0 16 | + (timedelta.seconds + timedelta.days * 24 * 3600) * 10**6 17 | ) / 10**6 18 | 19 | secs = float(timedelta_total_seconds(datetime.timedelta(*args, **kwargs))) 20 | 
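# For example, human_time(seconds=4000) returns "1h 6m 40s", while durations
# under two seconds are reported in milliseconds, e.g. human_time(seconds=1.5)
# returns "1500.0ms".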
# We want (ms) precision below 2 seconds 21 | if secs < 2: 22 | return f"{secs * 1000}ms" 23 | units = [("y", 86400 * 365), ("d", 86400), ("h", 3600), ("m", 60), ("s", 1)] 24 | parts = [] 25 | for unit, mul in units: 26 | if secs / mul >= 1 or mul == 1: 27 | if mul > 1: 28 | n = int(math.floor(secs / mul)) 29 | secs -= n * mul 30 | else: 31 | # >2s we drop the (ms) component. 32 | n = int(secs) 33 | if n: 34 | parts.append(f"{n}{unit}") 35 | return " ".join(parts) 36 | 37 | 38 | def eta(iterator: list[Any]) -> Any: 39 | """Report an ETA after 30s and every 60s thereafter.""" 40 | total = len(iterator) 41 | _eta = ETA(total) 42 | _eta.needReport(30) 43 | for processed, data in enumerate(iterator, start=1): 44 | yield data 45 | _eta.update(processed) 46 | if _eta.needReport(60): 47 | logger.info(f"{processed}/{total} - ETA {_eta.human_time()}") 48 | 49 | 50 | class ETA: 51 | """Predict how long something will take to complete.""" 52 | 53 | def __init__(self, total: int): 54 | self.total: int = total # Total expected records. 55 | self.rate: float = 0.0 # per second 56 | self._timing_data: deque[tuple[float, int]] = deque(maxlen=100) 57 | self.secondsLeft: float = 0.0 58 | self.nexttime: float = 0.0 59 | 60 | def human_time(self) -> str: 61 | if self._calc(): 62 | return f"{human_time(seconds=self.secondsLeft)} @ {int(self.rate * 60)}/min" 63 | return "(computing)" 64 | 65 | def update(self, count: int) -> None: 66 | # count should be in the range 0 to self.total 67 | assert count > 0 68 | assert count <= self.total 69 | self._timing_data.append((time.time(), count)) # (X,Y) for pearson 70 | 71 | def needReport(self, whenSecs: int) -> bool: 72 | now = time.time() 73 | if now > self.nexttime: 74 | self.nexttime = now + whenSecs 75 | return True 76 | return False 77 | 78 | def _calc(self) -> bool: 79 | # A sample before a prediction. Need two points to compute slope! 80 | if len(self._timing_data) < 3: 81 | return False 82 | 83 | # http://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient 84 | # Calculate means and standard deviations. 85 | samples = len(self._timing_data) 86 | # column wise sum of the timing tuples to compute their mean. 87 | mean_x, mean_y = ( 88 | sum(i) / samples for i in zip(*self._timing_data, strict=False) 89 | ) 90 | std_x = math.sqrt( 91 | sum(pow(i[0] - mean_x, 2) for i in self._timing_data) / (samples - 1) 92 | ) 93 | std_y = math.sqrt( 94 | sum(pow(i[1] - mean_y, 2) for i in self._timing_data) / (samples - 1) 95 | ) 96 | 97 | # Calculate coefficient. 98 | sum_xy, sum_sq_v_x, sum_sq_v_y = 0.0, 0.0, 0 99 | for x, y in self._timing_data: 100 | x -= mean_x 101 | y -= mean_y 102 | sum_xy += x * y 103 | sum_sq_v_x += pow(x, 2) 104 | sum_sq_v_y += pow(y, 2) 105 | pearson_r = sum_xy / math.sqrt(sum_sq_v_x * sum_sq_v_y) 106 | 107 | # Calculate regression line. 108 | # y = mx + b where m is the slope and b is the y-intercept. 109 | m = self.rate = pearson_r * (std_y / std_x) 110 | y = self.total 111 | b = mean_y - m * mean_x 112 | x = (y - b) / m 113 | 114 | # Calculate fitted line (transformed/shifted regression line horizontally). 
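# The regression line keeps its slope but is shifted so that it passes through the
# most recent (timestamp, count) sample; both lines are then solved for the time at
# which y == total, and the two estimates are blended, trusting the shifted line
# more as progress approaches completion.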
115 | fitted_b = self._timing_data[-1][1] - (m * self._timing_data[-1][0]) 116 | fitted_x = (y - fitted_b) / m 117 | _, count = self._timing_data[-1] # adjust last data point progress count 118 | adjusted_x = ((fitted_x - x) * (count / self.total)) + x 119 | eta_epoch = adjusted_x 120 | 121 | self.secondsLeft = max([eta_epoch - time.time(), 0]) 122 | return True 123 | -------------------------------------------------------------------------------- /private_gpt/server/ingest/ingest_router.py: -------------------------------------------------------------------------------- 1 | from typing import Literal 2 | 3 | from fastapi import APIRouter, Depends, HTTPException, Request, UploadFile 4 | from pydantic import BaseModel, Field 5 | 6 | from private_gpt.server.ingest.ingest_service import IngestService 7 | from private_gpt.server.ingest.model import IngestedDoc 8 | from private_gpt.server.utils.auth import authenticated 9 | 10 | ingest_router = APIRouter(prefix="/v1", dependencies=[Depends(authenticated)]) 11 | 12 | 13 | class IngestTextBody(BaseModel): 14 | file_name: str = Field(examples=["Avatar: The Last Airbender"]) 15 | text: str = Field( 16 | examples=[ 17 | "Avatar is set in an Asian and Arctic-inspired world in which some " 18 | "people can telekinetically manipulate one of the four elements—water, " 19 | "earth, fire or air—through practices known as 'bending', inspired by " 20 | "Chinese martial arts." 21 | ] 22 | ) 23 | 24 | 25 | class IngestResponse(BaseModel): 26 | object: Literal["list"] 27 | model: Literal["private-gpt"] 28 | data: list[IngestedDoc] 29 | 30 | 31 | @ingest_router.post("/ingest", tags=["Ingestion"], deprecated=True) 32 | def ingest(request: Request, file: UploadFile) -> IngestResponse: 33 | """Ingests and processes a file. 34 | 35 | Deprecated. Use ingest/file instead. 36 | """ 37 | return ingest_file(request, file) 38 | 39 | 40 | @ingest_router.post("/ingest/file", tags=["Ingestion"]) 41 | def ingest_file(request: Request, file: UploadFile) -> IngestResponse: 42 | """Ingests and processes a file, storing its chunks to be used as context. 43 | 44 | The context obtained from files is later used in 45 | `/chat/completions`, `/completions`, and `/chunks` APIs. 46 | 47 | Most common document 48 | formats are supported, but you may be prompted to install an extra dependency to 49 | manage a specific file type. 50 | 51 | A file can generate different Documents (for example a PDF generates one Document 52 | per page). All Documents IDs are returned in the response, together with the 53 | extracted Metadata (which is later used to improve context retrieval). Those IDs 54 | can be used to filter the context used to create responses in 55 | `/chat/completions`, `/completions`, and `/chunks` APIs. 56 | """ 57 | service = request.state.injector.get(IngestService) 58 | if file.filename is None: 59 | raise HTTPException(400, "No file name provided") 60 | ingested_documents = service.ingest_bin_data(file.filename, file.file) 61 | return IngestResponse(object="list", model="private-gpt", data=ingested_documents) 62 | 63 | 64 | @ingest_router.post("/ingest/text", tags=["Ingestion"]) 65 | def ingest_text(request: Request, body: IngestTextBody) -> IngestResponse: 66 | """Ingests and processes a text, storing its chunks to be used as context. 67 | 68 | The context obtained from files is later used in 69 | `/chat/completions`, `/completions`, and `/chunks` APIs. 70 | 71 | A Document will be generated with the given text. 
The Document 72 | ID is returned in the response, together with the 73 | extracted Metadata (which is later used to improve context retrieval). That ID 74 | can be used to filter the context used to create responses in 75 | `/chat/completions`, `/completions`, and `/chunks` APIs. 76 | """ 77 | service = request.state.injector.get(IngestService) 78 | if len(body.file_name) == 0: 79 | raise HTTPException(400, "No file name provided") 80 | ingested_documents = service.ingest_text(body.file_name, body.text) 81 | return IngestResponse(object="list", model="private-gpt", data=ingested_documents) 82 | 83 | 84 | @ingest_router.get("/ingest/list", tags=["Ingestion"]) 85 | def list_ingested(request: Request) -> IngestResponse: 86 | """Lists already ingested Documents including their Document ID and metadata. 87 | 88 | Those IDs can be used to filter the context used to create responses 89 | in `/chat/completions`, `/completions`, and `/chunks` APIs. 90 | """ 91 | service = request.state.injector.get(IngestService) 92 | ingested_documents = service.list_ingested() 93 | return IngestResponse(object="list", model="private-gpt", data=ingested_documents) 94 | 95 | 96 | @ingest_router.delete("/ingest/{doc_id}", tags=["Ingestion"]) 97 | def delete_ingested(request: Request, doc_id: str) -> None: 98 | """Delete the specified ingested Document. 99 | 100 | The `doc_id` can be obtained from the `GET /ingest/list` endpoint. 101 | The document will be effectively deleted from your storage context. 102 | """ 103 | service = request.state.injector.get(IngestService) 104 | service.delete(doc_id) 105 | -------------------------------------------------------------------------------- /private_gpt/ui/images.py: -------------------------------------------------------------------------------- 1 | logo_svg = 
"data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iODYxIiBoZWlnaHQ9Ijk4IiB2aWV3Qm94PSIwIDAgODYxIDk4IiBmaWxsPSJub25lIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciPgo8cGF0aCBkPSJNNDguMTM0NSAwLjE1NzkxMUMzNi44Mjk5IDEuMDM2NTQgMjYuMTIwNSA1LjU1MzI4IDE3LjYyNTYgMTMuMDI1QzkuMTMwNDYgMjAuNDk2NyAzLjMxMTcgMzAuNTE2OSAxLjA0OTUyIDQxLjU3MDVDLTEuMjEyNzMgNTIuNjIzOCAwLjIwNDQxOSA2NC4xMDk0IDUuMDg2MiA3NC4yOTA1QzkuOTY4NjggODQuNDcxNiAxOC4wNTAzIDkyLjc5NDMgMjguMTA5OCA5OEwzMy43MDI2IDgyLjU5MDdMMzUuNDU0MiA3Ny43NjU2QzI5LjgzODcgNzQuMTY5MiAyNS41NDQ0IDY4Ljg2MDcgMjMuMjE0IDYyLjYzNDRDMjAuODgyMiA1Ni40MDg2IDIwLjYzOSA0OS41OTkxIDIyLjUyMDQgNDMuMjI0M0MyNC40MDI5IDM2Ljg0OTUgMjguMzA5NiAzMS4yNTI1IDMzLjY1NjEgMjcuMjcwNkMzOS4wMDIgMjMuMjg4MyA0NS41MDAzIDIxLjEzNSA1Mi4xNzg5IDIxLjEzM0M1OC44NTczIDIxLjEzMDMgNjUuMzU3MSAyMy4yNzgzIDcwLjcwNjUgMjcuMjU1OEM3Ni4wNTU0IDMxLjIzNCA3OS45NjY0IDM2LjgyNzcgODEuODU0MyA0My4yMDA2QzgzLjc0MjkgNDkuNTczNiA4My41MDYyIDU2LjM4MzYgODEuMTgwMSA2Mi42MTE3Qzc4Ljg1NDUgNjguODM5NiA3NC41NjUgNzQuMTUxNCA2OC45NTI5IDc3Ljc1MjhMNzAuNzA3NCA4Mi41OTA3TDc2LjMwMDIgOTcuOTk3MUM4Ni45Nzg4IDkyLjQ3MDUgOTUuNDA4OCA4My40NDE5IDEwMC4xNjMgNzIuNDQwNEMxMDQuOTE3IDYxLjQzOTQgMTA1LjcwNCA0OS4xNDE3IDEwMi4zODkgMzcuNjNDOTkuMDc0NiAyNi4xMTc5IDkxLjg2MjcgMTYuMDk5MyA4MS45NzQzIDkuMjcwNzlDNzIuMDg2MSAyLjQ0MTkxIDYwLjEyOTEgLTAuNzc3MDg2IDQ4LjEyODYgMC4xNTg5MzRMNDguMTM0NSAwLjE1NzkxMVoiIGZpbGw9IiMxRjFGMjkiLz4KPGcgY2xpcC1wYXRoPSJ1cmwoI2NsaXAwXzVfMTkpIj4KPHBhdGggZD0iTTIyMC43NzIgMTIuNzUyNEgyNTIuNjM5QzI2Ny4yNjMgMTIuNzUyNCAyNzcuNzM5IDIxLjk2NzUgMjc3LjczOSAzNS40MDUyQzI3Ny43MzkgNDYuNzg3IDI2OS44ODEgNTUuMzUwOCAyNTguMzE0IDU3LjQxMDdMMjc4LjgzIDg1LjM3OTRIMjYxLjM3TDI0Mi4wNTQgNTcuOTUzM0gyMzUuNTA2Vjg1LjM3OTRIMjIwLjc3NEwyMjAuNzcyIDEyLjc1MjRaTTIzNS41MDQgMjYuMzAyOFY0NC40MDdIMjUyLjYzMkMyNTguOTYyIDQ0LjQwNyAyNjIuOTk5IDQwLjgyOTggMjYyLjk5OSAzNS40MTAyQzI2Mi45OTkgMjkuODgwOSAyNTguOTYyIDI2LjMwMjggMjUyLjYzMiAyNi4zMDI4SDIzNS41MDRaIiBmaWxsPSIjMUYxRjI5Ii8+CjxwYXRoIGQ9Ik0yOTUuMTc2IDg1LjM4NDRWMTIuNzUyNEgzMDkuOTA5Vjg1LjM4NDRIMjk1LjE3NloiIGZpbGw9IiMxRjFGMjkiLz4KPHBhdGggZD0iTTM2My43OTUgNjUuNzYzTDM4NS42MiAxMi43NTI0SDQwMS40NDRMMzcxLjIxNSA4NS4zODQ0SDM1Ni40ODNMMzI2LjI1NCAxMi43NTI0SDM0Mi4wNzhMMzYzLjc5NSA2NS43NjNaIiBmaWxsPSIjMUYxRjI5Ii8+CjxwYXRoIGQ9Ik00NDguMzI3IDcyLjA1MDRINDE1LjY5OEw0MTAuMjQxIDg1LjM4NDRIMzk0LjQxOEw0MjQuNjQ3IDEyLjc1MjRINDM5LjM3OUw0NjkuNjA4IDg1LjM4NDRINDUzLjc4M0w0NDguMzI3IDcyLjA1MDRaTTQ0Mi43NjEgNTguNUw0MzIuMDY2IDMyLjM3NDhMNDIxLjI2MiA1OC41SDQ0Mi43NjFaIiBmaWxsPSIjMUYxRjI5Ii8+CjxwYXRoIGQ9Ik00NjUuMjIxIDEyLjc1MjRINTMwLjU5MlYyNi4zMDI4SDUwNS4yNzVWODUuMzg0NEg0OTAuNTM5VjI2LjMwMjhINDY1LjIyMVYxMi43NTI0WiIgZmlsbD0iIzFGMUYyOSIvPgo8cGF0aCBkPSJNNTk1LjE5MyAxMi43NTI0VjI2LjMwMjhINTYyLjEyOFY0MS4xNTUxSDU5NS4xOTNWNTQuNzA2NUg1NjIuMTI4VjcxLjgzNEg1OTUuMTkzVjg1LjM4NDRINTQ3LjM5NVYxMi43NTI0SDU5NS4xOTNaIiBmaWxsPSIjMUYxRjI5Ii8+CjxwYXRoIGQ9Ik0xNjcuMjAxIDU3LjQxNThIMTg2LjUzNkMxOTAuODg2IDU3LjQ2NjIgMTk1LjE2OCA1Ni4zMzQ4IDE5OC45MTggNTQuMTQzN0MyMDIuMTc5IDUyLjIxOTkgMjA0Ljg2OSA0OS40NzM2IDIwNi43MTYgNDYuMTgzNUMyMDguNTYyIDQyLjg5MzQgMjA5LjUgMzkuMTc2NiAyMDkuNDMzIDM1LjQxMDJDMjA5LjQzMyAyMS45Njc1IDE5OC45NTggMTIuNzU3NCAxODQuMzM0IDEyLjc1NzRIMTUyLjQ2OFY4NS4zODk0SDE2Ny4yMDFWNTcuNDIwN1Y1Ny40MTU4Wk0xNjcuMjAxIDI2LjMwNThIMTg0LjMyOUMxOTAuNjU4IDI2LjMwNTggMTk0LjY5NiAyOS44ODQgMTk0LjY5NiAzNS40MTMzQzE5NC42OTYgNDAuODMyOSAxOTAuNjU4IDQ0LjQwOTkgMTg0LjMyOSA0NC40MDk5SDE2Ny4yMDFWMjYuMzA1OFoiIGZpbGw9IiMxRjFGMjkiLz4KPHBhdGggZD0iTTc5NC44MzUgMTIuNzUyNEg4NjAuMjA2VjI2LjMwMjhIODM0Ljg4OVY4NS4zODQ0SDgyMC4xNTZWMjYuMzAyOEg3OTQuODM1VjEyLjc1MjRaIiBmaWxsPSIjMUYxRjI5Ii8+CjxwYXRoIGQ9Ik03NDEuOTA3IDU3LjQxNThINzYxLjI0MUM3NjUuNTkyIDU3LjQ2NjEgNzY5Ljg3NCA1Ni4zMzQ3IDc3My42MjQgNTQuMTQzN0M3NzYuODg0IDUyLjIx
OTkgNzc5LjU3NSA0OS40NzM2IDc4MS40MjEgNDYuMTgzNUM3ODMuMjY4IDQyLjg5MzQgNzg0LjIwNiAzOS4xNzY2IDc4NC4xMzkgMzUuNDEwMkM3ODQuMTM5IDIxLjk2NzUgNzczLjY2NCAxMi43NTc0IDc1OS4wMzkgMTIuNzU3NEg3MjcuMTc1Vjg1LjM4OTRINzQxLjkwN1Y1Ny40MjA3VjU3LjQxNThaTTc0MS45MDcgMjYuMzA1OEg3NTkuMDM1Qzc2NS4zNjUgMjYuMzA1OCA3NjkuNDAzIDI5Ljg4NCA3NjkuNDAzIDM1LjQxMzNDNzY5LjQwMyA0MC44MzI5IDc2NS4zNjUgNDQuNDA5OSA3NTkuMDM1IDQ0LjQwOTlINzQxLjkwN1YyNi4zMDU4WiIgZmlsbD0iIzFGMUYyOSIvPgo8cGF0aCBkPSJNNjgxLjA2OSA0Ny4wMTE1VjU5LjAxMjVINjk1LjM3OVY3MS42NzE5QzY5Mi41MjYgNzMuNDM2OCA2ODguNTI0IDc0LjMzMTkgNjgzLjQ3NyA3NC4zMzE5QzY2Ni4wMDMgNzQuMzMxOSA2NTguMDQ1IDYxLjgxMjQgNjU4LjA0NSA1MC4xOEM2NTguMDQ1IDMzLjk2MDUgNjcxLjAwOCAyNS40NzMyIDY4My44MTIgMjUuNDczMkM2OTAuNDI1IDI1LjQ2MjggNjk2LjkwOSAyNy4yODA0IDcwMi41NDEgMzAuNzIyNkw3MDMuMTU3IDMxLjEyNTRMNzA1Ljk1OCAxOC4xODZMNzA1LjY2MyAxNy45OTc3QzcwMC4wNDYgMTQuNDAwNCA2OTEuMjkxIDEyLjI1OSA2ODIuMjUxIDEyLjI1OUM2NjMuMTk3IDEyLjI1OSA2NDIuOTQ5IDI1LjM5NjcgNjQyLjk0OSA0OS43NDVDNjQyLjk0OSA2MS4wODQ1IDY0Ny4yOTMgNzAuNzE3NCA2NTUuNTExIDc3LjYwMjlDNjYzLjIyNCA4My44MjQ1IDY3Mi44NzQgODcuMTg5IDY4Mi44MDkgODcuMTIwMUM2OTQuMzYzIDg3LjEyMDEgNzAzLjA2MSA4NC42NDk1IDcwOS40MDIgNzkuNTY5Mkw3MDkuNTg5IDc5LjQxODFWNDcuMDExNUg2ODEuMDY5WiIgZmlsbD0iIzFGMUYyOSIvPgo8L2c+CjxkZWZzPgo8Y2xpcFBhdGggaWQ9ImNsaXAwXzVfMTkiPgo8cmVjdCB3aWR0aD0iNzA3Ljc3OCIgaGVpZ2h0PSI3NC44NjExIiBmaWxsPSJ3aGl0ZSIgdHJhbnNmb3JtPSJ0cmFuc2xhdGUoMTUyLjQ0NCAxMi4yNSkiLz4KPC9jbGlwUGF0aD4KPC9kZWZzPgo8L3N2Zz4K" 2 | -------------------------------------------------------------------------------- /tests/test_prompt_helper.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from llama_index.core.llms import ChatMessage, MessageRole 3 | 4 | from private_gpt.components.llm.prompt_helper import ( 5 | ChatMLPromptStyle, 6 | DefaultPromptStyle, 7 | Llama2PromptStyle, 8 | MistralPromptStyle, 9 | TagPromptStyle, 10 | get_prompt_style, 11 | ) 12 | 13 | 14 | @pytest.mark.parametrize( 15 | ("prompt_style", "expected_prompt_style"), 16 | [ 17 | ("default", DefaultPromptStyle), 18 | ("llama2", Llama2PromptStyle), 19 | ("tag", TagPromptStyle), 20 | ("mistral", MistralPromptStyle), 21 | ("chatml", ChatMLPromptStyle), 22 | ], 23 | ) 24 | def test_get_prompt_style_success(prompt_style, expected_prompt_style): 25 | assert isinstance(get_prompt_style(prompt_style), expected_prompt_style) 26 | 27 | 28 | def test_get_prompt_style_failure(): 29 | prompt_style = "unknown" 30 | with pytest.raises(ValueError) as exc_info: 31 | get_prompt_style(prompt_style) 32 | assert str(exc_info.value) == f"Unknown prompt_style='{prompt_style}'" 33 | 34 | 35 | def test_tag_prompt_style_format(): 36 | prompt_style = TagPromptStyle() 37 | messages = [ 38 | ChatMessage(content="You are an AI assistant.", role=MessageRole.SYSTEM), 39 | ChatMessage(content="Hello, how are you doing?", role=MessageRole.USER), 40 | ] 41 | 42 | expected_prompt = ( 43 | "<|system|>: You are an AI assistant.\n" 44 | "<|user|>: Hello, how are you doing?\n" 45 | "<|assistant|>: " 46 | ) 47 | 48 | assert prompt_style.messages_to_prompt(messages) == expected_prompt 49 | 50 | 51 | def test_tag_prompt_style_format_with_system_prompt(): 52 | prompt_style = TagPromptStyle() 53 | messages = [ 54 | ChatMessage( 55 | content="FOO BAR Custom sys prompt from messages.", role=MessageRole.SYSTEM 56 | ), 57 | ChatMessage(content="Hello, how are you doing?", role=MessageRole.USER), 58 | ] 59 | 60 | expected_prompt = ( 61 | "<|system|>: FOO BAR Custom sys prompt from messages.\n" 62 | "<|user|>: Hello, how are you doing?\n" 63 | "<|assistant|>: 
" 64 | ) 65 | 66 | assert prompt_style.messages_to_prompt(messages) == expected_prompt 67 | 68 | 69 | def test_mistral_prompt_style_format(): 70 | prompt_style = MistralPromptStyle() 71 | messages = [ 72 | ChatMessage(content="A", role=MessageRole.SYSTEM), 73 | ChatMessage(content="B", role=MessageRole.USER), 74 | ] 75 | expected_prompt = "[INST] A\nB [/INST]" 76 | assert prompt_style.messages_to_prompt(messages) == expected_prompt 77 | 78 | messages2 = [ 79 | ChatMessage(content="A", role=MessageRole.SYSTEM), 80 | ChatMessage(content="B", role=MessageRole.USER), 81 | ChatMessage(content="C", role=MessageRole.ASSISTANT), 82 | ChatMessage(content="D", role=MessageRole.USER), 83 | ] 84 | expected_prompt2 = "[INST] A\nB [/INST] C[INST] D [/INST]" 85 | assert prompt_style.messages_to_prompt(messages2) == expected_prompt2 86 | 87 | 88 | def test_chatml_prompt_style_format(): 89 | prompt_style = ChatMLPromptStyle() 90 | messages = [ 91 | ChatMessage(content="You are an AI assistant.", role=MessageRole.SYSTEM), 92 | ChatMessage(content="Hello, how are you doing?", role=MessageRole.USER), 93 | ] 94 | 95 | expected_prompt = ( 96 | "<|im_start|>system\n" 97 | "You are an AI assistant.<|im_end|>\n" 98 | "<|im_start|>user\n" 99 | "Hello, how are you doing?<|im_end|>\n" 100 | "<|im_start|>assistant\n" 101 | ) 102 | 103 | assert prompt_style.messages_to_prompt(messages) == expected_prompt 104 | 105 | 106 | def test_llama2_prompt_style_format(): 107 | prompt_style = Llama2PromptStyle() 108 | messages = [ 109 | ChatMessage(content="You are an AI assistant.", role=MessageRole.SYSTEM), 110 | ChatMessage(content="Hello, how are you doing?", role=MessageRole.USER), 111 | ] 112 | 113 | expected_prompt = ( 114 | " [INST] <>\n" 115 | " You are an AI assistant. \n" 116 | "<>\n" 117 | "\n" 118 | " Hello, how are you doing? [/INST]" 119 | ) 120 | 121 | assert prompt_style.messages_to_prompt(messages) == expected_prompt 122 | 123 | 124 | def test_llama2_prompt_style_with_system_prompt(): 125 | prompt_style = Llama2PromptStyle() 126 | messages = [ 127 | ChatMessage( 128 | content="FOO BAR Custom sys prompt from messages.", role=MessageRole.SYSTEM 129 | ), 130 | ChatMessage(content="Hello, how are you doing?", role=MessageRole.USER), 131 | ] 132 | 133 | expected_prompt = ( 134 | " [INST] <>\n" 135 | " FOO BAR Custom sys prompt from messages. \n" 136 | "<>\n" 137 | "\n" 138 | " Hello, how are you doing? 
[/INST]" 139 | ) 140 | 141 | assert prompt_style.messages_to_prompt(messages) == expected_prompt 142 | -------------------------------------------------------------------------------- /private_gpt/server/chunks/chunks_service.py: -------------------------------------------------------------------------------- 1 | from typing import TYPE_CHECKING, Literal 2 | 3 | from injector import inject, singleton 4 | from llama_index.core.indices import VectorStoreIndex 5 | from llama_index.core.schema import NodeWithScore 6 | from llama_index.core.storage import StorageContext 7 | from pydantic import BaseModel, Field 8 | 9 | from private_gpt.components.embedding.embedding_component import EmbeddingComponent 10 | from private_gpt.components.llm.llm_component import LLMComponent 11 | from private_gpt.components.node_store.node_store_component import NodeStoreComponent 12 | from private_gpt.components.vector_store.vector_store_component import ( 13 | VectorStoreComponent, 14 | ) 15 | from private_gpt.open_ai.extensions.context_filter import ContextFilter 16 | from private_gpt.server.ingest.model import IngestedDoc 17 | 18 | if TYPE_CHECKING: 19 | from llama_index.core.schema import RelatedNodeInfo 20 | 21 | 22 | class Chunk(BaseModel): 23 | object: Literal["context.chunk"] 24 | score: float = Field(examples=[0.023]) 25 | document: IngestedDoc 26 | text: str = Field(examples=["Outbound sales increased 20%, driven by new leads."]) 27 | previous_texts: list[str] | None = Field( 28 | default=None, 29 | examples=[["SALES REPORT 2023", "Inbound didn't show major changes."]], 30 | ) 31 | next_texts: list[str] | None = Field( 32 | default=None, 33 | examples=[ 34 | [ 35 | "New leads came from Google Ads campaign.", 36 | "The campaign was run by the Marketing Department", 37 | ] 38 | ], 39 | ) 40 | 41 | @classmethod 42 | def from_node(cls: type["Chunk"], node: NodeWithScore) -> "Chunk": 43 | doc_id = node.node.ref_doc_id if node.node.ref_doc_id is not None else "-" 44 | return cls( 45 | object="context.chunk", 46 | score=node.score or 0.0, 47 | document=IngestedDoc( 48 | object="ingest.document", 49 | doc_id=doc_id, 50 | doc_metadata=node.metadata, 51 | ), 52 | text=node.get_content(), 53 | ) 54 | 55 | 56 | @singleton 57 | class ChunksService: 58 | @inject 59 | def __init__( 60 | self, 61 | llm_component: LLMComponent, 62 | vector_store_component: VectorStoreComponent, 63 | embedding_component: EmbeddingComponent, 64 | node_store_component: NodeStoreComponent, 65 | ) -> None: 66 | self.vector_store_component = vector_store_component 67 | self.llm_component = llm_component 68 | self.embedding_component = embedding_component 69 | self.storage_context = StorageContext.from_defaults( 70 | vector_store=vector_store_component.vector_store, 71 | docstore=node_store_component.doc_store, 72 | index_store=node_store_component.index_store, 73 | ) 74 | 75 | def _get_sibling_nodes_text( 76 | self, node_with_score: NodeWithScore, related_number: int, forward: bool = True 77 | ) -> list[str]: 78 | explored_nodes_texts = [] 79 | current_node = node_with_score.node 80 | for _ in range(related_number): 81 | explored_node_info: RelatedNodeInfo | None = ( 82 | current_node.next_node if forward else current_node.prev_node 83 | ) 84 | if explored_node_info is None: 85 | break 86 | 87 | explored_node = self.storage_context.docstore.get_node( 88 | explored_node_info.node_id 89 | ) 90 | 91 | explored_nodes_texts.append(explored_node.get_content()) 92 | current_node = explored_node 93 | 94 | return explored_nodes_texts 95 | 96 | def 
retrieve_relevant( 97 | self, 98 | text: str, 99 | context_filter: ContextFilter | None = None, 100 | limit: int = 10, 101 | prev_next_chunks: int = 0, 102 | ) -> list[Chunk]: 103 | index = VectorStoreIndex.from_vector_store( 104 | self.vector_store_component.vector_store, 105 | storage_context=self.storage_context, 106 | llm=self.llm_component.llm, 107 | embed_model=self.embedding_component.embedding_model, 108 | show_progress=True, 109 | ) 110 | vector_index_retriever = self.vector_store_component.get_retriever( 111 | index=index, context_filter=context_filter, similarity_top_k=limit 112 | ) 113 | nodes = vector_index_retriever.retrieve(text) 114 | nodes.sort(key=lambda n: n.score or 0.0, reverse=True) 115 | 116 | retrieved_nodes = [] 117 | for node in nodes: 118 | chunk = Chunk.from_node(node) 119 | chunk.previous_texts = self._get_sibling_nodes_text( 120 | node, prev_next_chunks, False 121 | ) 122 | chunk.next_texts = self._get_sibling_nodes_text(node, prev_next_chunks) 123 | retrieved_nodes.append(chunk) 124 | 125 | return retrieved_nodes 126 | -------------------------------------------------------------------------------- /settings.yaml: -------------------------------------------------------------------------------- 1 | # The default configuration file. 2 | # More information about configuration can be found in the documentation: https://docs.privategpt.dev/ 3 | # Syntax in `private_pgt/settings/settings.py` 4 | server: 5 | env_name: ${APP_ENV:prod} 6 | port: ${PORT:8001} 7 | cors: 8 | enabled: true 9 | allow_origins: ["*"] 10 | allow_methods: ["*"] 11 | allow_headers: ["*"] 12 | auth: 13 | enabled: false 14 | # python -c 'import base64; print("Basic " + base64.b64encode("secret:key".encode()).decode())' 15 | # 'secret' is the username and 'key' is the password for basic auth by default 16 | # If the auth is enabled, this value must be set in the "Authorization" header of the request. 17 | secret: "Basic c2VjcmV0OmtleQ==" 18 | 19 | data: 20 | local_data_folder: local_data/private_gpt 21 | 22 | ui: 23 | enabled: true 24 | path: / 25 | default_chat_system_prompt: > 26 | You are a helpful, respectful and honest assistant. 27 | Always answer as helpfully as possible and follow ALL given instructions. 28 | Do not speculate or make up information. 29 | Do not reference any given instructions or context. 30 | default_query_system_prompt: > 31 | You can only answer questions about the provided context. 32 | If you know the answer but it is not based in the provided context, don't provide 33 | the answer, just state the answer is not in the context provided. 34 | delete_file_button_enabled: true 35 | delete_all_files_button_enabled: true 36 | 37 | llm: 38 | mode: llamacpp 39 | prompt_style: "mistral" 40 | # Should be matching the selected model 41 | max_new_tokens: 512 42 | context_window: 3900 43 | tokenizer: mistralai/Mistral-7B-Instruct-v0.2 44 | temperature: 0.1 # The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual. (Default: 0.1) 45 | 46 | rag: 47 | similarity_top_k: 2 48 | #This value controls how many "top" documents the RAG returns to use in the context. 49 | #similarity_value: 0.45 50 | #This value is disabled by default. If you enable this settings, the RAG will only use articles that meet a certain percentage score. 
51 | rerank: 52 | enabled: false 53 | model: cross-encoder/ms-marco-MiniLM-L-2-v2 54 | top_n: 1 55 | 56 | clickhouse: 57 | host: localhost 58 | port: 8443 59 | username: admin 60 | password: clickhouse 61 | database: embeddings 62 | 63 | llamacpp: 64 | llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF 65 | llm_hf_model_file: mistral-7b-instruct-v0.2.Q4_K_M.gguf 66 | tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting 67 | top_k: 40 # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40) 68 | top_p: 1.0 # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9) 69 | repeat_penalty: 1.1 # Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1) 70 | 71 | embedding: 72 | # Should be matching the value above in most cases 73 | mode: huggingface 74 | ingest_mode: simple 75 | embed_dim: 384 # 384 is for BAAI/bge-small-en-v1.5 76 | 77 | huggingface: 78 | embedding_hf_model_name: BAAI/bge-small-en-v1.5 79 | access_token: ${HUGGINGFACE_TOKEN:} 80 | 81 | vectorstore: 82 | database: qdrant 83 | 84 | nodestore: 85 | database: simple 86 | 87 | qdrant: 88 | path: local_data/private_gpt/qdrant 89 | 90 | postgres: 91 | host: localhost 92 | port: 5432 93 | database: postgres 94 | user: postgres 95 | password: postgres 96 | schema_name: private_gpt 97 | 98 | sagemaker: 99 | llm_endpoint_name: huggingface-pytorch-tgi-inference-2023-09-25-19-53-32-140 100 | embedding_endpoint_name: huggingface-pytorch-inference-2023-11-03-07-41-36-479 101 | 102 | openai: 103 | api_key: ${OPENAI_API_KEY:} 104 | model: gpt-3.5-turbo 105 | embedding_api_key: ${OPENAI_API_KEY:} 106 | 107 | ollama: 108 | llm_model: llama2 109 | embedding_model: nomic-embed-text 110 | api_base: http://localhost:11434 111 | embedding_api_base: http://localhost:11434 # change if your embedding model runs on another ollama 112 | keep_alive: 5m 113 | request_timeout: 120.0 114 | 115 | azopenai: 116 | api_key: ${AZ_OPENAI_API_KEY:} 117 | azure_endpoint: ${AZ_OPENAI_ENDPOINT:} 118 | embedding_deployment_name: ${AZ_OPENAI_EMBEDDING_DEPLOYMENT_NAME:} 119 | llm_deployment_name: ${AZ_OPENAI_LLM_DEPLOYMENT_NAME:} 120 | api_version: "2023-05-15" 121 | embedding_model: text-embedding-ada-002 122 | llm_model: gpt-35-turbo 123 | 124 | gemini: 125 | api_key: ${GOOGLE_API_KEY:} 126 | model: models/gemini-pro 127 | embedding_model: models/embedding-001 128 | -------------------------------------------------------------------------------- /fern/docs/pages/manual/ingestion.mdx: -------------------------------------------------------------------------------- 1 | # Ingesting & Managing Documents 2 | 3 | The ingestion of documents can be done in different ways: 4 | 5 | * Using the `/ingest` API 6 | * Using the Gradio UI 7 | * Using the Bulk Local Ingestion functionality (check next section) 8 | 9 | ## Bulk Local Ingestion 10 | 11 | When you are running PrivateGPT in a fully local setup, you can ingest a complete folder for convenience (containing 12 | pdf, text files, etc.) 
13 | and optionally watch changes on it with the command: 14 | 15 | ```bash 16 | make ingest /path/to/folder -- --watch 17 | ``` 18 | 19 | To log the processed and failed files to an additional file, use: 20 | 21 | ```bash 22 | make ingest /path/to/folder -- --watch --log-file /path/to/log/file.log 23 | ``` 24 | 25 | **Note for Windows Users:** Depending on your Windows version and whether you are using PowerShell to execute 26 | PrivateGPT API calls, you may need to include the parameter name before passing the folder path for consumption: 27 | 28 | ```bash 29 | make ingest arg=/path/to/folder -- --watch --log-file /path/to/log/file.log 30 | ``` 31 | 32 | After ingestion is complete, you should be able to chat with your documents 33 | by navigating to http://localhost:8001 and using the option `Query documents`, 34 | or using the completions / chat API. 35 | 36 | ## Ingestion troubleshooting 37 | 38 | ### Running out of memory 39 | 40 | To avoid running out of memory, you should ingest your documents without the LLM loaded in your (video) memory. 41 | To do so, you should change your configuration to set `llm.mode: mock`. 42 | 43 | You can also use the existing `PGPT_PROFILES=mock` profile, which will set the following configuration for you: 44 | 45 | ```yaml 46 | llm: 47 | mode: mock 48 | embedding: 49 | mode: local 50 | ``` 51 | 52 | This configuration allows you to use hardware acceleration for creating embeddings while avoiding loading the full LLM into (video) memory. 53 | 54 | Once your documents are ingested, you can set the `llm.mode` value back to `local` (or your previous custom value). 55 | 56 | ### Ingestion speed 57 | 58 | The ingestion speed depends on the number of documents you are ingesting, and the size of each document. 59 | To speed up the ingestion, you can change the ingestion mode in configuration. 60 | 61 | The following ingestion modes exist: 62 | * `simple`: the historic behavior, ingest one document at a time, sequentially 63 | * `batch`: read, parse, and embed multiple documents using batches (batch read, then batch parse, then batch embed) 64 | * `parallel`: read, parse, and embed multiple documents in parallel. This is the fastest ingestion mode for a local setup. 65 | * `pipeline`: an alternative to `parallel`. 66 | To change the ingestion mode, you can use the `embedding.ingest_mode` configuration value. The default value is `simple`. 67 | 68 | To configure the number of workers used for parallel or batched ingestion, you can use 69 | the `embedding.count_workers` configuration value. If you set this value too high, you might run out of 70 | memory, so be mindful when setting this value. The default value is `2`. 71 | For `batch` mode, you can safely set this value to the number of threads available on your CPU without 72 | running out of memory. For `parallel` mode, you should be more careful and use a lower value. 73 | 74 | The configuration below should be enough for users who want to stress their hardware more: 75 | ```yaml 76 | embedding: 77 | ingest_mode: parallel 78 | count_workers: 4 79 | ``` 80 | 81 | If your hardware is powerful enough and you are loading heavy documents, you can increase the number of workers. 82 | It is recommended to do your own tests to find the optimal value for your hardware.
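If you prefer to drive such a test from Python instead of the shell, here is a rough equivalent of the `bash` benchmark shown right below (the folder path is a placeholder to adapt to your setup):

```python
import os
import subprocess
import time
from pathlib import Path

TEST_FOLDER = Path.home() / "my-dir" / "to-ingest"  # placeholder sample folder

# Wipe local data first so every run starts from a clean state.
# WARNING: this deletes all previously ingested documents.
subprocess.run(["make", "wipe"], check=True)

start = time.perf_counter()
subprocess.run(
    ["python", "./scripts/ingest_folder.py", str(TEST_FOLDER)],
    env={**os.environ, "PGPT_PROFILES": "mock"},
    check=True,
)
print(f"Ingestion took {time.perf_counter() - start:.1f}s")
```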
83 | 84 | If you have a `bash` shell, you can use this set of commands to run your own benchmark: 85 | 86 | ```bash 87 | # Wipe your local data, to put yourself in a clean state 88 | # This will delete all your ingested documents 89 | make wipe 90 | 91 | time PGPT_PROFILES=mock python ./scripts/ingest_folder.py ~/my-dir/to-ingest/ 92 | ``` 93 | 94 | ## Supported file formats 95 | 96 | privateGPT by default supports all the file formats that contain clear text (for example, `.txt` files, `.html`, etc.). 97 | However, these text-based file formats are only considered as text files, and are not pre-processed in any other way. 98 | 99 | It also supports the following file formats: 100 | * `.hwp` 101 | * `.pdf` 102 | * `.docx` 103 | * `.pptx` 104 | * `.ppt` 105 | * `.pptm` 106 | * `.jpg` 107 | * `.png` 108 | * `.jpeg` 109 | * `.mp3` 110 | * `.mp4` 111 | * `.csv` 112 | * `.epub` 113 | * `.md` 114 | * `.mbox` 115 | * `.ipynb` 116 | * `.json` 117 | 118 | **Please note the following nuance**: while `privateGPT` supports these file formats, it **might** require additional 119 | dependencies to be installed in your Python virtual environment. 120 | For example, if you try to ingest `.epub` files, `privateGPT` might fail to do so, and will instead display an 121 | explanatory error asking you to install the necessary dependencies to support this file format. 122 | 123 | 124 | **Other file formats might work**, but they will be considered as plain text 125 | files (in other words, they will be ingested as `.txt` files). -------------------------------------------------------------------------------- /private_gpt/components/embedding/embedding_component.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from injector import inject, singleton 4 | from llama_index.core.embeddings import BaseEmbedding, MockEmbedding 5 | 6 | from private_gpt.paths import models_cache_path 7 | from private_gpt.settings.settings import Settings 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | @singleton 13 | class EmbeddingComponent: 14 | embedding_model: BaseEmbedding 15 | 16 | @inject 17 | def __init__(self, settings: Settings) -> None: 18 | embedding_mode = settings.embedding.mode 19 | logger.info("Initializing the embedding model in mode=%s", embedding_mode) 20 | match embedding_mode: 21 | case "huggingface": 22 | try: 23 | from llama_index.embeddings.huggingface import ( # type: ignore 24 | HuggingFaceEmbedding, 25 | ) 26 | except ImportError as e: 27 | raise ImportError( 28 | "Local dependencies not found, install with `poetry install --extras embeddings-huggingface`" 29 | ) from e 30 | 31 | self.embedding_model = HuggingFaceEmbedding( 32 | model_name=settings.huggingface.embedding_hf_model_name, 33 | cache_folder=str(models_cache_path), 34 | ) 35 | case "sagemaker": 36 | try: 37 | from private_gpt.components.embedding.custom.sagemaker import ( 38 | SagemakerEmbedding, 39 | ) 40 | except ImportError as e: 41 | raise ImportError( 42 | "Sagemaker dependencies not found, install with `poetry install --extras embeddings-sagemaker`" 43 | ) from e 44 | 45 | self.embedding_model = SagemakerEmbedding( 46 | endpoint_name=settings.sagemaker.embedding_endpoint_name, 47 | ) 48 | case "openai": 49 | try: 50 | from llama_index.embeddings.openai import ( # type: ignore 51 | OpenAIEmbedding, 52 | ) 53 | except ImportError as e: 54 | raise ImportError( 55 | "OpenAI dependencies not found, install with `poetry install --extras embeddings-openai`" 56 | ) from e 57 |
58 | api_base = ( 59 | settings.openai.embedding_api_base or settings.openai.api_base 60 | ) 61 | api_key = settings.openai.embedding_api_key or settings.openai.api_key 62 | model = settings.openai.embedding_model 63 | 64 | self.embedding_model = OpenAIEmbedding( 65 | api_base=api_base, 66 | api_key=api_key, 67 | model=model, 68 | ) 69 | case "ollama": 70 | try: 71 | from llama_index.embeddings.ollama import ( # type: ignore 72 | OllamaEmbedding, 73 | ) 74 | except ImportError as e: 75 | raise ImportError( 76 | "Local dependencies not found, install with `poetry install --extras embeddings-ollama`" 77 | ) from e 78 | 79 | ollama_settings = settings.ollama 80 | self.embedding_model = OllamaEmbedding( 81 | model_name=ollama_settings.embedding_model, 82 | base_url=ollama_settings.embedding_api_base, 83 | ) 84 | case "azopenai": 85 | try: 86 | from llama_index.embeddings.azure_openai import ( # type: ignore 87 | AzureOpenAIEmbedding, 88 | ) 89 | except ImportError as e: 90 | raise ImportError( 91 | "Azure OpenAI dependencies not found, install with `poetry install --extras embeddings-azopenai`" 92 | ) from e 93 | 94 | azopenai_settings = settings.azopenai 95 | self.embedding_model = AzureOpenAIEmbedding( 96 | model=azopenai_settings.embedding_model, 97 | deployment_name=azopenai_settings.embedding_deployment_name, 98 | api_key=azopenai_settings.api_key, 99 | azure_endpoint=azopenai_settings.azure_endpoint, 100 | api_version=azopenai_settings.api_version, 101 | ) 102 | case "gemini": 103 | try: 104 | from llama_index.embeddings.gemini import ( # type: ignore 105 | GeminiEmbedding, 106 | ) 107 | except ImportError as e: 108 | raise ImportError( 109 | "Gemini dependencies not found, install with `poetry install --extras embeddings-gemini`" 110 | ) from e 111 | 112 | self.embedding_model = GeminiEmbedding( 113 | api_key=settings.gemini.api_key, 114 | model_name=settings.gemini.embedding_model, 115 | ) 116 | case "mock": 117 | # Not a random number, is the dimensionality used by 118 | # the default embedding model 119 | self.embedding_model = MockEmbedding(384) 120 | -------------------------------------------------------------------------------- /private_gpt/server/ingest/ingest_service.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import tempfile 3 | from pathlib import Path 4 | from typing import TYPE_CHECKING, AnyStr, BinaryIO 5 | 6 | from injector import inject, singleton 7 | from llama_index.core.node_parser import SentenceWindowNodeParser 8 | from llama_index.core.storage import StorageContext 9 | 10 | from private_gpt.components.embedding.embedding_component import EmbeddingComponent 11 | from private_gpt.components.ingest.ingest_component import get_ingestion_component 12 | from private_gpt.components.llm.llm_component import LLMComponent 13 | from private_gpt.components.node_store.node_store_component import NodeStoreComponent 14 | from private_gpt.components.vector_store.vector_store_component import ( 15 | VectorStoreComponent, 16 | ) 17 | from private_gpt.server.ingest.model import IngestedDoc 18 | from private_gpt.settings.settings import settings 19 | 20 | if TYPE_CHECKING: 21 | from llama_index.core.storage.docstore.types import RefDocInfo 22 | 23 | logger = logging.getLogger(__name__) 24 | 25 | 26 | @singleton 27 | class IngestService: 28 | @inject 29 | def __init__( 30 | self, 31 | llm_component: LLMComponent, 32 | vector_store_component: VectorStoreComponent, 33 | embedding_component: EmbeddingComponent, 34 | 
node_store_component: NodeStoreComponent, 35 | ) -> None: 36 | self.llm_service = llm_component 37 | self.storage_context = StorageContext.from_defaults( 38 | vector_store=vector_store_component.vector_store, 39 | docstore=node_store_component.doc_store, 40 | index_store=node_store_component.index_store, 41 | ) 42 | node_parser = SentenceWindowNodeParser.from_defaults() 43 | 44 | self.ingest_component = get_ingestion_component( 45 | self.storage_context, 46 | embed_model=embedding_component.embedding_model, 47 | transformations=[node_parser, embedding_component.embedding_model], 48 | settings=settings(), 49 | ) 50 | 51 | def _ingest_data(self, file_name: str, file_data: AnyStr) -> list[IngestedDoc]: 52 | logger.debug("Got file data of size=%s to ingest", len(file_data)) 53 | # llama-index mainly supports reading from files, so 54 | # we have to create a tmp file to read for it to work 55 | # delete=False to avoid a Windows 11 permission error. 56 | with tempfile.NamedTemporaryFile(delete=False) as tmp: 57 | try: 58 | path_to_tmp = Path(tmp.name) 59 | if isinstance(file_data, bytes): 60 | path_to_tmp.write_bytes(file_data) 61 | else: 62 | path_to_tmp.write_text(str(file_data)) 63 | return self.ingest_file(file_name, path_to_tmp) 64 | finally: 65 | tmp.close() 66 | path_to_tmp.unlink() 67 | 68 | def ingest_file(self, file_name: str, file_data: Path) -> list[IngestedDoc]: 69 | logger.info("Ingesting file_name=%s", file_name) 70 | documents = self.ingest_component.ingest(file_name, file_data) 71 | logger.info("Finished ingestion file_name=%s", file_name) 72 | return [IngestedDoc.from_document(document) for document in documents] 73 | 74 | def ingest_text(self, file_name: str, text: str) -> list[IngestedDoc]: 75 | logger.debug("Ingesting text data with file_name=%s", file_name) 76 | return self._ingest_data(file_name, text) 77 | 78 | def ingest_bin_data( 79 | self, file_name: str, raw_file_data: BinaryIO 80 | ) -> list[IngestedDoc]: 81 | logger.debug("Ingesting binary data with file_name=%s", file_name) 82 | file_data = raw_file_data.read() 83 | return self._ingest_data(file_name, file_data) 84 | 85 | def bulk_ingest(self, files: list[tuple[str, Path]]) -> list[IngestedDoc]: 86 | logger.info("Ingesting file_names=%s", [f[0] for f in files]) 87 | documents = self.ingest_component.bulk_ingest(files) 88 | logger.info("Finished ingestion file_name=%s", [f[0] for f in files]) 89 | return [IngestedDoc.from_document(document) for document in documents] 90 | 91 | def list_ingested(self) -> list[IngestedDoc]: 92 | ingested_docs: list[IngestedDoc] = [] 93 | try: 94 | docstore = self.storage_context.docstore 95 | ref_docs: dict[str, RefDocInfo] | None = docstore.get_all_ref_doc_info() 96 | 97 | if not ref_docs: 98 | return ingested_docs 99 | 100 | for doc_id, ref_doc_info in ref_docs.items(): 101 | doc_metadata = None 102 | if ref_doc_info is not None and ref_doc_info.metadata is not None: 103 | doc_metadata = IngestedDoc.curate_metadata(ref_doc_info.metadata) 104 | ingested_docs.append( 105 | IngestedDoc( 106 | object="ingest.document", 107 | doc_id=doc_id, 108 | doc_metadata=doc_metadata, 109 | ) 110 | ) 111 | except ValueError: 112 | logger.warning("Got an exception when getting list of docs", exc_info=True) 113 | pass 114 | logger.debug("Found count=%s ingested documents", len(ingested_docs)) 115 | return ingested_docs 116 | 117 | def delete(self, doc_id: str) -> None: 118 | """Delete an ingested document. 
119 | 120 | :raises ValueError: if the document does not exist 121 | """ 122 | logger.info( 123 | "Deleting the ingested document=%s in the doc and index store", doc_id 124 | ) 125 | self.ingest_component.delete(doc_id) 126 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "private-gpt" 3 | version = "0.5.0" 4 | description = "Private GPT" 5 | authors = ["Zylon "] 6 | 7 | [tool.poetry.dependencies] 8 | python = ">=3.11,<3.12" 9 | # PrivateGPT 10 | fastapi = { extras = ["all"], version = "^0.111.0" } 11 | python-multipart = "^0.0.9" 12 | injector = "^0.21.0" 13 | pyyaml = "^6.0.1" 14 | watchdog = "^4.0.1" 15 | transformers = "^4.42.3" 16 | docx2txt = "^0.8" 17 | cryptography = "^3.1" 18 | # LlamaIndex core libs 19 | llama-index-core = "^0.10.52" 20 | llama-index-readers-file = "^0.1.27" 21 | # Optional LlamaIndex integration libs 22 | llama-index-llms-llama-cpp = {version = "^0.1.4", optional = true} 23 | llama-index-llms-openai = {version = "^0.1.25", optional = true} 24 | llama-index-llms-openai-like = {version ="^0.1.3", optional = true} 25 | llama-index-llms-ollama = {version ="^0.1.5", optional = true} 26 | llama-index-llms-azure-openai = {version ="^0.1.8", optional = true} 27 | llama-index-llms-gemini = {version ="^0.1.11", optional = true} 28 | llama-index-embeddings-ollama = {version ="^0.1.2", optional = true} 29 | llama-index-embeddings-huggingface = {version ="^0.2.2", optional = true} 30 | llama-index-embeddings-openai = {version ="^0.1.10", optional = true} 31 | llama-index-embeddings-azure-openai = {version ="^0.1.10", optional = true} 32 | llama-index-embeddings-gemini = {version ="^0.1.8", optional = true} 33 | llama-index-vector-stores-qdrant = {version ="^0.2.10", optional = true} 34 | llama-index-vector-stores-chroma = {version ="^0.1.10", optional = true} 35 | llama-index-vector-stores-postgres = {version ="^0.1.11", optional = true} 36 | llama-index-vector-stores-clickhouse = {version ="^0.1.3", optional = true} 37 | llama-index-storage-docstore-postgres = {version ="^0.1.3", optional = true} 38 | llama-index-storage-index-store-postgres = {version ="^0.1.4", optional = true} 39 | # Postgres 40 | psycopg2-binary = {version ="^2.9.9", optional = true} 41 | asyncpg = {version="^0.29.0", optional = true} 42 | 43 | # ClickHouse 44 | clickhouse-connect = {version = "^0.7.15", optional = true} 45 | 46 | # Optional Sagemaker dependency 47 | boto3 = {version ="^1.34.139", optional = true} 48 | 49 | # Optional Qdrant client 50 | qdrant-client = {version ="^1.9.0", optional = true} 51 | 52 | # Optional Reranker dependencies 53 | torch = {version ="^2.3.1", optional = true} 54 | sentence-transformers = {version ="^3.0.1", optional = true} 55 | 56 | # Optional UI 57 | gradio = {version ="^4.37.2", optional = true} 58 | 59 | # Optional Google Gemini dependency 60 | google-generativeai = {version ="^0.5.4", optional = true} 61 | 62 | [tool.poetry.extras] 63 | ui = ["gradio"] 64 | llms-llama-cpp = ["llama-index-llms-llama-cpp"] 65 | llms-openai = ["llama-index-llms-openai"] 66 | llms-openai-like = ["llama-index-llms-openai-like"] 67 | llms-ollama = ["llama-index-llms-ollama"] 68 | llms-sagemaker = ["boto3"] 69 | llms-azopenai = ["llama-index-llms-azure-openai"] 70 | llms-gemini = ["llama-index-llms-gemini", "google-generativeai"] 71 | embeddings-ollama = ["llama-index-embeddings-ollama"] 72 | embeddings-huggingface = 
["llama-index-embeddings-huggingface"] 73 | embeddings-openai = ["llama-index-embeddings-openai"] 74 | embeddings-sagemaker = ["boto3"] 75 | embeddings-azopenai = ["llama-index-embeddings-azure-openai"] 76 | embeddings-gemini = ["llama-index-embeddings-gemini"] 77 | vector-stores-qdrant = ["llama-index-vector-stores-qdrant"] 78 | vector-stores-clickhouse = ["llama-index-vector-stores-clickhouse", "clickhouse_connect"] 79 | vector-stores-chroma = ["llama-index-vector-stores-chroma"] 80 | vector-stores-postgres = ["llama-index-vector-stores-postgres"] 81 | storage-nodestore-postgres = ["llama-index-storage-docstore-postgres","llama-index-storage-index-store-postgres","psycopg2-binary","asyncpg"] 82 | rerank-sentence-transformers = ["torch", "sentence-transformers"] 83 | 84 | [tool.poetry.group.dev.dependencies] 85 | black = "^22" 86 | mypy = "^1.2" 87 | pre-commit = "^2" 88 | pytest = "^7" 89 | pytest-cov = "^3" 90 | ruff = "^0" 91 | pytest-asyncio = "^0.21.1" 92 | types-pyyaml = "^6.0.12.12" 93 | 94 | [build-system] 95 | requires = ["poetry-core>=1.0.0"] 96 | build-backend = "poetry.core.masonry.api" 97 | 98 | # Packages configs 99 | 100 | ## coverage 101 | 102 | [tool.coverage.run] 103 | branch = true 104 | 105 | [tool.coverage.report] 106 | skip_empty = true 107 | precision = 2 108 | 109 | ## black 110 | 111 | [tool.black] 112 | target-version = ['py311'] 113 | 114 | ## ruff 115 | # Recommended ruff config for now, to be updated as we go along. 116 | [tool.ruff] 117 | target-version = 'py311' 118 | 119 | # See all rules at https://beta.ruff.rs/docs/rules/ 120 | select = [ 121 | "E", # pycodestyle 122 | "W", # pycodestyle 123 | "F", # Pyflakes 124 | "B", # flake8-bugbear 125 | "C4", # flake8-comprehensions 126 | "D", # pydocstyle 127 | "I", # isort 128 | "SIM", # flake8-simplify 129 | "TCH", # flake8-type-checking 130 | "TID", # flake8-tidy-imports 131 | "Q", # flake8-quotes 132 | "UP", # pyupgrade 133 | "PT", # flake8-pytest-style 134 | "RUF", # Ruff-specific rules 135 | ] 136 | 137 | ignore = [ 138 | "E501", # "Line too long" 139 | # -> line length already regulated by black 140 | "PT011", # "pytest.raises() should specify expected exception" 141 | # -> would imply to update tests every time you update exception message 142 | "SIM102", # "Use a single `if` statement instead of nested `if` statements" 143 | # -> too restrictive, 144 | "D100", 145 | "D101", 146 | "D102", 147 | "D103", 148 | "D104", 149 | "D105", 150 | "D106", 151 | "D107" 152 | # -> "Missing docstring in public function too restrictive" 153 | ] 154 | 155 | [tool.ruff.pydocstyle] 156 | # Automatically disable rules that are incompatible with Google docstring convention 157 | convention = "google" 158 | 159 | [tool.ruff.pycodestyle] 160 | max-doc-length = 88 161 | 162 | [tool.ruff.flake8-tidy-imports] 163 | ban-relative-imports = "all" 164 | 165 | [tool.ruff.flake8-type-checking] 166 | strict = true 167 | runtime-evaluated-base-classes = ["pydantic.BaseModel"] 168 | # Pydantic needs to be able to evaluate types at runtime 169 | # see https://pypi.org/project/flake8-type-checking/ for flake8-type-checking documentation 170 | # see https://beta.ruff.rs/docs/settings/#flake8-type-checking-runtime-evaluated-base-classes for ruff documentation 171 | 172 | [tool.ruff.per-file-ignores] 173 | # Allow missing docstrings for tests 174 | "tests/**/*.py" = ["D1"] 175 | 176 | ## mypy 177 | 178 | [tool.mypy] 179 | python_version = "3.11" 180 | strict = true 181 | check_untyped_defs = false 182 | explicit_package_bases = true 183 | 
warn_unused_ignores = false 184 | exclude = ["tests"] 185 | 186 | [tool.mypy-llama-index] 187 | ignore_missing_imports = true 188 | 189 | [tool.pytest.ini_options] 190 | asyncio_mode = "auto" 191 | testpaths = ["tests"] 192 | addopts = [ 193 | "--import-mode=importlib", 194 | ] 195 | -------------------------------------------------------------------------------- /scripts/utils.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import shutil 4 | from typing import Any, ClassVar 5 | 6 | from private_gpt.paths import local_data_path 7 | from private_gpt.settings.settings import settings 8 | 9 | 10 | def wipe_file(file: str) -> None: 11 | if os.path.isfile(file): 12 | os.remove(file) 13 | print(f" - Deleted {file}") 14 | 15 | 16 | def wipe_tree(path: str) -> None: 17 | if not os.path.exists(path): 18 | print(f"Warning: Path not found {path}") 19 | return 20 | print(f"Wiping {path}...") 21 | all_files = os.listdir(path) 22 | 23 | files_to_remove = [file for file in all_files if file != ".gitignore"] 24 | for file_name in files_to_remove: 25 | file_path = os.path.join(path, file_name) 26 | try: 27 | if os.path.isfile(file_path): 28 | os.remove(file_path) 29 | elif os.path.isdir(file_path): 30 | shutil.rmtree(file_path) 31 | print(f" - Deleted {file_path}") 32 | except PermissionError: 33 | print( 34 | f"PermissionError: Unable to remove {file_path}. It is in use by another process." 35 | ) 36 | continue 37 | 38 | 39 | class Postgres: 40 | tables: ClassVar[dict[str, list[str]]] = { 41 | "nodestore": ["data_docstore", "data_indexstore"], 42 | "vectorstore": ["data_embeddings"], 43 | } 44 | 45 | def __init__(self) -> None: 46 | try: 47 | import psycopg2 48 | except ModuleNotFoundError: 49 | raise ModuleNotFoundError("Postgres dependencies not found") from None 50 | 51 | connection = settings().postgres.model_dump(exclude_none=True) 52 | self.schema = connection.pop("schema_name") 53 | self.conn = psycopg2.connect(**connection) 54 | 55 | def wipe(self, storetype: str) -> None: 56 | cur = self.conn.cursor() 57 | try: 58 | for table in self.tables[storetype]: 59 | sql = f"DROP TABLE IF EXISTS {self.schema}.{table}" 60 | cur.execute(sql) 61 | print(f"Table {self.schema}.{table} dropped.") 62 | self.conn.commit() 63 | finally: 64 | cur.close() 65 | 66 | def stats(self, store_type: str) -> None: 67 | template = "SELECT '{table}', COUNT(*), pg_size_pretty(pg_total_relation_size('{table}')) FROM {table}" 68 | sql = " UNION ALL ".join( 69 | template.format(table=tbl) for tbl in self.tables[store_type] 70 | ) 71 | 72 | cur = self.conn.cursor() 73 | try: 74 | print(f"Storage for Postgres {store_type}.") 75 | print("{:<15} | {:>15} | {:>9}".format("Table", "Rows", "Size")) 76 | print("-" * 45) # Print a line separator 77 | 78 | cur.execute(sql) 79 | for row in cur.fetchall(): 80 | formatted_row_count = f"{row[1]:,}" 81 | print(f"{row[0]:<15} | {formatted_row_count:>15} | {row[2]:>9}") 82 | 83 | print() 84 | finally: 85 | cur.close() 86 | 87 | def __del__(self): 88 | if hasattr(self, "conn") and self.conn: 89 | self.conn.close() 90 | 91 | 92 | class Simple: 93 | def wipe(self, store_type: str) -> None: 94 | assert store_type == "nodestore" 95 | from llama_index.core.storage.docstore.types import ( 96 | DEFAULT_PERSIST_FNAME as DOCSTORE, 97 | ) 98 | from llama_index.core.storage.index_store.types import ( 99 | DEFAULT_PERSIST_FNAME as INDEXSTORE, 100 | ) 101 | 102 | for store in (DOCSTORE, INDEXSTORE): 103 | wipe_file(str((local_data_path / 
store).absolute())) 104 | 105 | 106 | class Chroma: 107 | def wipe(self, store_type: str) -> None: 108 | assert store_type == "vectorstore" 109 | wipe_tree(str((local_data_path / "chroma_db").absolute())) 110 | 111 | 112 | class Qdrant: 113 | COLLECTION = ( 114 | "make_this_parameterizable_per_api_call" # ?! see vector_store_component.py 115 | ) 116 | 117 | def __init__(self) -> None: 118 | try: 119 | from qdrant_client import QdrantClient # type: ignore 120 | except ImportError: 121 | raise ImportError("Qdrant dependencies not found") from None 122 | self.client = QdrantClient(**settings().qdrant.model_dump(exclude_none=True)) 123 | 124 | def wipe(self, store_type: str) -> None: 125 | assert store_type == "vectorstore" 126 | try: 127 | self.client.delete_collection(self.COLLECTION) 128 | print("Collection dropped successfully.") 129 | except Exception as e: 130 | print("Error dropping collection:", e) 131 | 132 | def stats(self, store_type: str) -> None: 133 | print(f"Storage for Qdrant {store_type}.") 134 | try: 135 | collection_data = self.client.get_collection(self.COLLECTION) 136 | if collection_data: 137 | # Collection Info 138 | # https://qdrant.tech/documentation/concepts/collections/ 139 | print(f"\tPoints: {collection_data.points_count:,}") 140 | print(f"\tVectors: {collection_data.vectors_count:,}") 141 | print(f"\tIndex Vectors: {collection_data.indexed_vectors_count:,}") 142 | return 143 | except ValueError: 144 | pass 145 | print("\t- Qdrant collection not found or empty") 146 | 147 | 148 | class Command: 149 | DB_HANDLERS: ClassVar[dict[str, Any]] = { 150 | "simple": Simple, # node store 151 | "chroma": Chroma, # vector store 152 | "postgres": Postgres, # node, index and vector store 153 | "qdrant": Qdrant, # vector store 154 | } 155 | 156 | def for_each_store(self, cmd: str): 157 | for store_type in ("nodestore", "vectorstore"): 158 | database = getattr(settings(), store_type).database 159 | handler_class = self.DB_HANDLERS.get(database) 160 | if handler_class is None: 161 | print(f"No handler found for database '{database}'") 162 | continue 163 | handler_instance = handler_class() # Instantiate the class 164 | # If the DB can handle this cmd dispatch it. 
165 | if hasattr(handler_instance, cmd) and callable( 166 | func := getattr(handler_instance, cmd) 167 | ): 168 | func(store_type) 169 | else: 170 | print( 171 | f"Unable to execute command '{cmd}' on '{store_type}' in database '{database}'" 172 | ) 173 | 174 | def execute(self, cmd: str) -> None: 175 | if cmd in ("wipe", "stats"): 176 | self.for_each_store(cmd) 177 | 178 | 179 | if __name__ == "__main__": 180 | parser = argparse.ArgumentParser() 181 | parser.add_argument("mode", help="select a mode to run", choices=["wipe", "stats"]) 182 | args = parser.parse_args() 183 | 184 | Command().execute(args.mode.lower()) 185 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🔒 PrivateGPT 📑 2 | 3 | [![Tests](https://github.com/zylon-ai/private-gpt/actions/workflows/tests.yml/badge.svg)](https://github.com/zylon-ai/private-gpt/actions/workflows/tests.yml?query=branch%3Amain) 4 | [![Website](https://img.shields.io/website?up_message=check%20it&down_message=down&url=https%3A%2F%2Fdocs.privategpt.dev%2F&label=Documentation)](https://docs.privategpt.dev/) 5 | 6 | 7 | ![Gradio UI](/fern/docs/assets/ui.png?raw=true) 8 | 9 | PrivateGPT is a production-ready AI project that allows you to ask questions about your documents using the power 10 | of Large Language Models (LLMs), even in scenarios without an Internet connection. 100% private, no data leaves your 11 | execution environment at any point. 12 | 13 | The project provides an API offering all the primitives required to build private, context-aware AI applications. 14 | It follows and extends the [OpenAI API standard](https://openai.com/blog/openai-api), 15 | and supports both normal and streaming responses. 16 | 17 | The API is divided into two logical blocks: 18 | 19 | **High-level API**, which abstracts all the complexity of a RAG (Retrieval Augmented Generation) 20 | pipeline implementation: 21 | - Ingestion of documents: internally managing document parsing, 22 | splitting, metadata extraction, embedding generation and storage. 23 | - Chat & Completions using context from ingested documents: 24 | abstracting the retrieval of context, the prompt engineering and the response generation. 25 | 26 | **Low-level API**, which allows advanced users to implement their own complex pipelines: 27 | - Embeddings generation: based on a piece of text. 28 | - Contextual chunks retrieval: given a query, returns the most relevant chunks of text from the ingested documents. 29 | 30 | In addition to this, a working [Gradio UI](https://www.gradio.app/) 31 | client is provided to test the API, together with a set of useful tools such as bulk model 32 | download script, ingestion script, documents folder watch, etc. 33 | 34 | > 👂 **Need help applying PrivateGPT to your specific use case?** 35 | > [Let us know more about it](https://forms.gle/4cSDmH13RZBHV9at7) 36 | > and we'll try to help! We are refining PrivateGPT through your feedback. 37 | 38 | ## 🎞️ Overview 39 | DISCLAIMER: This README is not updated as frequently as the [documentation](https://docs.privategpt.dev/). 40 | Please check it out for the latest updates! 41 | 42 | ### Motivation behind PrivateGPT 43 | Generative AI is a game changer for our society, but adoption in companies of all sizes and data-sensitive 44 | domains like healthcare or legal is limited by a clear concern: **privacy**. 
45 | Not being able to ensure that your data is fully under your control when using third-party AI tools 46 | is a risk those industries cannot take. 47 | 48 | ### Primordial version 49 | The first version of PrivateGPT was launched in May 2023 as a novel approach to address the privacy 50 | concerns by using LLMs in a completely offline way. 51 | 52 | That version, which rapidly became a go-to project for privacy-sensitive setups and served as the seed 53 | for thousands of local-focused generative AI projects, was the foundation of what PrivateGPT is becoming today; 54 | it was a simpler and more educational implementation, useful for understanding the basic concepts required 55 | to build a fully local (and therefore private) chatGPT-like tool. 56 | 57 | > It is strongly recommended to do a clean clone and install of this new version of 58 | PrivateGPT if you come from the previous, primordial version. 59 | 60 | ### Present and Future of PrivateGPT 61 | PrivateGPT is now evolving towards becoming a gateway to generative AI models and primitives, including 62 | completions, document ingestion, RAG pipelines and other low-level building blocks. 63 | We want to make it easier for any developer to build AI applications and experiences, as well as provide 64 | an extensible architecture for the community to keep contributing. 65 | 66 | ## 📄 Documentation 67 | Full documentation on installation, dependencies, configuration, running the server, deployment options, 68 | ingesting local documents, API details and UI features can be found here: https://docs.privategpt.dev/ 69 | 70 | ## 🧩 Architecture 71 | Conceptually, PrivateGPT is an API that wraps a RAG pipeline and exposes its 72 | primitives. 73 | * The API is built using [FastAPI](https://fastapi.tiangolo.com/) and follows 74 | [OpenAI's API scheme](https://platform.openai.com/docs/api-reference). 75 | * The RAG pipeline is based on [LlamaIndex](https://www.llamaindex.ai/). 76 | 77 | The design of PrivateGPT allows you to easily extend and adapt both the API and the 78 | RAG implementation. Some key architectural decisions are: 79 | * Dependency Injection, decoupling the different components and layers. 80 | * Usage of LlamaIndex abstractions such as `LLM`, `BaseEmbedding` or `VectorStore`, 81 | making it straightforward to change the actual implementations of those abstractions. 82 | * Simplicity, adding as few layers and new abstractions as possible. 83 | * Ready to use, providing a full implementation of the API and RAG 84 | pipeline. 85 | 86 | Main building blocks: 87 | * APIs are defined in `private_gpt:server:`. Each package contains an 88 | `_router.py` (FastAPI layer) and an `_service.py` (the 89 | service implementation). Each *Service* uses LlamaIndex base abstractions instead 90 | of specific implementations, 91 | decoupling the actual implementation from its usage. 92 | * Components are placed in 93 | `private_gpt:components:`. Each *Component* is in charge of providing 94 | actual implementations to the base abstractions used in the Services - for example 95 | `LLMComponent` is in charge of providing an actual implementation of an `LLM` 96 | (for example `LlamaCPP` or `OpenAI`). 97 | 98 | ## 💡 Contributing 99 | Contributions are welcome! To ensure code quality we have enabled several format and 100 | typing checks, just run `make check` before committing to make sure your code is OK. 101 | Remember to test your code! You'll find a tests folder with helpers, and you can run 102 | the tests using the `make test` command.
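For quick reference, a typical pre-commit loop using the Makefile targets mentioned above might look like the sketch below (both targets come from the project's `Makefile`; check it for the full list of helpers):

```bash
make check   # formatting and typing checks, run before committing
make test    # run the test suite under tests/
```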
103 | 104 | Don't know what to contribute? Here is the public 105 | [Project Board](https://github.com/users/imartinez/projects/3) with several ideas. 106 | 107 | Head over to Discord 108 | #contributors channel and ask for write permissions on that GitHub project. 109 | 110 | ## 💬 Community 111 | Join the conversation around PrivateGPT on our: 112 | - [Twitter (aka X)](https://twitter.com/PrivateGPT_AI) 113 | - [Discord](https://discord.gg/bK6mRVpErU) 114 | 115 | ## 📖 Citation 116 | If you use PrivateGPT in a paper, check out the [Citation file](CITATION.cff) for the correct citation. 117 | You can also use the "Cite this repository" button in this repo to get the citation in different formats. 118 | 119 | Here are a couple of examples: 120 | 121 | ## 🤗 Partners & Supporters 122 | PrivateGPT is actively supported by the teams behind: 123 | * [Qdrant](https://qdrant.tech/), providing the default vector database 124 | * [Fern](https://buildwithfern.com/), providing Documentation and SDKs 125 | * [LlamaIndex](https://www.llamaindex.ai/), providing the base RAG framework and abstractions 126 | 127 | This project has been strongly influenced and supported by other amazing projects like 128 | [LangChain](https://github.com/hwchase17/langchain), 129 | [GPT4All](https://github.com/nomic-ai/gpt4all), 130 | [LlamaCpp](https://github.com/ggerganov/llama.cpp), 131 | [Chroma](https://www.trychroma.com/) 132 | and [SentenceTransformers](https://www.sbert.net/). 133 | -------------------------------------------------------------------------------- /private_gpt/components/vector_store/vector_store_component.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import typing 3 | 4 | from injector import inject, singleton 5 | from llama_index.core.indices.vector_store import VectorIndexRetriever, VectorStoreIndex 6 | from llama_index.core.vector_stores.types import ( 7 | BasePydanticVectorStore, 8 | FilterCondition, 9 | MetadataFilter, 10 | MetadataFilters, 11 | ) 12 | 13 | from private_gpt.open_ai.extensions.context_filter import ContextFilter 14 | from private_gpt.paths import local_data_path 15 | from private_gpt.settings.settings import Settings 16 | 17 | logger = logging.getLogger(__name__) 18 | 19 | 20 | def _doc_id_metadata_filter( 21 | context_filter: ContextFilter | None, 22 | ) -> MetadataFilters: 23 | filters = MetadataFilters(filters=[], condition=FilterCondition.OR) 24 | 25 | if context_filter is not None and context_filter.docs_ids is not None: 26 | for doc_id in context_filter.docs_ids: 27 | filters.filters.append(MetadataFilter(key="doc_id", value=doc_id)) 28 | 29 | return filters 30 | 31 | 32 | @singleton 33 | class VectorStoreComponent: 34 | settings: Settings 35 | vector_store: BasePydanticVectorStore 36 | 37 | @inject 38 | def __init__(self, settings: Settings) -> None: 39 | self.settings = settings 40 | match settings.vectorstore.database: 41 | case "postgres": 42 | try: 43 | from llama_index.vector_stores.postgres import ( # type: ignore 44 | PGVectorStore, 45 | ) 46 | except ImportError as e: 47 | raise ImportError( 48 | "Postgres dependencies not found, install with `poetry install --extras vector-stores-postgres`" 49 | ) from e 50 | 51 | if settings.postgres is None: 52 | raise ValueError( 53 | "Postgres settings not found. Please provide settings." 
54 | ) 55 | 56 | self.vector_store = typing.cast( 57 | BasePydanticVectorStore, 58 | PGVectorStore.from_params( 59 | **settings.postgres.model_dump(exclude_none=True), 60 | table_name="embeddings", 61 | embed_dim=settings.embedding.embed_dim, 62 | ), 63 | ) 64 | 65 | case "chroma": 66 | try: 67 | import chromadb # type: ignore 68 | from chromadb.config import ( # type: ignore 69 | Settings as ChromaSettings, 70 | ) 71 | 72 | from private_gpt.components.vector_store.batched_chroma import ( 73 | BatchedChromaVectorStore, 74 | ) 75 | except ImportError as e: 76 | raise ImportError( 77 | "ChromaDB dependencies not found, install with `poetry install --extras vector-stores-chroma`" 78 | ) from e 79 | 80 | chroma_settings = ChromaSettings(anonymized_telemetry=False) 81 | chroma_client = chromadb.PersistentClient( 82 | path=str((local_data_path / "chroma_db").absolute()), 83 | settings=chroma_settings, 84 | ) 85 | chroma_collection = chroma_client.get_or_create_collection( 86 | "make_this_parameterizable_per_api_call" 87 | ) # TODO 88 | 89 | self.vector_store = typing.cast( 90 | BasePydanticVectorStore, 91 | BatchedChromaVectorStore( 92 | chroma_client=chroma_client, chroma_collection=chroma_collection 93 | ), 94 | ) 95 | 96 | case "qdrant": 97 | try: 98 | from llama_index.vector_stores.qdrant import ( # type: ignore 99 | QdrantVectorStore, 100 | ) 101 | from qdrant_client import QdrantClient # type: ignore 102 | except ImportError as e: 103 | raise ImportError( 104 | "Qdrant dependencies not found, install with `poetry install --extras vector-stores-qdrant`" 105 | ) from e 106 | 107 | if settings.qdrant is None: 108 | logger.info( 109 | "Qdrant config not found. Using default settings." 110 | "Trying to connect to Qdrant at localhost:6333." 111 | ) 112 | client = QdrantClient() 113 | else: 114 | client = QdrantClient( 115 | **settings.qdrant.model_dump(exclude_none=True) 116 | ) 117 | self.vector_store = typing.cast( 118 | BasePydanticVectorStore, 119 | QdrantVectorStore( 120 | client=client, 121 | collection_name="make_this_parameterizable_per_api_call", 122 | ), # TODO 123 | ) 124 | case "clickhouse": 125 | try: 126 | from clickhouse_connect import ( # type: ignore 127 | get_client, 128 | ) 129 | from llama_index.vector_stores.clickhouse import ( # type: ignore 130 | ClickHouseVectorStore, 131 | ) 132 | except ImportError as e: 133 | raise ImportError( 134 | "ClickHouse dependencies not found, install with `poetry install --extras vector-stores-clickhouse`" 135 | ) from e 136 | 137 | if settings.clickhouse is None: 138 | raise ValueError( 139 | "ClickHouse settings not found. Please provide settings." 
140 | ) 141 | 142 | clickhouse_client = get_client( 143 | host=settings.clickhouse.host, 144 | port=settings.clickhouse.port, 145 | username=settings.clickhouse.username, 146 | password=settings.clickhouse.password, 147 | ) 148 | self.vector_store = ClickHouseVectorStore( 149 | clickhouse_client=clickhouse_client 150 | ) 151 | case _: 152 | # Should be unreachable 153 | # The settings validator should have caught this 154 | raise ValueError( 155 | f"Vectorstore database {settings.vectorstore.database} not supported" 156 | ) 157 | 158 | def get_retriever( 159 | self, 160 | index: VectorStoreIndex, 161 | context_filter: ContextFilter | None = None, 162 | similarity_top_k: int = 2, 163 | ) -> VectorIndexRetriever: 164 | # This way we support qdrant (using doc_ids) and the rest (using filters) 165 | return VectorIndexRetriever( 166 | index=index, 167 | similarity_top_k=similarity_top_k, 168 | doc_ids=context_filter.docs_ids if context_filter else None, 169 | filters=( 170 | _doc_id_metadata_filter(context_filter) 171 | if self.settings.vectorstore.database != "qdrant" 172 | else None 173 | ), 174 | ) 175 | 176 | def close(self) -> None: 177 | if hasattr(self.vector_store.client, "close"): 178 | self.vector_store.client.close() 179 | -------------------------------------------------------------------------------- /private_gpt/server/chat/chat_service.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | from injector import inject, singleton 4 | from llama_index.core.chat_engine import ContextChatEngine, SimpleChatEngine 5 | from llama_index.core.chat_engine.types import ( 6 | BaseChatEngine, 7 | ) 8 | from llama_index.core.indices import VectorStoreIndex 9 | from llama_index.core.indices.postprocessor import MetadataReplacementPostProcessor 10 | from llama_index.core.llms import ChatMessage, MessageRole 11 | from llama_index.core.postprocessor import ( 12 | SentenceTransformerRerank, 13 | SimilarityPostprocessor, 14 | ) 15 | from llama_index.core.storage import StorageContext 16 | from llama_index.core.types import TokenGen 17 | from pydantic import BaseModel 18 | 19 | from private_gpt.components.embedding.embedding_component import EmbeddingComponent 20 | from private_gpt.components.llm.llm_component import LLMComponent 21 | from private_gpt.components.node_store.node_store_component import NodeStoreComponent 22 | from private_gpt.components.vector_store.vector_store_component import ( 23 | VectorStoreComponent, 24 | ) 25 | from private_gpt.open_ai.extensions.context_filter import ContextFilter 26 | from private_gpt.server.chunks.chunks_service import Chunk 27 | from private_gpt.settings.settings import Settings 28 | 29 | 30 | class Completion(BaseModel): 31 | response: str 32 | sources: list[Chunk] | None = None 33 | 34 | 35 | class CompletionGen(BaseModel): 36 | response: TokenGen 37 | sources: list[Chunk] | None = None 38 | 39 | 40 | @dataclass 41 | class ChatEngineInput: 42 | system_message: ChatMessage | None = None 43 | last_message: ChatMessage | None = None 44 | chat_history: list[ChatMessage] | None = None 45 | 46 | @classmethod 47 | def from_messages(cls, messages: list[ChatMessage]) -> "ChatEngineInput": 48 | # Detect if there is a system message, extract the last message and chat history 49 | system_message = ( 50 | messages[0] 51 | if len(messages) > 0 and messages[0].role == MessageRole.SYSTEM 52 | else None 53 | ) 54 | last_message = ( 55 | messages[-1] 56 | if len(messages) > 0 and messages[-1].role 
== MessageRole.USER 57 | else None 58 | ) 59 | # Remove from messages list the system message and last message, 60 | # if they exist. The rest is the chat history. 61 | if system_message: 62 | messages.pop(0) 63 | if last_message: 64 | messages.pop(-1) 65 | chat_history = messages if len(messages) > 0 else None 66 | 67 | return cls( 68 | system_message=system_message, 69 | last_message=last_message, 70 | chat_history=chat_history, 71 | ) 72 | 73 | 74 | @singleton 75 | class ChatService: 76 | settings: Settings 77 | 78 | @inject 79 | def __init__( 80 | self, 81 | settings: Settings, 82 | llm_component: LLMComponent, 83 | vector_store_component: VectorStoreComponent, 84 | embedding_component: EmbeddingComponent, 85 | node_store_component: NodeStoreComponent, 86 | ) -> None: 87 | self.settings = settings 88 | self.llm_component = llm_component 89 | self.embedding_component = embedding_component 90 | self.vector_store_component = vector_store_component 91 | self.storage_context = StorageContext.from_defaults( 92 | vector_store=vector_store_component.vector_store, 93 | docstore=node_store_component.doc_store, 94 | index_store=node_store_component.index_store, 95 | ) 96 | self.index = VectorStoreIndex.from_vector_store( 97 | vector_store_component.vector_store, 98 | storage_context=self.storage_context, 99 | llm=llm_component.llm, 100 | embed_model=embedding_component.embedding_model, 101 | show_progress=True, 102 | ) 103 | 104 | def _chat_engine( 105 | self, 106 | system_prompt: str | None = None, 107 | use_context: bool = False, 108 | context_filter: ContextFilter | None = None, 109 | ) -> BaseChatEngine: 110 | settings = self.settings 111 | if use_context: 112 | vector_index_retriever = self.vector_store_component.get_retriever( 113 | index=self.index, 114 | context_filter=context_filter, 115 | similarity_top_k=self.settings.rag.similarity_top_k, 116 | ) 117 | node_postprocessors = [ 118 | MetadataReplacementPostProcessor(target_metadata_key="window"), 119 | SimilarityPostprocessor( 120 | similarity_cutoff=settings.rag.similarity_value 121 | ), 122 | ] 123 | 124 | if settings.rag.rerank.enabled: 125 | rerank_postprocessor = SentenceTransformerRerank( 126 | model=settings.rag.rerank.model, top_n=settings.rag.rerank.top_n 127 | ) 128 | node_postprocessors.append(rerank_postprocessor) 129 | 130 | return ContextChatEngine.from_defaults( 131 | system_prompt=system_prompt, 132 | retriever=vector_index_retriever, 133 | llm=self.llm_component.llm, # Takes no effect at the moment 134 | node_postprocessors=node_postprocessors, 135 | ) 136 | else: 137 | return SimpleChatEngine.from_defaults( 138 | system_prompt=system_prompt, 139 | llm=self.llm_component.llm, 140 | ) 141 | 142 | def stream_chat( 143 | self, 144 | messages: list[ChatMessage], 145 | use_context: bool = False, 146 | context_filter: ContextFilter | None = None, 147 | ) -> CompletionGen: 148 | chat_engine_input = ChatEngineInput.from_messages(messages) 149 | last_message = ( 150 | chat_engine_input.last_message.content 151 | if chat_engine_input.last_message 152 | else None 153 | ) 154 | system_prompt = ( 155 | chat_engine_input.system_message.content 156 | if chat_engine_input.system_message 157 | else None 158 | ) 159 | chat_history = ( 160 | chat_engine_input.chat_history if chat_engine_input.chat_history else None 161 | ) 162 | 163 | chat_engine = self._chat_engine( 164 | system_prompt=system_prompt, 165 | use_context=use_context, 166 | context_filter=context_filter, 167 | ) 168 | streaming_response = chat_engine.stream_chat( 169 | 
message=last_message if last_message is not None else "", 170 | chat_history=chat_history, 171 | ) 172 | sources = [Chunk.from_node(node) for node in streaming_response.source_nodes] 173 | completion_gen = CompletionGen( 174 | response=streaming_response.response_gen, sources=sources 175 | ) 176 | return completion_gen 177 | 178 | def chat( 179 | self, 180 | messages: list[ChatMessage], 181 | use_context: bool = False, 182 | context_filter: ContextFilter | None = None, 183 | ) -> Completion: 184 | chat_engine_input = ChatEngineInput.from_messages(messages) 185 | last_message = ( 186 | chat_engine_input.last_message.content 187 | if chat_engine_input.last_message 188 | else None 189 | ) 190 | system_prompt = ( 191 | chat_engine_input.system_message.content 192 | if chat_engine_input.system_message 193 | else None 194 | ) 195 | chat_history = ( 196 | chat_engine_input.chat_history if chat_engine_input.chat_history else None 197 | ) 198 | 199 | chat_engine = self._chat_engine( 200 | system_prompt=system_prompt, 201 | use_context=use_context, 202 | context_filter=context_filter, 203 | ) 204 | wrapped_response = chat_engine.chat( 205 | message=last_message if last_message is not None else "", 206 | chat_history=chat_history, 207 | ) 208 | sources = [Chunk.from_node(node) for node in wrapped_response.source_nodes] 209 | completion = Completion(response=wrapped_response.response, sources=sources) 210 | return completion 211 | --------------------------------------------------------------------------------
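To complement the `ChatService` shown above, here is a minimal, hypothetical sketch of calling it directly from Python, outside the FastAPI layer. The `global_injector` name is an assumption about what `private_gpt/di.py` exposes (that module is not shown in this dump); everything else uses only the classes and signatures visible above.

```python
# Hypothetical usage sketch (not part of the repository sources shown above).
from llama_index.core.llms import ChatMessage, MessageRole

from private_gpt.di import global_injector  # assumed name of the DI container in di.py
from private_gpt.server.chat.chat_service import ChatService

# Resolve the singleton service through the injector so the LLM, embedding model,
# vector store and node store components are wired in by dependency injection.
chat_service = global_injector.get(ChatService)

completion = chat_service.chat(
    messages=[
        ChatMessage(role=MessageRole.SYSTEM, content="Answer using only the ingested documents."),
        ChatMessage(role=MessageRole.USER, content="What does the architecture section say about components?"),
    ],
    use_context=True,  # retrieve relevant chunks from the vector store before answering
)

print(completion.response)
for chunk in completion.sources or []:
    print(chunk)  # Chunk objects describing the retrieved source nodes
```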