├── docs
├── docs
│ ├── index.md
│ ├── api_reference
│ │ └── llama_deploy
│ │ │ ├── types.md
│ │ │ ├── python_sdk.md
│ │ │ └── apiserver.md
│ ├── _static
│ │ ├── assets
│ │ │ └── LlamaLogoBrowserTab.png
│ │ ├── css
│ │ │ └── custom.css
│ │ └── js
│ │ │ ├── leadfeeder.js
│ │ │ └── mendablesearch.js
│ ├── css
│ │ ├── custom.css
│ │ └── style.css
│ ├── module_guides
│ │ └── llama_deploy
│ │ │ ├── 40_llamactl.md
│ │ │ ├── 20_core_components.md
│ │ │ ├── 30_python_sdk.md
│ │ │ ├── index.md
│ │ │ └── 10_getting_started.md
│ └── javascript
│ │ ├── llms_example.js
│ │ └── mendablesearch.js
├── overrides
│ ├── main.html
│ └── partials
│ │ ├── copyright.html
│ │ └── search.html
├── pyproject.toml
├── README.md
└── mkdocs.yml
├── tests
├── __init__.py
├── cli
│ ├── __init__.py
│ ├── internal
│ │ ├── __init__.py
│ │ └── test_config.py
│ ├── data
│ │ ├── deployment.yaml
│ │ └── config.yaml
│ ├── test_cli.py
│ ├── test_sessions.py
│ ├── test_deploy.py
│ ├── conftest.py
│ ├── test_init.py
│ ├── test_status.py
│ └── test_run.py
├── client
│ ├── models
│ │ ├── __init__.py
│ │ ├── conftest.py
│ │ └── test_model.py
│ └── test_client.py
└── apiserver
│ ├── routers
│ ├── __init__.py
│ └── test_status.py
│ ├── data
│ ├── .env
│ ├── workflow
│ │ ├── __init__.py
│ │ └── workflow_test.py
│ ├── local.yaml
│ ├── git_service.yaml
│ ├── env_variables.yaml
│ ├── python_dependencies.yaml
│ ├── service_ports.yaml
│ ├── python_dependencies_kitchen_sink.yaml
│ ├── with_ui.yaml
│ └── example.yaml
│ ├── source_managers
│ ├── __init__.py
│ ├── test_git.py
│ └── test_local.py
│ ├── test_app.py
│ ├── test_settings.py
│ ├── conftest.py
│ ├── test_config_parser.py
│ └── test_server.py
├── e2e_tests
├── __init__.py
├── apiserver
│ ├── __init__.py
│ ├── rc
│ │ ├── src
│ │ │ ├── __init__.py
│ │ │ └── workflow.py
│ │ └── deployment.yml
│ ├── deployments
│ │ ├── src
│ │ │ ├── .env
│ │ │ ├── __init__.py
│ │ │ ├── workflow_reload.py
│ │ │ ├── workflow_hitl.py
│ │ │ ├── workflow.py
│ │ │ └── workflow_env.py
│ │ ├── deployment_hitl.yml
│ │ ├── deployment2.yml
│ │ ├── deployment_streaming.yml
│ │ ├── deployment1.yml
│ │ ├── deployment_reload1.yml
│ │ ├── deployment_reload2.yml
│ │ ├── deployment_env_local.yml
│ │ └── deployment_env_git.yml
│ ├── test_autodeploy.py
│ ├── test_status.py
│ ├── test_env_vars_local.py
│ ├── test_env_vars_git.py
│ ├── test_service_entrypoint.py
│ ├── test_streaming.py
│ ├── test_hitl.py
│ ├── test_reload.py
│ ├── test_deploy.py
│ └── conftest.py
└── README.md
├── templates
└── basic
│ ├── src
│ ├── __init__.py
│ └── workflow.py
│ └── ui
│ ├── app
│ ├── favicon.ico
│ ├── globals.css
│ └── layout.tsx
│ ├── postcss.config.mjs
│ ├── public
│ ├── file.svg
│ └── logo-dark-light.svg
│ ├── next.config.ts
│ ├── eslint.config.mjs
│ ├── .gitignore
│ ├── tsconfig.json
│ └── package.json
├── llama_deploy
├── apiserver
│ ├── __init__.py
│ ├── routers
│ │ ├── __init__.py
│ │ └── status.py
│ ├── source_managers
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── git.py
│ │ └── local.py
│ ├── __main__.py
│ ├── stats.py
│ ├── app.py
│ ├── server.py
│ └── settings.py
├── client
│ ├── __init__.py
│ ├── models
│ │ ├── __init__.py
│ │ └── model.py
│ ├── base.py
│ └── client.py
├── cli
│ ├── __main__.py
│ ├── internal
│ │ ├── utils.py
│ │ └── config.py
│ ├── sessions.py
│ ├── status.py
│ ├── deploy.py
│ ├── run.py
│ ├── serve.py
│ └── __init__.py
├── __init__.py
└── types
│ ├── apiserver.py
│ ├── __init__.py
│ └── core.py
├── examples
├── python_dependencies
│ ├── .python-version
│ ├── requirements.txt
│ ├── pyproject.toml
│ ├── deployment.yaml
│ ├── workflow.py
│ ├── uv.lock
│ └── README.md
├── python_fullstack
│ ├── frontend
│ │ ├── frontend
│ │ │ ├── __init__.py
│ │ │ ├── session_list
│ │ │ │ ├── __init__.py
│ │ │ │ ├── component.py
│ │ │ │ └── state.py
│ │ │ ├── style.py
│ │ │ ├── frontend.py
│ │ │ └── state.py
│ │ ├── requirements.txt
│ │ ├── .gitignore
│ │ ├── assets
│ │ │ └── favicon.ico
│ │ ├── rxconfig.py
│ │ └── dockerfile
│ ├── llama_deploy_frontend.png
│ ├── workflows
│ │ ├── data
│ │ │ └── attention.pdf
│ │ ├── __init__.py
│ │ ├── requirements.txt
│ │ ├── dockerfile
│ │ └── agent_workflow.py
│ ├── python_fullstack.yaml
│ ├── docker-compose.yml
│ └── README.md
├── quick_start
│ ├── ui
│ │ ├── app
│ │ │ ├── favicon.ico
│ │ │ ├── globals.css
│ │ │ ├── layout.tsx
│ │ │ └── confetti
│ │ │ │ └── page.tsx
│ │ ├── postcss.config.mjs
│ │ ├── public
│ │ │ ├── file.svg
│ │ │ └── logo-dark-light.svg
│ │ ├── next.config.ts
│ │ ├── eslint.config.mjs
│ │ ├── .gitignore
│ │ ├── tsconfig.json
│ │ └── package.json
│ ├── quick_start.yml
│ └── src
│ │ └── workflow.py
├── llamacloud
│ └── google_drive
│ │ ├── src
│ │ ├── config.yml
│ │ └── workflow.py
│ │ └── deployment.yml
└── google_cloud_run
│ ├── deployment.yml
│ ├── src
│ └── workflow.py
│ └── Dockerfile
├── system_diagram.png
├── .taplo.toml
├── .gitignore
├── .github
├── release.yml
└── workflows
│ ├── gh_project.yml
│ ├── lint.yml
│ ├── e2e_test.yml
│ ├── publish_release.yml
│ ├── unit_test.yml
│ ├── docker_release.yml
│ └── codeql.yml
├── docker
├── run_apiserver.py
├── README.md
├── Dockerfile.base
└── docker-bake.hcl
├── LICENSE
├── CONTRIBUTING.md
├── .pre-commit-config.yaml
└── pyproject.toml
/docs/docs/index.md:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/e2e_tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/cli/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/e2e_tests/apiserver/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/templates/basic/src/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/cli/internal/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/client/models/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/e2e_tests/apiserver/rc/src/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/llama_deploy/apiserver/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/apiserver/routers/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/apiserver/data/.env:
--------------------------------------------------------------------------------
1 | API_KEY=123
2 |
--------------------------------------------------------------------------------
/tests/apiserver/source_managers/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/examples/python_dependencies/.python-version:
--------------------------------------------------------------------------------
1 | 3.13
2 |
--------------------------------------------------------------------------------
/examples/python_fullstack/frontend/frontend/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/e2e_tests/apiserver/deployments/src/.env:
--------------------------------------------------------------------------------
1 | VAR_1=z
2 | API_KEY=123
3 |
--------------------------------------------------------------------------------
/examples/python_fullstack/frontend/frontend/session_list/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/examples/python_fullstack/frontend/requirements.txt:
--------------------------------------------------------------------------------
1 | llama-deploy
2 | reflex
3 |
--------------------------------------------------------------------------------
/docs/docs/api_reference/llama_deploy/types.md:
--------------------------------------------------------------------------------
1 | # `types`
2 |
3 | ::: llama_deploy.types
4 |
--------------------------------------------------------------------------------
/llama_deploy/client/__init__.py:
--------------------------------------------------------------------------------
1 | from .client import Client
2 |
3 | __all__ = ["Client"]
4 |
--------------------------------------------------------------------------------
/system_diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/run-llama/llama_deploy/HEAD/system_diagram.png
--------------------------------------------------------------------------------
/docs/overrides/main.html:
--------------------------------------------------------------------------------
1 | {% extends "base.html" %} {% block header %} {{ super() }} {% endblock %}
2 |
--------------------------------------------------------------------------------
/docs/overrides/partials/copyright.html:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/examples/python_dependencies/requirements.txt:
--------------------------------------------------------------------------------
1 | # This is a test of a version range
2 | fortune-python>1,<1.1
3 |
--------------------------------------------------------------------------------
/examples/python_fullstack/frontend/.gitignore:
--------------------------------------------------------------------------------
1 | *.db
2 | *.py[cod]
3 | .web
4 | __pycache__/
5 | assets/external/
6 |
--------------------------------------------------------------------------------
/templates/basic/ui/app/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/run-llama/llama_deploy/HEAD/templates/basic/ui/app/favicon.ico
--------------------------------------------------------------------------------
/examples/quick_start/ui/app/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/run-llama/llama_deploy/HEAD/examples/quick_start/ui/app/favicon.ico
--------------------------------------------------------------------------------
/templates/basic/ui/postcss.config.mjs:
--------------------------------------------------------------------------------
1 | const config = {
2 | plugins: ["@tailwindcss/postcss"],
3 | };
4 |
5 | export default config;
6 |
--------------------------------------------------------------------------------
/examples/quick_start/ui/postcss.config.mjs:
--------------------------------------------------------------------------------
1 | const config = {
2 | plugins: ["@tailwindcss/postcss"],
3 | };
4 |
5 | export default config;
6 |
--------------------------------------------------------------------------------
/tests/cli/data/deployment.yaml:
--------------------------------------------------------------------------------
1 | name: TestDeployment
2 |
3 | control-plane: {}
4 |
5 | services:
6 | test-workflow:
7 | name: Test Workflow
8 |
--------------------------------------------------------------------------------
/docs/docs/_static/assets/LlamaLogoBrowserTab.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/run-llama/llama_deploy/HEAD/docs/docs/_static/assets/LlamaLogoBrowserTab.png
--------------------------------------------------------------------------------
/examples/python_fullstack/llama_deploy_frontend.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/run-llama/llama_deploy/HEAD/examples/python_fullstack/llama_deploy_frontend.png
--------------------------------------------------------------------------------
/docs/overrides/partials/search.html:
--------------------------------------------------------------------------------
1 | {% import "partials/language.html" as lang with context %}
2 |
3 |
4 |
5 |
--------------------------------------------------------------------------------
/examples/python_fullstack/frontend/assets/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/run-llama/llama_deploy/HEAD/examples/python_fullstack/frontend/assets/favicon.ico
--------------------------------------------------------------------------------
/examples/python_fullstack/workflows/data/attention.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/run-llama/llama_deploy/HEAD/examples/python_fullstack/workflows/data/attention.pdf
--------------------------------------------------------------------------------
/tests/apiserver/data/workflow/__init__.py:
--------------------------------------------------------------------------------
1 | from .workflow_test import MyWorkflow, _TestEnvWorkflow
2 |
3 | my_workflow = MyWorkflow()
4 | env_reader_workflow = _TestEnvWorkflow()
5 |
--------------------------------------------------------------------------------
/llama_deploy/apiserver/routers/__init__.py:
--------------------------------------------------------------------------------
1 | from .deployments import deployments_router
2 | from .status import status_router
3 |
4 | __all__ = ["deployments_router", "status_router"]
5 |
--------------------------------------------------------------------------------
/examples/llamacloud/google_drive/src/config.yml:
--------------------------------------------------------------------------------
1 | llamacloud:
2 | index_name: ""
3 | project_name: ""
4 | organization_id: ""
5 |
--------------------------------------------------------------------------------
/llama_deploy/client/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .apiserver import ApiServer
2 | from .model import Collection, Model, make_sync
3 |
4 | __all__ = ["ApiServer", "Collection", "Model", "make_sync"]
5 |
--------------------------------------------------------------------------------
/examples/python_fullstack/frontend/rxconfig.py:
--------------------------------------------------------------------------------
1 | import reflex as rx
2 |
3 | config = rx.Config(
4 | app_name="frontend",
5 | api_url="http://localhost:9000",
6 | backend_port=9000,
7 | deployment_name="deployment",
8 | )
9 |
--------------------------------------------------------------------------------
/examples/python_fullstack/workflows/__init__.py:
--------------------------------------------------------------------------------
1 | from .agent_workflow import build_agentic_workflow
2 | from .rag_workflow import build_rag_workflow
3 |
4 |
5 | rag_w = build_rag_workflow()
6 | agentic_w = build_agentic_workflow(rag_w)
7 |
--------------------------------------------------------------------------------
/llama_deploy/apiserver/source_managers/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import SourceManager
2 | from .git import GitSourceManager
3 | from .local import LocalSourceManager
4 |
5 | __all__ = ["GitSourceManager", "LocalSourceManager", "SourceManager"]
6 |
--------------------------------------------------------------------------------
/.taplo.toml:
--------------------------------------------------------------------------------
1 | [formatting]
2 | align_comments = false
3 | reorder_keys = false
4 | # Following are to be consistent with toml-sort
5 | indent_string = " "
6 | array_trailing_comma = false
7 | compact_arrays = true
8 | compact_inline_tables = true
9 |
--------------------------------------------------------------------------------
/e2e_tests/apiserver/test_autodeploy.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 |
4 | @pytest.mark.asyncio
5 | async def test_autodeploy(client, apiserver_with_rc):
6 | status = await client.apiserver.status()
7 | assert "AutoDeployed" in status.deployments
8 |
--------------------------------------------------------------------------------
/llama_deploy/cli/__main__.py:
--------------------------------------------------------------------------------
1 | import sys
2 | from llama_deploy.cli import llamactl
3 |
4 |
5 | def main() -> None:
6 | """CLI entrypoint."""
7 | sys.exit(llamactl())
8 |
9 |
10 | if __name__ == "__main__": # pragma: no cover
11 | main()
12 |
--------------------------------------------------------------------------------
/docs/docs/api_reference/llama_deploy/python_sdk.md:
--------------------------------------------------------------------------------
1 | # Python SDK
2 |
3 | ## Client
4 |
5 | ::: llama_deploy.client.Client
6 | options:
7 | show_bases: false
8 |
9 |
10 | ## API Server functionalities
11 |
12 | ::: llama_deploy.client.models.apiserver
13 |
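As a quick orientation, here is a minimal usage sketch (an illustrative snippet assuming an API Server is running at the default address, `http://localhost:4501`):

```python
import asyncio

from llama_deploy.client import Client


async def main() -> None:
    # Client() picks up the default API Server address and timeout
    client = Client()
    status = await client.apiserver.status()
    print(status.status.value)  # e.g. "Healthy"


if __name__ == "__main__":
    asyncio.run(main())
```

A synchronous facade is also available through `client.sync`, e.g. `client.sync.apiserver.status()`.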
--------------------------------------------------------------------------------
/examples/python_dependencies/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "uv-requirements"
3 | version = "0.1.0"
4 | description = "Add your description here"
5 | readme = "README.md"
6 | requires-python = ">=3.10, <4.0"
7 | dependencies = [
8 | "pyfiglet>=1.0.3"
9 | ]
10 |
--------------------------------------------------------------------------------
/docs/docs/css/custom.css:
--------------------------------------------------------------------------------
1 | #my-component-root *,
2 | #headlessui-portal-root * {
3 | z-index: 1000000000000;
4 | font-size: 100%;
5 | }
6 |
7 | textarea {
8 | border: 0;
9 | padding: 0;
10 | }
11 |
12 | article p {
13 | margin-bottom: 10px !important;
14 | }
15 |
--------------------------------------------------------------------------------
/tests/cli/data/config.yaml:
--------------------------------------------------------------------------------
1 | current_profile: default
2 | profiles:
3 | default:
4 | insecure: false
5 | server: http://localhost:4501
6 | timeout: 120.0
7 |
8 | test:
9 | insecure: false
10 | server: http://localhost:4501
11 | timeout: 120.0
12 |
--------------------------------------------------------------------------------
/docs/docs/_static/css/custom.css:
--------------------------------------------------------------------------------
1 | #my-component-root *,
2 | #headlessui-portal-root * {
3 | z-index: 1000000000000;
4 | font-size: 100%;
5 | }
6 |
7 | textarea {
8 | border: 0;
9 | padding: 0;
10 | }
11 |
12 | article p {
13 | margin-bottom: 10px !important;
14 | }
15 |
--------------------------------------------------------------------------------
/tests/apiserver/test_app.py:
--------------------------------------------------------------------------------
1 | from fastapi.testclient import TestClient
2 |
3 |
4 | def test_read_main(http_client: TestClient) -> None:
5 | response = http_client.get("/")
6 | assert response.status_code == 200
7 | assert set(response.json().keys()) == {"swagger_docs", "status"}
8 |
--------------------------------------------------------------------------------
/docs/docs/api_reference/llama_deploy/apiserver.md:
--------------------------------------------------------------------------------
1 | # `apiserver`
2 |
3 | ::: llama_deploy.apiserver.deployment
4 |
5 | ::: llama_deploy.apiserver.deployment_config_parser
6 | options:
7 | members:
8 | - DeploymentConfig
9 |
10 | ::: llama_deploy.apiserver.source_managers
11 |
--------------------------------------------------------------------------------
/e2e_tests/apiserver/deployments/src/__init__.py:
--------------------------------------------------------------------------------
1 | from .workflow import EchoWorkflow
2 | from .workflow_reload import EchoWithPrompt
3 |
4 | my_workflow = EchoWorkflow()
5 | echo_workflow_en = EchoWithPrompt(prompt_msg="I have received:")
6 | echo_workflow_it = EchoWithPrompt(prompt_msg="Ho ricevuto:")
7 |
--------------------------------------------------------------------------------
/examples/python_fullstack/workflows/requirements.txt:
--------------------------------------------------------------------------------
1 | llama-deploy[rabbitmq, kafka, redis]==0.1.1
2 | llama-index-postprocessor-rankgpt-rerank>=0.2.0
3 | llama-index-vector-stores-qdrant>=0.3.0
4 | llama-index-llms-openai>=0.2.2
5 | llama-index-embeddings-openai>=0.2.4
6 | llama-index-readers-file>=0.2.0
7 |
--------------------------------------------------------------------------------
/examples/google_cloud_run/deployment.yml:
--------------------------------------------------------------------------------
1 | name: CloudRunExample
2 |
3 | control-plane:
4 | port: 8000
5 |
6 | default-service: dummy_workflow
7 |
8 | services:
9 | dummy_workflow:
10 | name: Dummy Workflow
11 | source:
12 | type: local
13 | name: .
14 | path: workflow:echo_workflow
15 |
--------------------------------------------------------------------------------
/tests/apiserver/data/local.yaml:
--------------------------------------------------------------------------------
1 | name: LocalDeploymentRelativePath
2 |
3 | control-plane: {}
4 |
5 | services:
6 | test-workflow:
7 | name: Test Workflow
8 | port: 8002
9 | host: localhost
10 | source:
11 | type: local
12 | location: workflow
13 | import-path: workflow:my_workflow
14 |
--------------------------------------------------------------------------------
/docs/docs/module_guides/llama_deploy/40_llamactl.md:
--------------------------------------------------------------------------------
1 | # CLI
2 |
3 | `llamactl` is a command line interface that ships with LlamaDeploy. Its main goal is to make it easy to interact
4 | with a running [API Server](./20_core_components.md#api-server).
5 |
6 | ::: mkdocs-click
7 | :module: llama_deploy.cli
8 | :command: llamactl
9 |
--------------------------------------------------------------------------------
/e2e_tests/apiserver/deployments/deployment_hitl.yml:
--------------------------------------------------------------------------------
1 | name: HumanInTheLoop
2 |
3 | control-plane:
4 | port: 8000
5 |
6 | default-service: hitl_workflow
7 |
8 | services:
9 | hitl_workflow:
10 | name: HITL Workflow
11 | source:
12 | type: local
13 | name: src
14 | path: src/workflow_hitl:workflow
15 |
--------------------------------------------------------------------------------
/e2e_tests/apiserver/rc/deployment.yml:
--------------------------------------------------------------------------------
1 | name: AutoDeployed
2 |
3 | control-plane: {}
4 |
5 | default-service: test-workflow
6 |
7 | services:
8 | test-workflow:
9 | name: Test Workflow
10 | port: 8002
11 | host: localhost
12 | source:
13 | type: local
14 | name: src
15 | path: src/workflow:echo_workflow
16 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized
2 | __pycache__/
3 | *.py[cod]
4 |
5 | # Unit test / coverage reports
6 | .coverage
7 | .coverage.*
8 | coverage.*
9 | .pytest_cache/
10 |
11 | # Build artifacts
12 | dist/
13 |
14 | # Project related
15 | .tool-versions
16 |
17 | # IDEs
18 | .idea
19 | .DS_Store
20 | .vscode
21 | .zed
22 | .claude
23 |
--------------------------------------------------------------------------------
/e2e_tests/apiserver/deployments/deployment2.yml:
--------------------------------------------------------------------------------
1 | name: TestDeployment2
2 |
3 | control-plane: {}
4 |
5 | default-service: dummy_workflow
6 |
7 | services:
8 | test-workflow:
9 | name: Test Workflow
10 | port: 8002
11 | host: localhost
12 | source:
13 | type: local
14 | name: src
15 | path: src:my_workflow
16 |
--------------------------------------------------------------------------------
/tests/client/models/conftest.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Iterator
2 | from unittest import mock
3 |
4 | import pytest
5 |
6 | from llama_deploy.client import Client
7 |
8 |
9 | @pytest.fixture
10 | def client(monkeypatch: Any) -> Iterator[Client]:
11 | monkeypatch.setattr(Client, "request", mock.AsyncMock())
12 | yield Client()
13 |
--------------------------------------------------------------------------------
/e2e_tests/apiserver/deployments/deployment_streaming.yml:
--------------------------------------------------------------------------------
1 | name: Streaming
2 |
3 | control-plane:
4 | port: 8000
5 |
6 | default-service: streaming_workflow
7 |
8 | services:
9 | streaming_workflow:
10 | name: Streaming Workflow
11 | source:
12 | type: local
13 | name: src
14 | path: src/workflow:streaming_workflow
15 |
--------------------------------------------------------------------------------
/tests/apiserver/data/git_service.yaml:
--------------------------------------------------------------------------------
1 | name: TestDeployment
2 |
3 | control-plane: {}
4 |
5 | services:
6 | test-workflow:
7 | name: Test Workflow
8 | port: 8002
9 | host: localhost
10 | source:
11 | type: git
12 | location: https://github.com/run-llama/llama_deploy.git
13 | import-path: tests/apiserver/data/workflow:my_workflow
14 |
--------------------------------------------------------------------------------
/.github/release.yml:
--------------------------------------------------------------------------------
1 | changelog:
2 | categories:
3 | - title: Breaking Changes ⚠️
4 | labels:
5 | - breaking-change
6 | - title: New Features 🎉
7 | labels:
8 | - '*'
9 | - title: Bug Fixes 🐛
10 | labels:
11 | - bug
12 | - title: Documentation 📚
13 | labels:
14 | - documentation
15 | - example
16 |
--------------------------------------------------------------------------------
/llama_deploy/__init__.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | root_logger = logging.getLogger("llama_deploy")
4 |
5 | formatter = logging.Formatter("%(levelname)s:%(name)s - %(message)s")
6 | console_handler = logging.StreamHandler()
7 | console_handler.setFormatter(formatter)
8 | root_logger.addHandler(console_handler)
9 |
10 | root_logger.setLevel(logging.INFO)
11 | root_logger.propagate = True
12 |
--------------------------------------------------------------------------------
/e2e_tests/apiserver/deployments/deployment1.yml:
--------------------------------------------------------------------------------
1 | name: TestDeployment1
2 |
3 | control-plane: {}
4 |
5 | default-service: dummy_workflow
6 |
7 | services:
8 | test-workflow:
9 | name: Test Workflow
10 | port: 8002
11 | host: localhost
12 | source:
13 | type: git
14 | name: https://github.com/run-llama/llama_deploy.git
15 | path: e2e_tests/apiserver/deployments/src:my_workflow
16 |
--------------------------------------------------------------------------------
/e2e_tests/apiserver/deployments/deployment_reload1.yml:
--------------------------------------------------------------------------------
1 | name: ReloadMe
2 |
3 | control-plane: {}
4 |
5 | default-service: test-workflow
6 |
7 | services:
8 | test-workflow:
9 | name: Test Workflow
10 | port: 8002
11 | host: localhost
12 | source:
13 | type: git
14 | name: https://github.com/run-llama/llama_deploy.git
15 | path: e2e_tests/apiserver/deployments/src:echo_workflow_en
16 |
--------------------------------------------------------------------------------
/e2e_tests/apiserver/deployments/deployment_reload2.yml:
--------------------------------------------------------------------------------
1 | name: ReloadMe
2 |
3 | control-plane: {}
4 |
5 | default-service: test-workflow
6 |
7 | services:
8 | test-workflow:
9 | name: Test Workflow
10 | port: 8002
11 | host: localhost
12 | source:
13 | type: git
14 | name: https://github.com/run-llama/llama_deploy.git
15 | path: e2e_tests/apiserver/deployments/src:echo_workflow_it
16 |
--------------------------------------------------------------------------------
/examples/python_dependencies/deployment.yaml:
--------------------------------------------------------------------------------
1 | name: dependencies
2 |
3 | control-plane:
4 | port: 8000
5 |
6 | default-service: echo_workflow
7 |
8 | services:
9 | echo_workflow:
10 | name: Pretty Echo Workflow
11 | source:
12 | type: local
13 | name: src
14 | path: workflow:echo_workflow
15 | python-dependencies:
16 | - cowpy
17 | - "."
18 | - "requirements.txt"
19 |
--------------------------------------------------------------------------------
/templates/basic/ui/public/file.svg:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/examples/quick_start/quick_start.yml:
--------------------------------------------------------------------------------
1 | name: QuickStart
2 |
3 | control-plane:
4 | port: 8000
5 |
6 | default-service: echo_workflow
7 |
8 | services:
9 | echo_workflow:
10 | name: Echo Workflow
11 | source:
12 | type: local
13 | name: src
14 | path: src/workflow:echo_workflow
15 |
16 | ui:
17 | name: My Nextjs App
18 | port: 3001
19 | source:
20 | type: local
21 | name: ui
22 |
--------------------------------------------------------------------------------
/examples/quick_start/ui/public/file.svg:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/llama_deploy/apiserver/__main__.py:
--------------------------------------------------------------------------------
1 | import uvicorn
2 | from prometheus_client import start_http_server
3 |
4 | from .settings import settings
5 |
6 | if __name__ == "__main__":
7 | if settings.prometheus_enabled:
8 | start_http_server(settings.prometheus_port)
9 |
10 | uvicorn.run(
11 | "llama_deploy.apiserver.app:app",
12 | host=settings.host,
13 | port=settings.port,
14 | )
15 |
--------------------------------------------------------------------------------
/llama_deploy/cli/internal/utils.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | from platformdirs import user_config_dir
4 |
5 | DEFAULT_PROFILE_NAME = "default"
6 | DEFAULT_CONFIG_FILE_NAME = "config.yaml"
7 | DEFAULT_CONFIG_FOLDER_NAME = "llamactl"
8 |
9 |
10 | def _default_config_path() -> Path:
11 | base = user_config_dir(DEFAULT_CONFIG_FOLDER_NAME, appauthor=False)
12 | return Path(base) / DEFAULT_CONFIG_FILE_NAME
13 |
--------------------------------------------------------------------------------
/templates/basic/ui/next.config.ts:
--------------------------------------------------------------------------------
1 | import type { NextConfig } from "next";
2 |
3 | const nextConfig: NextConfig = {
4 | basePath: process.env.LLAMA_DEPLOY_NEXTJS_BASE_PATH,
5 | env: {
6 | NEXT_PUBLIC_LLAMA_DEPLOY_NEXTJS_DEPLOYMENT_NAME: process.env.LLAMA_DEPLOY_NEXTJS_DEPLOYMENT_NAME || "default",
7 | NEXT_PUBLIC_BASE_PATH: process.env.LLAMA_DEPLOY_NEXTJS_BASE_PATH,
8 | },
9 | };
10 |
11 | export default nextConfig;
12 |
--------------------------------------------------------------------------------
/docker/run_apiserver.py:
--------------------------------------------------------------------------------
1 | import uvicorn
2 | from prometheus_client import start_http_server
3 |
4 | from llama_deploy.apiserver.settings import settings
5 |
6 | if __name__ == "__main__":
7 | if settings.prometheus_enabled:
8 | start_http_server(settings.prometheus_port)
9 |
10 | uvicorn.run(
11 | "llama_deploy.apiserver.app:app",
12 | host=settings.host,
13 | port=settings.port,
14 | )
15 |
--------------------------------------------------------------------------------
/tests/apiserver/data/env_variables.yaml:
--------------------------------------------------------------------------------
1 | name: MyDeployment
2 |
3 | control-plane:
4 | port: 8000
5 |
6 | message-queue:
7 | type: simple
8 | host: "127.0.0.1"
9 | port: 8001
10 |
11 | default-service: myworkflow
12 |
13 | services:
14 | myworkflow:
15 | name: My Python Workflow
16 | env:
17 | VAR_1: x
18 | VAR_2: y
19 | env-files:
20 | - .env
21 | source:
22 | type: local
23 | location: workflow
24 |
--------------------------------------------------------------------------------
/examples/quick_start/ui/next.config.ts:
--------------------------------------------------------------------------------
1 | import type { NextConfig } from "next";
2 | const nextConfig: NextConfig = {
3 | basePath: process.env.LLAMA_DEPLOY_NEXTJS_BASE_PATH,
4 | env: {
5 | NEXT_PUBLIC_LLAMA_DEPLOY_NEXTJS_DEPLOYMENT_NAME:
6 | process.env.LLAMA_DEPLOY_NEXTJS_DEPLOYMENT_NAME || "default",
7 | NEXT_PUBLIC_BASE_PATH:
8 | process.env.LLAMA_DEPLOY_NEXTJS_BASE_PATH || "",
9 | },
10 | };
11 |
12 | export default nextConfig;
13 |
--------------------------------------------------------------------------------
/.github/workflows/gh_project.yml:
--------------------------------------------------------------------------------
1 | name: Add issues to GitHub project
2 |
3 | on:
4 | issues:
5 | types:
6 | - opened
7 |
8 | jobs:
9 | add-to-project:
10 | name: Add new issues to project for triage
11 | runs-on: ubuntu-latest
12 | steps:
13 | - uses: actions/add-to-project@v1.0.2
14 | with:
15 | project-url: https://github.com/orgs/run-llama/projects/8
16 | github-token: ${{ secrets.ADD_TO_PROJECT_PAT }}
17 |
--------------------------------------------------------------------------------
/tests/apiserver/data/python_dependencies.yaml:
--------------------------------------------------------------------------------
1 | name: MyDeployment
2 |
3 | control-plane:
4 | port: 8000
5 |
6 | message-queue:
7 | type: simple
8 | host: "127.0.0.1"
9 | port: 8001
10 |
11 | default-service: myworkflow
12 |
13 | services:
14 | myworkflow:
15 | name: My Python Workflow
16 | python-dependencies:
17 | - "llama-index-core<1"
18 | - "llama-index-llms-openai"
19 | source:
20 | type: local
21 | location: test
22 |
--------------------------------------------------------------------------------
/e2e_tests/apiserver/deployments/src/workflow_reload.py:
--------------------------------------------------------------------------------
1 | from workflows import Context, Workflow, step
2 | from workflows.events import StartEvent, StopEvent
3 |
4 |
5 | class EchoWithPrompt(Workflow):
6 | def __init__(self, prompt_msg):
7 | super().__init__()
8 | self._prompt_msg = prompt_msg
9 |
10 | @step
11 | def do_something(self, ctx: Context, ev: StartEvent) -> StopEvent:
12 | return StopEvent(result=f"{self._prompt_msg}{ev.data}")
13 |
--------------------------------------------------------------------------------
/llama_deploy/types/apiserver.py:
--------------------------------------------------------------------------------
1 | from enum import Enum
2 |
3 | from pydantic import BaseModel
4 |
5 |
6 | class StatusEnum(Enum):
7 | HEALTHY = "Healthy"
8 | UNHEALTHY = "Unhealthy"
9 | DOWN = "Down"
10 |
11 |
12 | class Status(BaseModel):
13 | status: StatusEnum
14 | status_message: str
15 | max_deployments: int | None = None
16 | deployments: list[str] | None = None
17 |
18 |
19 | class DeploymentDefinition(BaseModel):
20 | name: str
21 |
--------------------------------------------------------------------------------
/tests/apiserver/data/service_ports.yaml:
--------------------------------------------------------------------------------
1 | name: TestDeployment
2 |
3 | control-plane: {}
4 |
5 | services:
6 | no-port:
7 | name: No Port
8 | source:
9 | type: local
10 | location: workflow
11 |
12 | has-port:
13 | name: Has Port
14 | port: 9999
15 | source:
16 | type: local
17 | location: workflow
18 |
19 | no-port-again:
20 | name: Again no Port
21 | source:
22 | type: local
23 | location: workflow
24 |
--------------------------------------------------------------------------------
/.github/workflows/lint.yml:
--------------------------------------------------------------------------------
1 | name: Linting
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 | pull_request:
8 |
9 | jobs:
10 | lint:
11 | runs-on: ubuntu-latest
12 | steps:
13 | - uses: actions/checkout@v3
14 |
15 | - name: Install uv
16 | uses: astral-sh/setup-uv@v5
17 |
18 | - name: Set up Python
19 | run: uv python install
20 |
21 | - name: Run linter
22 | shell: bash
23 | run: uv run -- pre-commit run -a
24 |
--------------------------------------------------------------------------------
/templates/basic/ui/eslint.config.mjs:
--------------------------------------------------------------------------------
1 | import { dirname } from "path";
2 | import { fileURLToPath } from "url";
3 | import { FlatCompat } from "@eslint/eslintrc";
4 |
5 | const __filename = fileURLToPath(import.meta.url);
6 | const __dirname = dirname(__filename);
7 |
8 | const compat = new FlatCompat({
9 | baseDirectory: __dirname,
10 | });
11 |
12 | const eslintConfig = [
13 | ...compat.extends("next/core-web-vitals", "next/typescript"),
14 | ];
15 |
16 | export default eslintConfig;
17 |
--------------------------------------------------------------------------------
/examples/quick_start/ui/eslint.config.mjs:
--------------------------------------------------------------------------------
1 | import { dirname } from "path";
2 | import { fileURLToPath } from "url";
3 | import { FlatCompat } from "@eslint/eslintrc";
4 |
5 | const __filename = fileURLToPath(import.meta.url);
6 | const __dirname = dirname(__filename);
7 |
8 | const compat = new FlatCompat({
9 | baseDirectory: __dirname,
10 | });
11 |
12 | const eslintConfig = [
13 | ...compat.extends("next/core-web-vitals", "next/typescript"),
14 | ];
15 |
16 | export default eslintConfig;
17 |
--------------------------------------------------------------------------------
/tests/apiserver/data/python_dependencies_kitchen_sink.yaml:
--------------------------------------------------------------------------------
1 | name: MyDeployment
2 |
3 | control-plane:
4 | port: 8000
5 |
6 | message-queue:
7 | type: simple
8 | host: "127.0.0.1"
9 | port: 8001
10 |
11 | default-service: myworkflow
12 |
13 | services:
14 | myworkflow:
15 | name: My Python Workflow
16 | python-dependencies:
17 | - "test<1"
18 | - "./bar/requirements.txt"
19 | - "./foo/bar/" # pyproject.toml
20 | source:
21 | type: local
22 | location: test
23 |
--------------------------------------------------------------------------------
/llama_deploy/types/__init__.py:
--------------------------------------------------------------------------------
1 | from .apiserver import DeploymentDefinition, Status, StatusEnum
2 | from .core import (
3 | ChatMessage,
4 | EventDefinition,
5 | SessionDefinition,
6 | TaskDefinition,
7 | TaskResult,
8 | generate_id,
9 | )
10 |
11 | __all__ = [
12 | "ChatMessage",
13 | "EventDefinition",
14 | "SessionDefinition",
15 | "TaskDefinition",
16 | "TaskResult",
17 | "generate_id",
18 | "DeploymentDefinition",
19 | "Status",
20 | "StatusEnum",
21 | ]
22 |
--------------------------------------------------------------------------------
/tests/apiserver/data/with_ui.yaml:
--------------------------------------------------------------------------------
1 | name: test-deployment
2 |
3 | control-plane:
4 | port: 8000
5 |
6 | default-service: echo_workflow
7 |
8 | services:
9 | test-workflow:
10 | name: Test Workflow
11 | port: 8002
12 | host: localhost
13 | source:
14 | type: local
15 | location: workflow
16 | import-path: workflow:my_workflow
17 |
18 | ui:
19 | name: My Nextjs App
20 | source:
21 | type: git
22 | location: https://github.com/run-llama/llama_deploy.git
23 | import-path: src/ui
24 |
--------------------------------------------------------------------------------
/e2e_tests/apiserver/deployments/deployment_env_local.yml:
--------------------------------------------------------------------------------
1 | name: EnvironmentVariablesLocal
2 |
3 | control-plane:
4 | port: 8000
5 |
6 | default-service: test_env_workflow
7 |
8 | services:
9 | test_env_workflow:
10 | name: Workflow
11 | source:
12 | type: local
13 | name: src
14 | env:
15 | VAR_1: x # this gets overwritten because VAR_1 also exists in the provided .env
16 | VAR_2: y
17 | env-files:
18 | - src/.env # relative to source path
19 | path: src/workflow_env:workflow
20 |
--------------------------------------------------------------------------------
/tests/apiserver/test_settings.py:
--------------------------------------------------------------------------------
1 | from llama_deploy.apiserver.settings import ApiserverSettings
2 |
3 |
4 | def test_settings_url() -> None:
5 | s = ApiserverSettings()
6 | assert s.url == "http://127.0.0.1:4501"
7 |
8 | s = ApiserverSettings(use_tls=True)
9 | assert s.url == "https://127.0.0.1:4501"
10 |
11 | s = ApiserverSettings(host="example.com", port=8080)
12 | assert s.url == "http://example.com:8080"
13 |
14 | s = ApiserverSettings(host="example.com", port=80)
15 | assert s.url == "http://example.com"
16 |
--------------------------------------------------------------------------------
/e2e_tests/apiserver/deployments/deployment_env_git.yml:
--------------------------------------------------------------------------------
1 | name: EnvironmentVariablesGit
2 |
3 | control-plane:
4 | port: 8000
5 |
6 | services:
7 | workflow_git:
8 | name: Git Workflow
9 | source:
10 | type: git
11 | name: https://github.com/run-llama/llama_deploy.git
12 | env:
13 | VAR_1: x # this gets overwritten because VAR_1 also exists in the provided .env
14 | VAR_2: y
15 | env-files:
16 | - tests/apiserver/data/.env # relative to source path
17 | path: tests/apiserver/data/workflow:env_reader_workflow
18 |
--------------------------------------------------------------------------------
/e2e_tests/apiserver/deployments/src/workflow_hitl.py:
--------------------------------------------------------------------------------
1 | from workflows import Workflow, step
2 | from workflows.events import (
3 | HumanResponseEvent,
4 | InputRequiredEvent,
5 | StartEvent,
6 | StopEvent,
7 | )
8 |
9 |
10 | class HumanInTheLoopWorkflow(Workflow):
11 | @step
12 | async def step1(self, ev: StartEvent) -> InputRequiredEvent:
13 | return InputRequiredEvent(prefix="Enter a number: ")
14 |
15 | @step
16 | async def step2(self, ev: HumanResponseEvent) -> StopEvent:
17 | return StopEvent(result=ev.response)
18 |
19 |
20 | workflow = HumanInTheLoopWorkflow(timeout=3)
21 |
--------------------------------------------------------------------------------
/templates/basic/ui/app/globals.css:
--------------------------------------------------------------------------------
1 | @import "tailwindcss";
2 |
3 | :root {
4 | --background: #ffffff;
5 | --foreground: #171717;
6 | }
7 |
8 | @theme inline {
9 | --color-background: var(--background);
10 | --color-foreground: var(--foreground);
11 | --font-sans: var(--font-geist-sans);
12 | --font-mono: var(--font-geist-mono);
13 | }
14 |
15 | @media (prefers-color-scheme: dark) {
16 | :root {
17 | --background: #0a0a0a;
18 | --foreground: #ededed;
19 | }
20 | }
21 |
22 | body {
23 | background: var(--background);
24 | color: var(--foreground);
25 | font-family: Arial, Helvetica, sans-serif;
26 | }
27 |
--------------------------------------------------------------------------------
/examples/quick_start/ui/app/globals.css:
--------------------------------------------------------------------------------
1 | @import "tailwindcss";
2 |
3 | :root {
4 | --background: #ffffff;
5 | --foreground: #171717;
6 | }
7 |
8 | @theme inline {
9 | --color-background: var(--background);
10 | --color-foreground: var(--foreground);
11 | --font-sans: var(--font-geist-sans);
12 | --font-mono: var(--font-geist-mono);
13 | }
14 |
15 | @media (prefers-color-scheme: dark) {
16 | :root {
17 | --background: #0a0a0a;
18 | --foreground: #ededed;
19 | }
20 | }
21 |
22 | body {
23 | background: var(--background);
24 | color: var(--foreground);
25 | font-family: Arial, Helvetica, sans-serif;
26 | }
27 |
--------------------------------------------------------------------------------
/examples/python_fullstack/workflows/dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.10-slim
2 |
3 | EXPOSE 8002
4 | EXPOSE 8003
5 |
6 | WORKDIR /app
7 |
8 | # Copy source code
9 | COPY . .
10 |
11 | # Install libraries for necessary python package builds
12 | RUN apt-get clean && apt-get update \
13 | && apt-get --no-install-recommends install build-essential python3-dev libpq-dev curl -y \
14 | && pip install --no-cache-dir --upgrade pip \
15 | && apt-get -yq update && apt-get -yqq install ssh \
16 | && apt-get clean \
17 | && pip install -r requirements.txt
18 |
19 | # Run the deploy.py script
20 | ENTRYPOINT ["python", "./deploy.py"]
21 |
--------------------------------------------------------------------------------
/docs/docs/_static/js/leadfeeder.js:
--------------------------------------------------------------------------------
1 | (function (ss, ex) {
2 | window.ldfdr =
3 | window.ldfdr ||
4 | function () {
5 | (ldfdr._q = ldfdr._q || []).push([].slice.call(arguments));
6 | };
7 | (function (d, s) {
8 | fs = d.getElementsByTagName(s)[0];
9 | function ce(src) {
10 | var cs = d.createElement(s);
11 | cs.src = src;
12 | cs.async = 1;
13 | fs.parentNode.insertBefore(cs, fs);
14 | }
15 | ce(
16 | "https://sc.lfeeder.com/lftracker_v1_" +
17 | ss +
18 | (ex ? "_" + ex : "") +
19 | ".js",
20 | );
21 | })(document, "script");
22 | })("Xbp1oaEnqwn8EdVj");
23 |
--------------------------------------------------------------------------------
/tests/apiserver/data/workflow/workflow_test.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from workflows import Context, Workflow, step
4 | from workflows.events import StartEvent, StopEvent
5 |
6 |
7 | class MyWorkflow(Workflow):
8 | @step
9 | def do_something(self, ctx: Context, ev: StartEvent) -> StopEvent:
10 | return StopEvent(result=f"Received: {ev.data}")
11 |
12 |
13 | class _TestEnvWorkflow(Workflow):
14 | @step()
15 | async def read_env_vars(self, ctx: Context, ev: StartEvent) -> StopEvent:
16 | env_vars = [f"{v}: {os.environ.get(v)}" for v in ev.get("env_vars_to_read")]
17 | return StopEvent(result=", ".join(env_vars))
18 |
--------------------------------------------------------------------------------
/e2e_tests/apiserver/test_status.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 |
4 | @pytest.mark.asyncio
5 | async def test_status_down(client):
6 | res = await client.apiserver.status()
7 | assert res.status.value == "Down"
8 |
9 |
10 | def test_status_down_sync(client):
11 | res = client.sync.apiserver.status()
12 | assert res.status.value == "Down"
13 |
14 |
15 | @pytest.mark.asyncio
16 | async def test_status_up(apiserver, client):
17 | res = await client.apiserver.status()
18 | assert res.status.value == "Healthy"
19 |
20 |
21 | def test_status_up_sync(apiserver, client):
22 | res = client.sync.apiserver.status()
23 | assert res.status.value == "Healthy"
24 |
--------------------------------------------------------------------------------
/templates/basic/ui/.gitignore:
--------------------------------------------------------------------------------
1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
2 |
3 | # dependencies
4 | /node_modules
5 | /.pnp
6 | .pnp.*
7 | .yarn/*
8 | !.yarn/patches
9 | !.yarn/plugins
10 | !.yarn/releases
11 | !.yarn/versions
12 |
13 | # testing
14 | /coverage
15 |
16 | # next.js
17 | /.next/
18 | /out/
19 |
20 | # production
21 | /build
22 |
23 | # misc
24 | .DS_Store
25 | *.pem
26 |
27 | # debug
28 | npm-debug.log*
29 | yarn-debug.log*
30 | yarn-error.log*
31 | .pnpm-debug.log*
32 |
33 | # env files (can opt-in for committing if needed)
34 | .env*
35 |
36 | # vercel
37 | .vercel
38 |
39 | # typescript
40 | *.tsbuildinfo
41 | next-env.d.ts
42 |
--------------------------------------------------------------------------------
/examples/quick_start/ui/.gitignore:
--------------------------------------------------------------------------------
1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
2 |
3 | # dependencies
4 | /node_modules
5 | /.pnp
6 | .pnp.*
7 | .yarn/*
8 | !.yarn/patches
9 | !.yarn/plugins
10 | !.yarn/releases
11 | !.yarn/versions
12 |
13 | # testing
14 | /coverage
15 |
16 | # next.js
17 | /.next/
18 | /out/
19 |
20 | # production
21 | /build
22 |
23 | # misc
24 | .DS_Store
25 | *.pem
26 |
27 | # debug
28 | npm-debug.log*
29 | yarn-debug.log*
30 | yarn-error.log*
31 | .pnpm-debug.log*
32 |
33 | # env files (can opt-in for committing if needed)
34 | .env*
35 |
36 | # vercel
37 | .vercel
38 |
39 | # typescript
40 | *.tsbuildinfo
41 | next-env.d.ts
42 |
--------------------------------------------------------------------------------
/examples/quick_start/src/workflow.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 |
3 | from workflows import Workflow, step
4 | from workflows.events import StartEvent, StopEvent
5 |
6 |
7 | # create a dummy workflow
8 | class EchoWorkflow(Workflow):
9 | """A dummy workflow with only one step sending back the input given."""
10 |
11 | @step()
12 | async def run_step(self, ev: StartEvent) -> StopEvent:
13 | message = str(ev.get("message", ""))
14 | return StopEvent(result=f"Message received: {message}")
15 |
16 |
17 | echo_workflow = EchoWorkflow()
18 |
19 |
20 | async def main():
21 | print(await echo_workflow.run(message="Hello!"))
22 |
23 |
24 | if __name__ == "__main__":
25 | asyncio.run(main())
26 |
--------------------------------------------------------------------------------
/e2e_tests/apiserver/test_env_vars_local.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import pytest
4 |
5 | from llama_deploy.types.core import TaskDefinition
6 |
7 |
8 | @pytest.mark.asyncio
9 | async def test_read_env_vars_local(apiserver, client):
10 | here = Path(__file__).parent
11 | deployment_fp = here / "deployments" / "deployment_env_local.yml"
12 | with open(deployment_fp) as f:
13 | deployment = await client.apiserver.deployments.create(
14 | f, base_path=deployment_fp.parent
15 | )
16 |
17 | result = await deployment.tasks.run(
18 | TaskDefinition(service_id="test_env_workflow", input="")
19 | )
20 |
21 | assert result == "var_1: z, var_2: y, api_key: 123"
22 |
--------------------------------------------------------------------------------
/examples/google_cloud_run/src/workflow.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 |
3 | from workflows import Workflow, step
4 | from workflows.events import StartEvent, StopEvent
5 |
6 |
7 | # create a dummy workflow
8 | class EchoWorkflow(Workflow):
9 | """A dummy workflow with only one step sending back the input given."""
10 |
11 | @step()
12 | async def run_step(self, ev: StartEvent) -> StopEvent:
13 | message = str(ev.get("message", ""))
14 | return StopEvent(result=f"Message received: {message}")
15 |
16 |
17 | echo_workflow = EchoWorkflow()
18 |
19 |
20 | async def main():
21 | print(await echo_workflow.run(message="Hello!"))
22 |
23 |
24 | if __name__ == "__main__":
25 | asyncio.run(main())
26 |
--------------------------------------------------------------------------------
/templates/basic/ui/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "target": "ES2017",
4 | "lib": ["dom", "dom.iterable", "esnext"],
5 | "allowJs": true,
6 | "skipLibCheck": true,
7 | "strict": true,
8 | "noEmit": true,
9 | "esModuleInterop": true,
10 | "module": "esnext",
11 | "moduleResolution": "bundler",
12 | "resolveJsonModule": true,
13 | "isolatedModules": true,
14 | "jsx": "preserve",
15 | "incremental": true,
16 | "plugins": [
17 | {
18 | "name": "next"
19 | }
20 | ],
21 | "paths": {
22 | "@/*": ["./*"]
23 | }
24 | },
25 | "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
26 | "exclude": ["node_modules"]
27 | }
28 |
--------------------------------------------------------------------------------
/examples/quick_start/ui/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "target": "ES2017",
4 | "lib": ["dom", "dom.iterable", "esnext"],
5 | "allowJs": true,
6 | "skipLibCheck": true,
7 | "strict": true,
8 | "noEmit": true,
9 | "esModuleInterop": true,
10 | "module": "esnext",
11 | "moduleResolution": "bundler",
12 | "resolveJsonModule": true,
13 | "isolatedModules": true,
14 | "jsx": "preserve",
15 | "incremental": true,
16 | "plugins": [
17 | {
18 | "name": "next"
19 | }
20 | ],
21 | "paths": {
22 | "@/*": ["./*"]
23 | }
24 | },
25 | "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
26 | "exclude": ["node_modules"]
27 | }
28 |
--------------------------------------------------------------------------------
/templates/basic/ui/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "ui",
3 | "version": "0.1.0",
4 | "private": true,
5 | "scripts": {
6 | "dev": "next dev --turbopack",
7 | "build": "next build",
8 | "start": "next start",
9 | "lint": "next lint"
10 | },
11 | "dependencies": {
12 | "@heroicons/react": "^2.2.0",
13 | "next": "15.4.7",
14 | "react": "^19.0.0",
15 | "react-dom": "^19.0.0"
16 | },
17 | "devDependencies": {
18 | "@eslint/eslintrc": "^3",
19 | "@tailwindcss/postcss": "^4",
20 | "@types/node": "^20",
21 | "@types/react": "^19",
22 | "@types/react-dom": "^19",
23 | "eslint": "^9",
24 | "eslint-config-next": "15.3.2",
25 | "tailwindcss": "^4",
26 | "typescript": "^5"
27 | }
28 | }
29 |
--------------------------------------------------------------------------------
/e2e_tests/README.md:
--------------------------------------------------------------------------------
1 | # E2E Tests
2 |
3 | E2E tests are run by the workflow defined in `.github/workflows/e2e_test.yml`.
4 |
5 | Each folder in this directory contains a Python package representing a set of tests for a simple scenario.
6 |
7 | When new folders are added, they will be executed automatically in the CI/CD pipeline by `pytest`.
8 |
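A new scenario module typically mirrors the existing ones and relies on the `apiserver` and `client` fixtures provided by the scenario's `conftest.py`. A minimal sketch (the test name is just an example), modeled on `test_status.py`:

```python
import pytest


@pytest.mark.asyncio
async def test_my_scenario(apiserver, client):
    # `apiserver` and `client` are pytest fixtures from the scenario's conftest.py
    res = await client.apiserver.status()
    assert res.status.value == "Healthy"
```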
9 | To run all the tests:
10 |
11 | ```sh
12 | $ uv run -- pytest -m"e2e"
13 | ```
14 | or
15 | ```sh
16 | $ uv run -- pytest ./e2e_tests
17 | ```
18 |
19 | To run a specific scenario:
20 |
21 | ```sh
22 | $ uv run -- pytest e2e_tests/apiserver
23 | ```
24 |
25 | If you want to see the output of the different services running, pass the `-s` flag to pytest:
26 |
27 | ```sh
28 | $ uv run -- pytest e2e_tests/apiserver/test_deploy.py -s
29 | ```
30 |
--------------------------------------------------------------------------------
/examples/google_cloud_run/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM llamaindex/llama-deploy:main
2 |
3 | # This will be passed at build time
4 | ARG SOURCE_DIR=""
5 |
6 | # Copy the application code (for example, your workflow)
7 | COPY ${SOURCE_DIR} /app/code
8 | WORKDIR /app/code
9 |
10 | # Deploy automatically what's in /app/code when the container starts
11 | ENV LLAMA_DEPLOY_APISERVER_RC_PATH=/app/code
12 | COPY deployment.yml /app/code
13 |
14 | # Cloud Run requires the container to listen to port 8080
15 | ENV LLAMA_DEPLOY_APISERVER_HOST=0.0.0.0
16 | ENV LLAMA_DEPLOY_APISERVER_PORT=8080
17 | EXPOSE 8080
18 |
19 | # In Cloud Run localhost resolves to the public URI on port 80,
20 | # let's override the default and point explicitly to the internal
21 | # host and port
22 | ENV LLAMA_DEPLOY_API_SERVER_URL="http://127.0.0.1:8080"
23 |
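24 | # Example build command (illustrative; adjust SOURCE_DIR to where your workflow code
25 | # lives relative to the build context, which must also contain deployment.yml):
26 | #   docker build --build-arg SOURCE_DIR=./src -t my-llama-deploy-app .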
--------------------------------------------------------------------------------
/templates/basic/ui/app/layout.tsx:
--------------------------------------------------------------------------------
1 | import type { Metadata } from "next";
2 | import { Geist, Geist_Mono } from "next/font/google";
3 | import "./globals.css";
4 |
5 | const geistSans = Geist({
6 | variable: "--font-geist-sans",
7 | });
8 |
9 | const geistMono = Geist_Mono({
10 | variable: "--font-geist-mono",
11 | });
12 |
13 | export const metadata: Metadata = {
14 | title: "Create Next App",
15 | description: "Generated by create next app",
16 | };
17 |
18 | export default function RootLayout({
19 | children,
20 | }: Readonly<{
21 | children: React.ReactNode;
22 | }>) {
23 | return (
24 |     <html lang="en">
25 |       <body
26 |         className={`${geistSans.variable} ${geistMono.variable} antialiased`}
27 |       >
28 |         {children}
29 |       </body>
30 |     </html>
31 | );
32 | }
33 |
--------------------------------------------------------------------------------
/e2e_tests/apiserver/test_env_vars_git.py:
--------------------------------------------------------------------------------
1 | import json
2 | from pathlib import Path
3 |
4 | import pytest
5 |
6 | from llama_deploy.types.core import TaskDefinition
7 |
8 |
9 | @pytest.mark.asyncio
10 | async def test_read_env_vars_git(apiserver, client):
11 | here = Path(__file__).parent
12 | deployment_fp = here / "deployments" / "deployment_env_git.yml"
13 | with open(deployment_fp) as f:
14 | deployment = await client.apiserver.deployments.create(
15 | f, base_path=deployment_fp.parent
16 | )
17 |
18 | input_str = json.dumps({"env_vars_to_read": ["VAR_1", "VAR_2", "API_KEY"]})
19 | result = await deployment.tasks.run(
20 | TaskDefinition(service_id="workflow_git", input=input_str)
21 | )
22 |
23 | assert result == "VAR_1: x, VAR_2: y, API_KEY: 123"
24 |
--------------------------------------------------------------------------------
/examples/quick_start/ui/app/layout.tsx:
--------------------------------------------------------------------------------
1 | import type { Metadata } from "next";
2 | import { Geist, Geist_Mono } from "next/font/google";
3 | import "./globals.css";
4 |
5 | const geistSans = Geist({
6 | variable: "--font-geist-sans",
7 | });
8 |
9 | const geistMono = Geist_Mono({
10 | variable: "--font-geist-mono",
11 | });
12 |
13 | export const metadata: Metadata = {
14 | title: "Create Next App",
15 | description: "Generated by create next app",
16 | };
17 |
18 | export default function RootLayout({
19 | children,
20 | }: Readonly<{
21 | children: React.ReactNode;
22 | }>) {
23 | return (
24 |     <html lang="en">
25 |       <body
26 |         className={`${geistSans.variable} ${geistMono.variable} antialiased`}
27 |       >
28 |         {children}
29 |       </body>
30 |     </html>
31 | );
32 | }
33 |
--------------------------------------------------------------------------------
/templates/basic/src/workflow.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 |
3 | from llama_index.llms.openai import OpenAI
4 | from workflows import Workflow, step
5 | from workflows.events import StartEvent, StopEvent
6 |
7 |
8 | # create a completion workflow
9 | class CompletionWorkflow(Workflow):
10 | """A completion workflow with a single step."""
11 |
12 | llm: OpenAI = OpenAI(model="gpt-4.1-nano")
13 |
14 | @step()
15 | async def run_step(self, ev: StartEvent) -> StopEvent:
16 | message = str(ev.get("message", ""))
17 | response = await self.llm.acomplete(message)
18 | return StopEvent(result=response.text)
19 |
20 |
21 | workflow = CompletionWorkflow()
22 |
23 |
24 | async def main() -> None:
25 | print(await workflow.run(message="Hello!"))
26 |
27 |
28 | if __name__ == "__main__":
29 | asyncio.run(main())
30 |
--------------------------------------------------------------------------------
/examples/python_fullstack/frontend/dockerfile:
--------------------------------------------------------------------------------
1 | # This Dockerfile is used to deploy a simple single-container Reflex app instance.
2 | FROM python:3.10-slim
3 |
4 | # Copy local context to `/app` inside container (see .dockerignore)
5 | WORKDIR /app
6 | COPY . .
7 |
8 | # Install app requirements and reflex in the container
9 | # Deploy templates and prepare app
10 | # Download all npm dependencies and compile frontend
11 | RUN apt-get clean && apt-get update \
12 | && apt-get --no-install-recommends install zip unzip curl -y \
13 | && pip install -r requirements.txt \
14 | && reflex export --frontend-only --no-zip
15 |
16 | # Needed until Reflex properly passes SIGTERM on backend.
17 | STOPSIGNAL SIGKILL
18 |
19 | # Always apply migrations before starting the backend.
20 | CMD [ -d alembic ] && reflex db migrate; reflex run --env prod
21 |
--------------------------------------------------------------------------------
/examples/llamacloud/google_drive/deployment.yml:
--------------------------------------------------------------------------------
1 | name: LlamaCloud_LlamaDeploy_GoogleDrive
2 |
3 | control-plane:
4 | port: 8000
5 |
6 | default-service: llamacloud_workflow
7 |
8 | services:
9 | llamacloud_workflow:
10 | name: LlamaCloud GoogleDrive Data Source Workflow
11 | # We tell LlamaDeploy where to look for our workflow
12 | source:
13 | # In this case, we instruct LlamaDeploy to look in the local filesystem
14 | type: local
15 |       # The path, relative to this deployment config file, where to look for the code. This assumes
16 |       # there's an src folder next to the config file containing the workflow.py file we created previously
17 | name: ./src
18 |       # This assumes the file workflow.py contains a variable called `llamacloud_workflow` holding our workflow instance
19 | path: workflow:llamacloud_workflow
20 |
--------------------------------------------------------------------------------
/docs/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["hatchling"]
3 | build-backend = "hatchling.build"
4 |
5 | [project]
6 | name = "docs"
7 | version = "0.1.0"
8 | description = ""
9 | authors = [{name = "Your Name", email = "you@example.com"}]
10 | requires-python = "~=3.11"
11 | readme = "README.md"
12 | dependencies = [
13 | "llama_deploy[awssqs, rabbitmq, kafka, redis]",
14 | "mkdocs>=1.6.1,<2",
15 | "mkdocstrings[python]>=0.26.1,<0.27",
16 | "mkdocs-include-dir-to-nav>=1.2.0,<2",
17 | "mkdocs-material>=9.5.39,<10",
18 | "mkdocs-redirects>=1.2.1,<2",
19 | "mkdocs-click>=0.8.1,<0.9",
20 | "mkdocs-render-swagger-plugin>=0.1.2,<0.2",
21 | "griffe-fieldz>=0.2.0,<0.3",
22 | "mkdocs-github-admonitions-plugin>=0.0.3,<0.0.4"
23 | ]
24 |
25 | [tool.uv]
26 | package = false
27 |
28 | [tool.uv.sources]
29 | llama_deploy = {path = "../", editable = true}
30 |
--------------------------------------------------------------------------------
/.github/workflows/e2e_test.yml:
--------------------------------------------------------------------------------
1 | name: E2E Testing
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 | pull_request:
8 |
9 | jobs:
10 | e2e-tests:
11 | runs-on: ubuntu-latest
12 | # E2E tests might get stuck, timeout aggressively for faster feedback
13 | timeout-minutes: 10
14 | strategy:
15 | # Let the matrix finish to see if the failure was transient
16 | fail-fast: false
17 | matrix:
18 | python-version: ["3.10", "3.11", "3.12"]
19 | test-package: ["apiserver"]
20 | steps:
21 | - uses: actions/checkout@v3
22 |
23 | - name: Install uv and set the python version
24 | uses: astral-sh/setup-uv@v5
25 | with:
26 | python-version: ${{ matrix.python-version }}
27 |
28 | - name: Run All E2E Tests
29 | run: uv run -- pytest e2e_tests/${{ matrix.test-package }} -s
30 |
--------------------------------------------------------------------------------
/llama_deploy/apiserver/stats.py:
--------------------------------------------------------------------------------
1 | from prometheus_client import Enum
2 |
3 | apiserver_state = Enum(
4 | "apiserver_state",
5 | "Current state of the API server",
6 | states=[
7 | "starting",
8 | "running",
9 | "stopped",
10 | ],
11 | )
12 |
13 | deployment_state = Enum(
14 | "deployment_state",
15 | "Current state of a deployment",
16 | ["deployment_name"],
17 | states=[
18 | "loading_services",
19 | "ready",
20 | "starting_services",
21 | "running",
22 | "stopped",
23 | ],
24 | )
25 |
26 | service_state = Enum(
27 | "service_state",
28 | "Current state of a service attached to a deployment",
29 | ["deployment_name", "service_name"],
30 | states=[
31 | "loading",
32 | "syncing",
33 | "installing",
34 | "ready",
35 | ],
36 | )
37 |
--------------------------------------------------------------------------------
/tests/cli/test_cli.py:
--------------------------------------------------------------------------------
1 | from unittest import mock
2 |
3 | from click.testing import CliRunner
4 |
5 | from llama_deploy.cli import llamactl
6 | from llama_deploy.cli.__main__ import main
7 |
8 |
9 | @mock.patch("llama_deploy.cli.__main__.sys")
10 | @mock.patch("llama_deploy.cli.__main__.llamactl")
11 | def test_main(mocked_cli, mocked_sys) -> None: # type: ignore
12 | mocked_cli.return_value = 0
13 | main()
14 | mocked_sys.exit.assert_called_with(0)
15 |
16 |
17 | def test_root_command(runner: CliRunner) -> None:
18 | result = runner.invoke(llamactl)
19 | assert result.exit_code == 0
20 | # Ensure invoking the root command outputs the help
21 | assert "Usage: llamactl" in result.output
22 |
23 |
24 | def test_wrong_profile(runner: CliRunner) -> None:
25 | result = runner.invoke(llamactl, ["-p", "foo"])
26 | assert result.exit_code == 1
27 |
--------------------------------------------------------------------------------
/examples/python_fullstack/python_fullstack.yaml:
--------------------------------------------------------------------------------
1 | name: MyDeployment
2 |
3 | control-plane:
4 | port: 8000
5 |
6 | message-queue:
7 | type: redis
8 | # what follows depends on what's in the docker compose file
9 | host: redis
10 | port: 6379
11 |
12 | default-service: agentic_workflow
13 |
14 | services:
15 | agentic_workflow:
16 | name: Agentic Workflow
17 | source:
18 | type: local
19 | name: .
20 | path: workflows:agentic_w
21 | python-dependencies:
22 | - llama-index-postprocessor-rankgpt-rerank>=0.2.0
23 | - llama-index-vector-stores-qdrant>=0.3.0
24 | - llama-index-llms-openai>=0.2.2
25 | - llama-index-embeddings-openai>=0.2.4
26 | - llama-index-readers-file>=0.2.0
27 |
28 | rag_workflow:
29 | name: RAG Workflow
30 | source:
31 | type: local
32 | name: .
33 | path: workflows:rag_w
34 |
--------------------------------------------------------------------------------
/.github/workflows/publish_release.yml:
--------------------------------------------------------------------------------
1 | name: Publish llama-deploy to PyPI / GitHub
2 |
3 | on:
4 | workflow_dispatch:
5 | push:
6 | tags:
7 | - "v*"
8 |
9 | jobs:
10 | build-n-publish:
11 | name: Build and publish to PyPI
12 | if: github.repository == 'run-llama/llama_deploy'
13 | runs-on: ubuntu-latest
14 | permissions:
15 | contents: write
16 |
17 | steps:
18 | - uses: actions/checkout@v4
19 |
20 | - name: Install uv
21 | uses: astral-sh/setup-uv@v5
22 |
23 | - name: Build and publish
24 | env:
25 | UV_PUBLISH_TOKEN: ${{ secrets.LLAMA_AGENTS_PYPI_TOKEN }}
26 | run: |
27 | uv build
28 | uv publish
29 |
30 | - name: Create GitHub Release
31 | uses: ncipollo/release-action@v1
32 | with:
33 | artifacts: "dist/*"
34 | generateReleaseNotes: true
35 |
--------------------------------------------------------------------------------
/e2e_tests/apiserver/test_service_entrypoint.py:
--------------------------------------------------------------------------------
1 | import os
2 | import subprocess
3 | import time
4 |
5 | import requests
6 |
7 |
8 | def test_apiserver_entrypoint():
9 | # Customize host and port
10 | env = os.environ.copy()
11 | env["LLAMA_DEPLOY_APISERVER_HOST"] = "localhost"
12 | env["LLAMA_DEPLOY_APISERVER_PORT"] = "4502"
13 | # Start the API server as a subprocess
14 | process = subprocess.Popen(
15 | ["python", "-m", "llama_deploy.apiserver"],
16 | stdout=subprocess.PIPE,
17 | stderr=subprocess.PIPE,
18 | env=env,
19 | )
20 |
21 | try:
22 | # Wait a bit for the server to start
23 | time.sleep(2)
24 |
25 | response = requests.get("http://localhost:4502/status")
26 | assert response.status_code == 200
27 | finally:
28 | # Clean up: terminate the server process
29 | process.terminate()
30 | process.wait()
31 |
--------------------------------------------------------------------------------
/e2e_tests/apiserver/rc/src/workflow.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 |
3 | from workflows import Context, Workflow, step
4 | from workflows.events import Event, StartEvent, StopEvent
5 |
6 |
7 | class Message(Event):
8 | text: str
9 |
10 |
11 | class EchoWorkflow(Workflow):
12 | """A dummy workflow streaming three events."""
13 |
14 | @step()
15 | async def run_step(self, ctx: Context, ev: StartEvent) -> StopEvent:
16 | for i in range(3):
17 | ctx.write_event_to_stream(Message(text=f"message number {i + 1}"))
18 | await asyncio.sleep(0.5)
19 |
20 | return StopEvent(result="Done.")
21 |
22 |
23 | echo_workflow = EchoWorkflow()
24 |
25 |
26 | async def main():
27 | h = echo_workflow.run(message="Hello!")
28 | async for ev in h.stream_events():
29 | if type(ev) is Message:
30 | print(ev.text)
31 | print(await h)
32 |
33 |
34 | if __name__ == "__main__":
35 | asyncio.run(main())
36 |
--------------------------------------------------------------------------------
/.github/workflows/unit_test.yml:
--------------------------------------------------------------------------------
1 | name: Unit Testing
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 | pull_request:
8 |
9 | jobs:
10 | unit-tests:
11 | runs-on: ubuntu-latest
12 | strategy:
13 | # You can use PyPy versions in python-version.
14 | # For example, pypy-2.7 and pypy-3.8
15 | matrix:
16 | python-version: ["3.10", "3.11", "3.12"]
17 | steps:
18 | - uses: actions/checkout@v3
19 |
20 | - name: Install uv and set the python version
21 | uses: astral-sh/setup-uv@v5
22 | with:
23 | python-version: ${{ matrix.python-version }}
24 |
25 | - name: Run testing
26 | shell: bash
27 | run: uv run -- pytest --cov --cov-report=xml tests
28 |
29 | - if: matrix.python-version == '3.12'
30 | name: Report Coveralls
31 | uses: coverallsapp/github-action@v2
32 | env:
33 | COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }}
34 |
--------------------------------------------------------------------------------
/examples/quick_start/ui/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "ui",
3 | "version": "0.1.0",
4 | "private": true,
5 | "scripts": {
6 | "dev": "next dev --turbopack",
7 | "build": "next build",
8 | "start": "next start",
9 | "lint": "next lint"
10 | },
11 | "dependencies": {
12 | "@types/canvas-confetti": "^1.9.0",
13 | "canvas-confetti": "^1.9.3",
14 | "next": "15.4.7",
15 | "react": "^19.0.0",
16 | "react-dom": "^19.0.0"
17 | },
18 | "devDependencies": {
19 | "@eslint/eslintrc": "^3",
20 | "@tailwindcss/postcss": "^4",
21 | "@types/node": "^20",
22 | "@types/react": "^19",
23 | "@types/react-dom": "^19",
24 | "eslint": "^9",
25 | "eslint-config-next": "15.3.2",
26 | "tailwindcss": "^4",
27 | "typescript": "^5"
28 | },
29 | "packageManager": "pnpm@10.11.0+sha512.6540583f41cc5f628eb3d9773ecee802f4f9ef9923cc45b69890fb47991d4b092964694ec3a4f738a420c918a333062c8b925d312f42e4f0c263eb603551f977"
30 | }
31 |
--------------------------------------------------------------------------------
/e2e_tests/apiserver/test_streaming.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import pytest
4 |
5 | from llama_deploy.types import TaskDefinition
6 |
7 |
8 | @pytest.mark.asyncio
9 | async def test_stream(apiserver, client):
10 | here = Path(__file__).parent
11 | deployment_fp = here / "deployments" / "deployment_streaming.yml"
12 | with open(deployment_fp) as f:
13 | deployment = await client.apiserver.deployments.create(
14 | f, base_path=deployment_fp.parent
15 | )
16 |
17 | task = await deployment.tasks.create(TaskDefinition(input='{"a": "b"}'))
18 |
19 | read_events = []
20 | async for ev in task.events():
21 | if ev and "text" in ev:
22 | read_events.append(ev)
23 | assert len(read_events) == 3
24 | # the workflow produces events sequentially, so here we can assume events arrived in order
25 | for i, ev in enumerate(read_events):
26 | assert ev["text"] == f"message number {i + 1}"
27 |
--------------------------------------------------------------------------------
/e2e_tests/apiserver/deployments/src/workflow.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 |
3 | from workflows import Context, Workflow, step
4 | from workflows.events import Event, StartEvent, StopEvent
5 |
6 |
7 | class Message(Event):
8 | text: str
9 |
10 |
11 | class EchoWorkflow(Workflow):
12 | """A dummy workflow streaming three events."""
13 |
14 | @step()
15 | async def run_step(self, ctx: Context, ev: StartEvent) -> StopEvent:
16 | for i in range(3):
17 | ctx.write_event_to_stream(Message(text=f"message number {i + 1}"))
18 | await asyncio.sleep(0.5)
19 |
20 | return StopEvent(result="Done.")
21 |
22 |
23 | streaming_workflow = EchoWorkflow()
24 |
25 |
26 | async def main():
27 | h = streaming_workflow.run(message="Hello!")
28 | async for ev in h.stream_events():
29 | if type(ev) is Message:
30 | print(ev.text)
31 | print(await h)
32 |
33 |
34 | if __name__ == "__main__":
35 | asyncio.run(main())
36 |
--------------------------------------------------------------------------------
/examples/python_fullstack/frontend/frontend/style.py:
--------------------------------------------------------------------------------
1 | # style.py
2 | import reflex as rx
3 |
4 | # Common styles for questions and answers.
5 | shadow = "rgba(0, 0, 0, 0.15) 0px 2px 8px"
6 | chat_margin = "20%"
7 | message_style = dict(
8 | padding="1em",
9 | border_radius="5px",
10 | margin_y="0.5em",
11 | box_shadow=shadow,
12 | max_width="30em",
13 | display="inline-block",
14 | )
15 |
16 | # Set specific styles for questions and answers.
17 | question_style = message_style | dict(
18 | background_color=rx.color("gray", 4),
19 | margin_left=chat_margin,
20 | )
21 | answer_style = message_style | dict(
22 | background_color=rx.color("accent", 8),
23 | margin_right=chat_margin,
24 | )
25 |
26 | # Styles for the action bar.
27 | input_style = dict(
28 | border_width="1px",
29 | padding="0.25em",
30 | box_shadow=shadow,
31 | width="350px",
32 | )
33 | button_style = dict(
34 | background_color=rx.color("accent", 10),
35 | box_shadow=shadow,
36 | )
37 |
--------------------------------------------------------------------------------
/examples/python_dependencies/workflow.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 |
3 | from cowpy import cow
4 | from fortune import fortune
5 | from pyfiglet import Figlet
6 | from workflows import Workflow, step
7 | from workflows.events import StartEvent, StopEvent
8 |
9 |
10 | # create a dummy workflow
11 | class EchoWorkflow(Workflow):
12 |     """A workflow with a single step that renders the input as ASCII art plus a cow quoting a fortune."""
13 |
14 | @step()
15 | async def run_step(self, ev: StartEvent) -> StopEvent:
16 | msg = str(ev.get("message", ""))
17 | font = str(ev.get("font", "blocky"))
18 | fortune_msg = fortune()
19 | f = Figlet(font=font)
20 | ascii_art_message = f.renderText(msg)
21 | ascii_art_message += cow.Stegosaurus().milk(fortune_msg)
22 | return StopEvent(result=ascii_art_message)
23 |
24 |
25 | echo_workflow = EchoWorkflow()
26 |
27 |
28 | async def main():
29 | print(await echo_workflow.run(message="Hello!"))
30 |
31 |
32 | if __name__ == "__main__":
33 | asyncio.run(main())
34 |
--------------------------------------------------------------------------------
/e2e_tests/apiserver/test_hitl.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from pathlib import Path
3 |
4 | import pytest
5 | from workflows.events import HumanResponseEvent
6 |
7 | from llama_deploy.types import TaskDefinition
8 |
9 |
10 | @pytest.mark.asyncio
11 | async def test_hitl(apiserver, client):
12 | here = Path(__file__).parent
13 | deployment_fp = here / "deployments" / "deployment_hitl.yml"
14 | with open(deployment_fp) as f:
15 | deployment = await client.apiserver.deployments.create(
16 | f, base_path=deployment_fp.parent
17 | )
18 |
19 | task_handler = await deployment.tasks.create(TaskDefinition(input="{}"))
20 | ev_def = await task_handler.send_event(
21 | ev=HumanResponseEvent(response="42"), service_name="hitl_workflow"
22 | )
23 |
24 | # wait for workflow to finish
25 | await asyncio.sleep(0.1)
26 |
27 | result = await task_handler.results()
28 | assert ev_def.service_id == "hitl_workflow"
29 | assert result.result == "42", "The human's response is not consistent."
30 |
--------------------------------------------------------------------------------
/e2e_tests/apiserver/deployments/src/workflow_env.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import os
3 |
4 | from workflows import Context, Workflow, step
5 | from workflows.events import StartEvent, StopEvent
6 |
7 |
8 | class MyWorkflow(Workflow):
9 | @step()
10 | async def run_step(self, ctx: Context, ev: StartEvent) -> StopEvent:
11 | var_1 = os.environ.get("VAR_1")
12 | var_2 = os.environ.get("VAR_2")
13 | api_key = os.environ.get("API_KEY")
14 | return StopEvent(
15 | # result depends on variables read from environment
16 | result=(f"var_1: {var_1}, var_2: {var_2}, api_key: {api_key}")
17 | )
18 |
19 |
20 | workflow = MyWorkflow()
21 |
22 |
23 | async def main(w: Workflow):
24 | h = w.run()
25 | print(await h)
26 |
27 |
28 | if __name__ == "__main__":
29 | import os
30 |
31 | # set env variables
32 | os.environ["VAR_1"] = "x"
33 |     os.environ["VAR_2"] = "y"
34 | os.environ["API_KEY"] = "123"
35 |
36 | w = MyWorkflow()
37 |
38 | asyncio.run(main(w))
39 |
--------------------------------------------------------------------------------
/e2e_tests/apiserver/test_reload.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import pytest
4 |
5 | from llama_deploy.types import TaskDefinition
6 |
7 |
8 | @pytest.mark.asyncio
9 | async def test_reload(apiserver, client):
10 | here = Path(__file__).parent
11 | deployment_fp = here / "deployments" / "deployment_reload1.yml"
12 | with open(deployment_fp) as f:
13 | deployment = await client.apiserver.deployments.create(
14 | f, base_path=deployment_fp.parent
15 | )
16 |
17 | tasks = deployment.tasks
18 | res = await tasks.run(TaskDefinition(input='{"data": "bar"}'))
19 | assert res == "I have received:bar"
20 |
21 | deployment_fp = here / "deployments" / "deployment_reload2.yml"
22 | with open(deployment_fp) as f:
23 | deployment = await client.apiserver.deployments.create(
24 | f, base_path=deployment_fp.parent, reload=True
25 | )
26 |
27 | tasks = deployment.tasks
28 | res = await tasks.run(TaskDefinition(input='{"data": "bar"}'))
29 | assert res == "Ho ricevuto:bar"
30 |
--------------------------------------------------------------------------------
/llama_deploy/cli/sessions.py:
--------------------------------------------------------------------------------
1 | import click
2 |
3 | from llama_deploy.client import Client
4 |
5 | from .internal.config import ConfigProfile
6 |
7 |
8 | @click.group()
9 | def sessions() -> None:
10 | """Manage sessions for a given deployment."""
11 | pass
12 |
13 |
14 | @click.command()
15 | @click.pass_obj # config_profile
16 | @click.option(
17 | "-d", "--deployment", required=True, is_flag=False, help="Deployment name"
18 | )
19 | @click.pass_context
20 | def create(
21 | ctx: click.Context,
22 | config_profile: ConfigProfile,
23 | deployment: str,
24 | ) -> None:
25 | client = Client(
26 | api_server_url=config_profile.server,
27 | disable_ssl=config_profile.insecure,
28 | timeout=config_profile.timeout,
29 | )
30 |
31 | try:
32 | d = client.sync.apiserver.deployments.get(deployment)
33 | session_def = d.sessions.create()
34 | except Exception as e:
35 | raise click.ClickException(str(e))
36 |
37 | click.echo(session_def)
38 |
39 |
40 | sessions.add_command(create)
41 |
--------------------------------------------------------------------------------
/examples/python_dependencies/uv.lock:
--------------------------------------------------------------------------------
1 | version = 1
2 | revision = 2
3 | requires-python = ">=3.10, <4.0"
4 |
5 | [[package]]
6 | name = "pyfiglet"
7 | version = "1.0.3"
8 | source = { registry = "https://pypi.org/simple" }
9 | sdist = { url = "https://files.pythonhosted.org/packages/94/49/2554c0b7fef12c0b9633352bbd8751cc616f8e8880e0ebab7732c1535564/pyfiglet-1.0.3.tar.gz", hash = "sha256:bad3b55d2eccb30d4693ccfd94573c2a3477dd75f86a0e5465cea51bdbfe2875", size = 833445, upload-time = "2025-06-02T12:13:29.357Z" }
10 | wheels = [
11 | { url = "https://files.pythonhosted.org/packages/51/1d/f2cb03dd71a4dba891f808333fa505a6ed2762a8514d8ead7e423fa77e1b/pyfiglet-1.0.3-py3-none-any.whl", hash = "sha256:671bd101ca6a08dc2d94c6a2cda75a862c5e162b980af47d0ba4023837e36489", size = 1087203, upload-time = "2025-06-02T12:13:27.393Z" },
12 | ]
13 |
14 | [[package]]
15 | name = "uv-requirements"
16 | version = "0.1.0"
17 | source = { virtual = "." }
18 | dependencies = [
19 | { name = "pyfiglet" },
20 | ]
21 |
22 | [package.metadata]
23 | requires-dist = [{ name = "pyfiglet", specifier = ">=1.0.3" }]
24 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 LlamaIndex
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/llama_deploy/cli/status.py:
--------------------------------------------------------------------------------
1 | import click
2 |
3 | from llama_deploy.client import Client
4 | from llama_deploy.types.apiserver import StatusEnum
5 |
6 | from .internal.config import ConfigProfile
7 |
8 |
9 | @click.command()
10 | @click.pass_obj # config_profile
11 | def status(config_profile: ConfigProfile) -> None:
12 | """Print the API Server status."""
13 | client = Client(
14 | api_server_url=config_profile.server,
15 | disable_ssl=config_profile.insecure,
16 | timeout=config_profile.timeout,
17 | )
18 |
19 | try:
20 | status = client.sync.apiserver.status()
21 | except Exception as e:
22 | raise click.ClickException(str(e))
23 |
24 | if status.status == StatusEnum.HEALTHY:
25 | click.echo("LlamaDeploy is up and running.")
26 | if status.deployments:
27 | click.echo("\nActive deployments:")
28 | for d in status.deployments:
29 | click.echo(f"- {d}")
30 | else:
31 | click.echo("\nCurrently there are no active deployments")
32 | else:
33 | click.echo(f"LlamaDeploy is unhealthy: {status.status_message}")
34 |
--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 | # LlamaDeploy Documentation
2 |
3 | This directory contains the documentation for LlamaDeploy, built with MkDocs and the Material theme.
4 |
5 | ## Setup
6 |
7 | ### Prerequisites
8 | - Python 3.11 or higher
9 | - uv (for dependency management)
10 |
11 | ### Installation
12 |
13 | 1. Clone the repository
14 | 2. Install dependencies using uv:
15 | ```bash
16 | uv sync
17 | ```
18 |
19 | ## Development
20 |
21 | To start the documentation server locally:
22 | ```bash
23 | uv run mkdocs serve
24 | ```
25 |
26 | This will start a development server at `http://127.0.0.1:8000`.
27 |
28 | ## Building
29 |
30 | LlamaDeploy is part of LlamaIndex [documentation portal](https://docs.llamaindex.ai/)
31 | so the build is performed from the [main repository](https://github.com/run-llama/llama_index).
32 |
33 | > [!WARNING]
34 | > When a documentation change is merged here, the change won't be visible until a new
35 | > build is triggered from the LlamaIndex repository.
36 |
37 |
38 | ## Contributing
39 |
40 | Contributions are very welcome!
41 |
42 | 1. Create a new branch for your changes
43 | 2. Make your changes to the documentation
44 | 3. Test locally using `uv run mkdocs serve`
45 | 4. Submit a pull request
46 |
--------------------------------------------------------------------------------
/docs/docs/css/style.css:
--------------------------------------------------------------------------------
1 | .md-container .jp-Cell-outputWrapper .jp-OutputPrompt.jp-OutputArea-prompt,
2 | .md-container .jp-Cell-inputWrapper .jp-InputPrompt.jp-InputArea-prompt {
3 | display: none !important;
4 | }
5 |
6 | /* CSS styles for side-by-side layout */
7 | .container {
8 |   display: flex;
9 | justify-content: space-between;
10 | margin-bottom: 20px; /* Adjust spacing between sections */
11 | position: sticky;
12 | top: 2.4rem;
13 | z-index: 1000; /* Ensure it's above other content */
14 | background-color: white; /* Match your page background */
15 | padding: 0.2rem;
16 | }
17 |
18 | .example-heading {
19 | margin: 0.2rem !important;
20 | }
21 |
22 | .usage-examples {
23 | width: 100%; /* Adjust the width as needed */
24 | border: 1px solid var(--md-default-fg-color--light);
25 | border-radius: 2px;
26 | padding: 0.2rem;
27 | }
28 |
29 | /* Additional styling for the toggle */
30 | .toggle-example {
31 | cursor: pointer;
32 | color: white;
33 | text-decoration: underline;
34 | background-color: var(--md-primary-fg-color);
35 | padding: 0.2rem;
36 | border-radius: 2px;
37 | }
38 |
39 | .hidden {
40 | display: none;
41 | }
42 |
43 | /* mendable search styling */
44 | #my-component-root > div {
45 | bottom: 100px;
46 | }
47 |
--------------------------------------------------------------------------------
/llama_deploy/client/base.py:
--------------------------------------------------------------------------------
1 | from typing import Any
2 |
3 | import httpx
4 | from pydantic_settings import BaseSettings, SettingsConfigDict
5 |
6 |
7 | class _BaseClient(BaseSettings):
8 | """Base type for clients, to be used in Pydantic models to avoid circular imports.
9 |
10 | Settings can be passed to the Client constructor when creating an instance, or defined with environment variables
11 | having names prefixed with the string `LLAMA_DEPLOY_`, e.g. `LLAMA_DEPLOY_DISABLE_SSL`.
12 | """
13 |
14 | model_config = SettingsConfigDict(env_prefix="LLAMA_DEPLOY_")
15 |
16 | api_server_url: str = "http://localhost:4501"
17 | disable_ssl: bool = False
18 | timeout: float | None = 120.0
19 | poll_interval: float = 0.5
20 |
21 | async def request(
22 | self, method: str, url: str | httpx.URL, **kwargs: Any
23 | ) -> httpx.Response:
24 | """Performs an async HTTP request using httpx."""
25 | verify = kwargs.pop("verify", True)
26 | timeout = kwargs.pop("timeout", self.timeout)
27 | async with httpx.AsyncClient(verify=verify) as client:
28 | response = await client.request(method, url, timeout=timeout, **kwargs)
29 | response.raise_for_status()
30 | return response
31 |
--------------------------------------------------------------------------------
/llama_deploy/apiserver/app.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 |
4 | from fastapi import FastAPI
5 | from fastapi.middleware.cors import CORSMiddleware
6 | from fastapi.requests import Request
7 | from fastapi.responses import JSONResponse
8 |
9 | from .routers import deployments_router, status_router
10 | from .server import lifespan
11 | from .settings import settings
12 | from .tracing import configure_tracing
13 |
14 | logger = logging.getLogger("uvicorn.info")
15 |
16 |
17 | app = FastAPI(lifespan=lifespan)
18 |
19 | # Setup tracing
20 | configure_tracing(settings)
21 |
22 | # Configure CORS middleware unless it's explicitly disabled via the DISABLE_CORS environment variable
23 | if not os.environ.get("DISABLE_CORS", False):
24 | app.add_middleware(
25 | CORSMiddleware,
26 | allow_origins=["*"], # Allows all origins
27 | allow_credentials=True,
28 | allow_methods=["GET", "POST"],
29 | allow_headers=["Content-Type", "Authorization"],
30 | )
31 |
32 | app.include_router(deployments_router)
33 | app.include_router(status_router)
34 |
35 |
36 | @app.get("/")
37 | async def root(request: Request) -> JSONResponse:
38 | return JSONResponse(
39 | {
40 | "swagger_docs": f"{request.base_url}docs",
41 | "status": f"{request.base_url}status",
42 | }
43 | )
44 |
--------------------------------------------------------------------------------
/e2e_tests/apiserver/test_deploy.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import pytest
4 |
5 |
6 | @pytest.mark.asyncio
7 | async def test_deploy(apiserver, client):
8 | here = Path(__file__).parent
9 | deployment_fp = here / "deployments" / "deployment1.yml"
10 | with open(deployment_fp) as f:
11 | await client.apiserver.deployments.create(f, base_path=deployment_fp.parent)
12 |
13 | status = await client.apiserver.status()
14 | assert "TestDeployment1" in status.deployments
15 |
16 |
17 | def test_deploy_sync(apiserver, client):
18 | here = Path(__file__).parent
19 | deployment_fp = here / "deployments" / "deployment1.yml"
20 | with open(deployment_fp) as f:
21 | client.sync.apiserver.deployments.create(f, base_path=deployment_fp.parent)
22 |
23 | assert "TestDeployment1" in client.sync.apiserver.status().deployments
24 |
25 |
26 | @pytest.mark.asyncio
27 | async def test_deploy_local(apiserver, client):
28 | here = Path(__file__).parent
29 | deployment_fp = here / "deployments" / "deployment2.yml"
30 | with open(deployment_fp) as f:
31 | await client.apiserver.deployments.create(
32 | f, base_path=str(deployment_fp.parent.resolve())
33 | )
34 |
35 | status = await client.apiserver.status()
36 | assert "TestDeployment2" in status.deployments
37 |
--------------------------------------------------------------------------------
/tests/apiserver/data/example.yaml:
--------------------------------------------------------------------------------
1 | name: MyDeployment
2 |
3 | control-plane:
4 | port: 8000
5 |
6 | message-queue:
7 | type: simple
8 | host: "127.0.0.1"
9 | port: 8001
10 |
11 | default-service: myworkflow
12 |
13 | services:
14 | myworkflow:
15 | # A python workflow available in a git repo
16 | name: My Python Workflow
17 | source:
18 | type: git
19 | location: git@github.com/myorg/myrepo
20 | import-path: src/python/app # relative to root of the repo
21 | port: 1313
22 | python-dependencies:
23 | # this is a list to match the requirements.txt format
24 | - "llama-index-core<1"
25 | - "llama-index-llms-openai"
26 | # we can also support installing a req file relative to `path`
27 | # if source is a git repository
28 | - "requirements.txt"
29 | env:
30 | VAR_1: x
31 | VAR_2: y
32 | env-files:
33 | - ./.env
34 |
35 | another-workflow:
36 | # A LITS workflow available in a git repo (might be the same)
37 | name: My LITS Workflow
38 | source:
39 | type: git
40 | location: git@github.com/myorg/myrepo
41 | import-path: src/ts/app
42 | port: 1313
43 | ts-dependencies:
44 | # this is a mapping to match the package.json format
45 | "@llamaindex/core": "^0.2.0"
46 | "@notionhq/client": "^2.2.15"
47 |
--------------------------------------------------------------------------------
/.github/workflows/docker_release.yml:
--------------------------------------------------------------------------------
1 | name: Docker image release
2 |
3 | on:
4 | workflow_dispatch:
5 | push:
6 | branches:
7 | - main
8 | tags:
9 | - "v[0-9]+.[0-9]+.[0-9]+*"
10 |
11 | env:
12 | DOCKER_REPO_NAME: llamaindex/llama-deploy
13 |
14 | jobs:
15 | build-and-push:
16 | name: Build base image
17 | runs-on: ubuntu-latest
18 | # don't run from forks
19 | if: github.repository_owner == 'run-llama'
20 |
21 | steps:
22 | - name: Checkout
23 | uses: actions/checkout@v4
24 |
25 | - name: Set up QEMU
26 | uses: docker/setup-qemu-action@v3
27 |
28 | - name: Set up Docker Buildx
29 | uses: docker/setup-buildx-action@v3
30 |
31 | - name: Login to DockerHub
32 | uses: docker/login-action@v3
33 | with:
34 | username: ${{ secrets.DOCKER_HUB_USER }}
35 | password: ${{ secrets.DOCKER_HUB_TOKEN }}
36 |
37 | - name: Docker meta
38 | id: meta
39 | uses: docker/metadata-action@v5
40 | with:
41 |           images: ${{ env.DOCKER_REPO_NAME }}
42 |
43 | - name: Build images
44 | uses: docker/bake-action@v5
45 | env:
46 | IMAGE_TAG_SUFFIX: ${{ steps.meta.outputs.version }}
47 | LLAMA_DEPLOY_VERSION: ${{ steps.meta.outputs.version }}
48 | with:
49 | workdir: docker
50 | targets: all
51 | push: true
52 |
--------------------------------------------------------------------------------
/e2e_tests/apiserver/conftest.py:
--------------------------------------------------------------------------------
1 | import multiprocessing
2 | from pathlib import Path
3 |
4 | import httpx
5 | import pytest
6 | import uvicorn
7 | from tenacity import retry, wait_exponential
8 |
9 | from llama_deploy.client import Client
10 |
11 |
12 | def run_apiserver():
13 | uvicorn.run("llama_deploy.apiserver.app:app", host="127.0.0.1", port=4501)
14 |
15 |
16 | @retry(wait=wait_exponential(min=1, max=10))
17 | def wait_for_healthcheck():
18 | response = httpx.get("http://127.0.0.1:4501/status/")
19 | response.raise_for_status()
20 |
21 |
22 | @pytest.fixture(scope="function")
23 | def apiserver():
24 | ctx = multiprocessing.get_context("spawn")
25 | p = ctx.Process(target=run_apiserver)
26 | p.start()
27 | wait_for_healthcheck()
28 |
29 | yield
30 |
31 | p.terminate()
32 | p.join(timeout=3)
33 | if p.is_alive():
34 | p.kill()
35 |
36 |
37 | @pytest.fixture(scope="function")
38 | def apiserver_with_rc(monkeypatch):
39 | here = Path(__file__).parent
40 | rc_path = here / "rc"
41 | monkeypatch.setenv("LLAMA_DEPLOY_APISERVER_RC_PATH", str(rc_path))
42 |
43 | p = multiprocessing.Process(target=run_apiserver)
44 | p.start()
45 | wait_for_healthcheck()
46 |
47 | yield
48 |
49 | p.terminate()
50 | p.join(timeout=3)
51 | if p.is_alive():
52 | p.kill()
53 | p.close()
54 |
55 |
56 | @pytest.fixture
57 | def client():
58 | return Client(api_server_url="http://127.0.0.1:4501")
59 |
--------------------------------------------------------------------------------
/llama_deploy/cli/deploy.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import click
4 |
5 | from llama_deploy.client import Client
6 |
7 | from .internal.config import ConfigProfile
8 |
9 |
10 | @click.command()
11 | @click.pass_obj # config_profile
12 | @click.option("--reload", is_flag=True)
13 | @click.option(
14 | "--base-path",
15 | required=False,
16 | type=click.Path(file_okay=False, resolve_path=True, path_type=Path), # type: ignore
17 | )
18 | @click.argument(
19 | "deployment_config_file",
20 | type=click.Path(dir_okay=False, resolve_path=True, path_type=Path), # type: ignore
21 | )
22 | def deploy(
23 | config_profile: ConfigProfile,
24 | reload: bool,
25 | deployment_config_file: Path,
26 | base_path: Path | None,
27 | ) -> None:
28 | """Create or reload a deployment."""
29 | client = Client(
30 | api_server_url=config_profile.server,
31 | disable_ssl=config_profile.insecure,
32 | timeout=config_profile.timeout,
33 | )
34 |
35 | final_base_path = base_path or deployment_config_file.parent
36 |
37 | try:
38 | with open(deployment_config_file, "rb") as f:
39 | deployment = client.sync.apiserver.deployments.create(
40 | f,
41 | base_path=final_base_path,
42 | reload=reload,
43 | )
44 | except Exception as e:
45 | raise click.ClickException(str(e))
46 |
47 | click.echo(f"Deployment successful: {deployment.id}")
48 |
--------------------------------------------------------------------------------
/llama_deploy/client/client.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from typing import Any
3 |
4 | from .base import _BaseClient
5 | from .models import ApiServer, make_sync
6 |
7 |
8 | class Client(_BaseClient):
9 | """The LlamaDeploy Python client.
10 |
11 |     The client gives access to both the asyncio and non-asyncio APIs. To access the sync
12 | API just use methods of `client.sync`.
13 |
14 | Example usage:
15 | ```py
16 | from llama_deploy.client import Client
17 |
18 | # Use the same client instance
19 |     client = Client()
20 |
21 | async def an_async_function():
22 | status = await client.apiserver.status()
23 |
24 | def normal_function():
25 | status = client.sync.apiserver.status()
26 | ```
27 | """
28 |
29 | @property
30 | def sync(self) -> "_SyncClient":
31 | """Returns the sync version of the client API."""
32 | try:
33 | asyncio.get_running_loop()
34 | except RuntimeError:
35 | return _SyncClient(**self.model_dump())
36 |
37 | msg = "You cannot use the sync client within an async event loop - just await the async methods directly."
38 | raise RuntimeError(msg)
39 |
40 | @property
41 | def apiserver(self) -> ApiServer:
42 | """Access the API Server functionalities."""
43 | return ApiServer(client=self, id="apiserver")
44 |
45 |
46 | class _SyncClient(_BaseClient):
47 | @property
48 | def apiserver(self) -> Any:
49 | return make_sync(ApiServer)(client=self, id="apiserver")
50 |
--------------------------------------------------------------------------------
/llama_deploy/apiserver/source_managers/base.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | from pathlib import Path
3 |
4 | from llama_deploy.apiserver.deployment_config_parser import DeploymentConfig, SyncPolicy
5 |
6 |
7 | class SourceManager(ABC):
8 | """Protocol to be implemented by classes responsible for managing Deployment sources."""
9 |
10 | def __init__(self, config: DeploymentConfig, base_path: Path | None = None) -> None:
11 | self._config = config
12 | self._base_path = base_path
13 |
14 | @abstractmethod
15 | def sync(
16 | self,
17 | source: str,
18 | destination: str | None = None,
19 | sync_policy: SyncPolicy = SyncPolicy.REPLACE,
20 | ) -> None: # pragma: no cover
21 | """Fetches resources from `source` so they can be used in a deployment.
22 |
23 | Optionally uses `destination` to store data when this makes sense for the
24 | specific source type.
25 | """
26 |
27 | def relative_path(self, source: str) -> str:
28 | """Unfortunately, there's a difference in behavior of how the source managers sync.
29 |         The local source manager syncs the source into `<destination>/<source>`, whereas
30 |         the git source manager just syncs the source into `<destination>`. This is a temporary shim, since
31 |         changing this behavior is a breaking change to deployment.yaml configurations. The local source manager
32 |         overrides it. In a future major version, this behavior will be made consistent."""
33 | return ""
34 |
--------------------------------------------------------------------------------
/llama_deploy/apiserver/routers/status.py:
--------------------------------------------------------------------------------
1 | import httpx
2 | from fastapi import APIRouter
3 | from fastapi.exceptions import HTTPException
4 | from fastapi.responses import PlainTextResponse
5 |
6 | from llama_deploy.apiserver.server import manager
7 | from llama_deploy.apiserver.settings import settings
8 | from llama_deploy.types.apiserver import Status, StatusEnum
9 |
10 | status_router = APIRouter(
11 | prefix="/status",
12 | )
13 |
14 |
15 | @status_router.get("/")
16 | async def status() -> Status:
17 | return Status(
18 | status=StatusEnum.HEALTHY,
19 | max_deployments=manager._max_deployments,
20 | deployments=list(manager._deployments.keys()),
21 | status_message="",
22 | )
23 |
24 |
25 | @status_router.get("/metrics")
26 | async def metrics() -> PlainTextResponse:
27 | """Proxies the Prometheus metrics endpoint through the API Server.
28 |
29 | This endpoint is mostly used in serverless environments where the LlamaDeploy
30 | container cannot expose more than one port (e.g. Knative, Google Cloud Run).
31 | If Prometheus is not enabled, this endpoint returns an empty HTTP-204 response.
32 | """
33 | if not settings.prometheus_enabled:
34 | return PlainTextResponse(status_code=204)
35 |
36 | try:
37 | async with httpx.AsyncClient() as client:
38 | response = await client.get(f"http://127.0.0.1:{settings.prometheus_port}/")
39 | return PlainTextResponse(content=response.text)
40 | except httpx.RequestError as exc:
41 | raise HTTPException(status_code=500, detail=str(exc))
42 |
--------------------------------------------------------------------------------
/tests/apiserver/conftest.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from typing import Any, Iterator
3 | from unittest import mock
4 |
5 | import pytest
6 | from fastapi.testclient import TestClient
7 | from workflows import Workflow, step
8 | from workflows.events import StartEvent, StopEvent
9 |
10 | from llama_deploy.apiserver.app import app
11 | from llama_deploy.apiserver.deployment import Deployment
12 | from llama_deploy.apiserver.deployment_config_parser import DeploymentConfig
13 |
14 |
15 | class SmallWorkflow(Workflow):
16 | @step()
17 | async def run_step(self, ev: StartEvent) -> StopEvent:
18 | return StopEvent(result="Hello, world!")
19 |
20 |
21 | @pytest.fixture
22 | def mock_importlib() -> Iterator[None]:
23 | with mock.patch("llama_deploy.apiserver.deployment.importlib") as importlib:
24 | importlib.import_module.return_value = mock.MagicMock(
25 | my_workflow=SmallWorkflow()
26 | )
27 | yield
28 |
29 |
30 | @pytest.fixture
31 | def data_path() -> Path:
32 | data_p = Path(__file__).parent / "data"
33 | return data_p.resolve()
34 |
35 |
36 | @pytest.fixture
37 | def mocked_deployment(data_path: Path, mock_importlib: Any) -> Iterator[Deployment]:
38 | config = DeploymentConfig.from_yaml(data_path / "git_service.yaml")
39 | with mock.patch("llama_deploy.apiserver.deployment.SOURCE_MANAGERS") as sm_dict:
40 | sm_dict["git"] = mock.MagicMock()
41 | yield Deployment(config=config, base_path=data_path, deployment_path=Path("."))
42 |
43 |
44 | @pytest.fixture
45 | def http_client() -> TestClient:
46 | return TestClient(app)
47 |
--------------------------------------------------------------------------------
/tests/cli/test_sessions.py:
--------------------------------------------------------------------------------
1 | from unittest import mock
2 |
3 | import httpx
4 | from click.testing import CliRunner
5 |
6 | from llama_deploy.cli import llamactl
7 |
8 |
9 | def test_session_create(runner: CliRunner) -> None:
10 | with mock.patch("llama_deploy.cli.sessions.Client") as mocked_client:
11 | mocked_deployment = mock.MagicMock()
12 | mocked_deployment.sessions.create.return_value = mock.MagicMock(
13 | id="test_session"
14 | )
15 | mocked_client.return_value.sync.apiserver.deployments.get.return_value = (
16 | mocked_deployment
17 | )
18 |
19 | result = runner.invoke(
20 | llamactl,
21 | ["sessions", "create", "-d", "deployment_name"],
22 | )
23 |
24 | mocked_client.assert_called_with(
25 | api_server_url="http://localhost:4501", disable_ssl=False, timeout=120.0
26 | )
27 |
28 | mocked_deployment.sessions.create.assert_called_once()
29 | assert result.exit_code == 0
30 |
31 |
32 | def test_sessions_create_error(runner: CliRunner) -> None:
33 | with mock.patch("llama_deploy.cli.sessions.Client") as mocked_client:
34 | mocked_client.return_value.sync.apiserver.deployments.get.side_effect = (
35 | httpx.HTTPStatusError(
36 | "test error", response=mock.MagicMock(), request=mock.MagicMock()
37 | )
38 | )
39 |
40 | result = runner.invoke(
41 | llamactl, ["sessions", "create", "-d", "deployment_name"]
42 | )
43 |
44 | assert result.exit_code == 1
45 | assert result.output == "Error: test error\n"
46 |
--------------------------------------------------------------------------------
/tests/cli/internal/test_config.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from unittest import mock
3 |
4 | from llama_deploy.cli.internal.config import (
5 | Config,
6 | ConfigProfile,
7 | _default_config_path,
8 | load_config,
9 | )
10 | from llama_deploy.cli.internal.utils import DEFAULT_CONFIG_FILE_NAME
11 |
12 |
13 | def test_load_config(data_path: Path) -> None:
14 | test_config_file = data_path / DEFAULT_CONFIG_FILE_NAME
15 | config = load_config(path=test_config_file)
16 | assert "test" in config.profiles
17 |
18 |
19 | def test_load_config_no_path(tmp_path: Path) -> None:
20 | with mock.patch("llama_deploy.cli.internal.utils.user_config_dir") as mock_dir:
21 | mock_dir.return_value = tmp_path
22 | config = load_config(path=None)
23 | assert len(config.profiles) == 1
24 | assert "default" in config.profiles
25 |
26 |
27 | def test__default_config_path() -> None:
28 | assert str(_default_config_path()).endswith(DEFAULT_CONFIG_FILE_NAME)
29 |
30 |
31 | def test_config_write(tmp_path: Path) -> None:
32 | config_path = tmp_path / "test.yaml"
33 | assert not config_path.exists()
34 | config = Config(
35 | current_profile="test", profiles={"test": ConfigProfile()}, path=config_path
36 | )
37 | config.write()
38 | assert config_path.exists()
39 |
40 |
41 | def test_config_dir_doesnt_exist(tmp_path: Path) -> None:
42 | with mock.patch("llama_deploy.cli.internal.utils.user_config_dir") as mock_dir:
43 | mock_dir.return_value = tmp_path / "config" / "folder"
44 | config = load_config(path=None)
45 | assert len(config.profiles) == 1
46 | assert "default" in config.profiles
47 |
--------------------------------------------------------------------------------
/examples/python_fullstack/frontend/frontend/frontend.py:
--------------------------------------------------------------------------------
1 | import reflex as rx
2 |
3 | from frontend import style
4 | from frontend.state import State
5 | from frontend.session_list.component import session_list
6 | from frontend.session_list.state import SessionState
7 |
8 |
9 | def qa(content: str, idx: int) -> rx.Component:
10 | return rx.box(
11 | rx.text(content, style=style.answer_style),
12 | text_align=rx.cond(idx % 2 == 0, "right", "left"),
13 | margin_left="1em",
14 | )
15 |
16 |
17 | def chat() -> rx.Component:
18 | return rx.box(
19 | rx.foreach(State.chat_history, lambda messages, idx: qa(messages, idx))
20 | )
21 |
22 |
23 | def action_bar() -> rx.Component:
24 | return rx.hstack(
25 | rx.input(
26 | value=State.question,
27 | placeholder="Ask a question",
28 | on_change=State.set_question,
29 | on_key_down=lambda key: State.handle_key_down(
30 | key, SessionState.selected_session_id
31 | ),
32 | style=style.input_style,
33 | ),
34 | rx.button(
35 | "Ask",
36 | on_click=lambda: State.answer(SessionState.selected_session_id),
37 | style=style.button_style,
38 | ),
39 | )
40 |
41 |
42 | def index() -> rx.Component:
43 | return rx.center(
44 | rx.hstack(
45 | session_list(),
46 | rx.vstack(
47 | chat(),
48 | action_bar(),
49 | align="center",
50 | ),
51 | margin_left="4",
52 | ),
53 | )
54 |
55 |
56 | app = rx.App()
57 | app.add_page(index, on_load=SessionState.create_default_session)
58 |
--------------------------------------------------------------------------------
/docker/README.md:
--------------------------------------------------------------------------------
1 | # Docker build system
2 |
3 | This folder contains the files needed to build the base `llama_deploy` images that
4 | can be used to simplify deployments by reducing boilerplate code.
5 |
6 | ## Image Development
7 |
8 | Images are built with [BuildKit](https://docs.docker.com/build/buildkit/) and we use
9 | `bake` to orchestrate the process. To build all the available images run:
10 |
11 | ```sh
12 | docker buildx bake all
13 | ```
14 |
15 | You can override any `variable` defined in the `docker-bake.hcl` file and build custom
16 | images, for example if you want to use a branch from the llama_deploy repo instead of
17 | an official release, run:
18 |
19 | ```sh
20 | LLAMA_DEPLOY_VERSION=mybranch_or_tag docker buildx bake
21 | ```
22 |
23 | ### Multi-Platform Builds
24 |
25 | `llama_deploy` images support multiple architectures. Depending on your operating
26 | system and Docker environment, you might not be able to build all of them locally.
27 |
28 | This is the error you might encounter:
29 |
30 | ```
31 | multiple platforms feature is currently not supported for docker driver. Please switch to a different driver
32 | (eg. “docker buildx create --use”)
33 | ```
34 |
35 | Make sure `containerd` image store is enabled, following the instruction in the [Docker documentation](https://docs.docker.com/build/building/multi-platform/#enable-the-containerd-image-store).
36 |
37 | If the problem persists, one solution is to override the `platform` option and
38 | limit local builds to the same architecture as your computer's. For example, on an Apple M1 you can limit the builds
39 | to ARM only by invoking `bake` like this:
40 |
41 | ```sh
42 | docker buildx bake control_plane --set "*.platform=linux/arm64"
43 | ```
44 |
--------------------------------------------------------------------------------
/examples/python_fullstack/docker-compose.yml:
--------------------------------------------------------------------------------
1 | services:
2 | qdrant:
3 | # Used by the RAG workflow
4 | image: qdrant/qdrant:latest
5 | ports:
6 | - "6333:6333"
7 | - "6334:6334"
8 | volumes:
9 | - qdrant_data:/qdrant/storage
10 |
11 | apiserver:
12 | # LlamaDeploy API server, will run the workflows
13 | image: llamaindex/llama-deploy:main
14 | environment:
15 | QDRANT_HOST: qdrant
16 | OPENAI_API_KEY: $OPENAI_API_KEY
17 | ports:
18 | - "4501:4501"
19 | depends_on:
20 | redis:
21 | condition: service_healthy
22 | healthcheck:
23 | test: llamactl status
24 | interval: 5s
25 | timeout: 3s
26 | retries: 5
27 | volumes:
28 | - ./:/opt/app
29 | working_dir: /opt/app
30 |
31 | deploy_workflows:
32 | # Init container, it deploys python_fullstack.yaml and exits
33 | image: llamaindex/llama-deploy:main
34 | volumes:
35 | - ./python_fullstack.yaml:/opt/python_fullstack.yaml
36 | working_dir: /opt/
37 | depends_on:
38 | apiserver:
39 | condition: service_healthy
40 | entrypoint: llamactl -s http://apiserver:4501 -t 60 deploy python_fullstack.yaml
41 |
42 | frontend:
43 | # UI for this deployment, running at http://localhost:3000
44 | environment:
45 | APISERVER_URL: http://apiserver:4501
46 | DEPLOYMENT_NAME: MyDeployment
47 | build:
48 | context: ./frontend
49 | dockerfile: dockerfile
50 | ports:
51 | - "3000:3000"
52 | - "9000:9000"
53 | volumes:
54 | - ./frontend:/app
55 | depends_on:
56 | deploy_workflows:
57 | condition: service_completed_successfully
58 |
59 | volumes:
60 | qdrant_data:
61 |
--------------------------------------------------------------------------------
/tests/apiserver/routers/test_status.py:
--------------------------------------------------------------------------------
1 | from typing import Any
2 | from unittest import mock
3 |
4 | import httpx
5 | from fastapi.testclient import TestClient
6 |
7 | from llama_deploy.apiserver.settings import settings
8 |
9 |
10 | def test_read_main(http_client: TestClient) -> None:
11 | response = http_client.get("/status")
12 | assert response.status_code == 200
13 | assert response.json() == {
14 | "max_deployments": 10,
15 | "deployments": [],
16 | "status": "Healthy",
17 | "status_message": "",
18 | }
19 |
20 |
21 | def test_prom_proxy_off(http_client: TestClient, monkeypatch: Any) -> None:
22 | monkeypatch.setattr(settings, "prometheus_enabled", False)
23 | response = http_client.get("/status/metrics/")
24 | assert response.status_code == 204
25 | assert response.text == ""
26 |
27 |
28 | def test_prom_proxy(http_client: TestClient) -> None:
29 | mock_metrics_response = 'metric1{label="value"} 1.0\nmetric2{label="value"} 2.0'
30 | mock_response = httpx.Response(200, text=mock_metrics_response)
31 |
32 | with mock.patch("httpx.AsyncClient.get", return_value=mock_response):
33 | response = http_client.get("/status/metrics")
34 | assert response.status_code == 200
35 | assert response.text == mock_metrics_response
36 |
37 |
38 | def test_prom_proxy_failure(http_client: TestClient) -> None:
39 | # Mock the HTTP client to raise an exception
40 | with mock.patch(
41 | "httpx.AsyncClient.get", side_effect=httpx.RequestError("Connection failed")
42 | ):
43 | response = http_client.get("/status/metrics")
44 | assert response.status_code == 500
45 | assert response.json()["detail"] == "Connection failed"
46 |
--------------------------------------------------------------------------------
/tests/apiserver/test_config_parser.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | from llama_deploy.apiserver.deployment_config_parser import DeploymentConfig
4 |
5 |
6 | def do_assert(config: DeploymentConfig) -> None:
7 | assert config.name == "MyDeployment"
8 | assert config.default_service == "myworkflow"
9 |
10 | wf_config = config.services["myworkflow"]
11 | assert wf_config.name == "My Python Workflow"
12 | assert wf_config.source
13 | assert wf_config.source.type == "git"
14 | assert wf_config.source.location == "git@github.com/myorg/myrepo"
15 | assert wf_config.import_path == "src/python/app"
16 | assert wf_config.port == 1313
17 | assert wf_config.python_dependencies
18 | assert len(wf_config.python_dependencies) == 3
19 | assert wf_config.env == {"VAR_1": "x", "VAR_2": "y"}
20 | assert wf_config.env_files == ["./.env"]
21 |
22 | wf_config = config.services["another-workflow"]
23 | assert wf_config.name == "My LITS Workflow"
24 | assert wf_config.source
25 | assert wf_config.source.type == "git"
26 | assert wf_config.source.location == "git@github.com/myorg/myrepo"
27 | assert wf_config.import_path == "src/ts/app"
28 | assert wf_config.port == 1313
29 | assert wf_config.ts_dependencies
30 | assert len(wf_config.ts_dependencies) == 2
31 | assert wf_config.ts_dependencies["@llamaindex/core"] == "^0.2.0"
32 |
33 |
34 | def test_load_config_file(data_path: Path) -> None:
35 | config = DeploymentConfig.from_yaml(data_path / "example.yaml")
36 | do_assert(config)
37 |
38 |
39 | def test_from_yaml_bytes(data_path: Path) -> None:
40 | with open(data_path / "example.yaml", "rb") as config_f:
41 | config = DeploymentConfig.from_yaml_bytes(config_f.read())
42 | do_assert(config)
43 |
--------------------------------------------------------------------------------
/llama_deploy/apiserver/source_managers/git.py:
--------------------------------------------------------------------------------
1 | import shutil
2 | from pathlib import Path
3 | from typing import Any
4 |
5 | from git import Repo
6 |
7 | from .base import SourceManager, SyncPolicy
8 |
9 |
10 | class GitSourceManager(SourceManager):
11 | """A SourceManager specialized for sources of type `git`."""
12 |
13 | def sync(
14 | self,
15 | source: str,
16 | destination: str | None = None,
17 | sync_policy: SyncPolicy = SyncPolicy.REPLACE,
18 | ) -> None:
19 | """Clones the repository at URL `source` into a local path `destination`.
20 |
21 | Args:
22 | source: The URL of the git repository. It can optionally contain a branch target using the name convention
23 | `git_repo_url@branch_name`. For example, "https://example.com/llama_deploy.git@branch_name".
24 |             destination: The local filesystem path where the repository will be cloned.
25 | """
26 | if not destination:
27 | raise ValueError("Destination cannot be empty")
28 |
29 | if Path(destination).exists():
30 | # FIXME: pull when SyncPolicy is MERGE
31 | shutil.rmtree(destination)
32 |
33 | url, branch_name = self._parse_source(source)
34 | kwargs: dict[str, Any] = {"url": url, "to_path": destination}
35 | if branch_name:
36 | kwargs["multi_options"] = [f"-b {branch_name}", "--single-branch"]
37 |
38 | Repo.clone_from(**kwargs)
39 |
40 | @staticmethod
41 | def _parse_source(source: str) -> tuple[str, str | None]:
42 | branch_name = None
43 | toks = source.split("@")
44 | url = toks[0]
45 | if len(toks) > 1:
46 | branch_name = toks[1]
47 |
48 | return url, branch_name
49 |
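A small illustration of the `url@branch` naming convention documented in the `sync` docstring above, using the static `_parse_source` helper (the URL is a placeholder):

```python
from llama_deploy.apiserver.source_managers.git import GitSourceManager

# A trailing `@branch` selects a branch; the part before it is the clone URL
url, branch = GitSourceManager._parse_source("https://example.com/llama_deploy.git@mybranch")
assert url == "https://example.com/llama_deploy.git"
assert branch == "mybranch"

# Without the suffix, the branch component is None and the default branch is cloned
url, branch = GitSourceManager._parse_source("https://example.com/llama_deploy.git")
assert branch is None
```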
--------------------------------------------------------------------------------
/tests/cli/test_deploy.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from unittest import mock
3 |
4 | import httpx
5 | from click.testing import CliRunner
6 |
7 | from llama_deploy.cli import llamactl
8 |
9 |
10 | def test_deploy(runner: CliRunner, data_path: Path) -> None:
11 | test_config_file = data_path / "deployment.yaml"
12 | mocked_result = mock.MagicMock(id="test_deployment")
13 | with mock.patch("llama_deploy.cli.deploy.Client") as mocked_client:
14 | mocked_client.return_value.sync.apiserver.deployments.create.return_value = (
15 | mocked_result
16 | )
17 |
18 | result = runner.invoke(llamactl, ["-t", "5.0", "deploy", str(test_config_file)])
19 |
20 | assert result.exit_code == 0
21 | assert result.output == "Deployment successful: test_deployment\n"
22 | mocked_client.assert_called_with(
23 | api_server_url="http://localhost:4501", disable_ssl=False, timeout=5.0
24 | )
25 | file_arg = (
26 | mocked_client.return_value.sync.apiserver.deployments.create.call_args
27 | )
28 | assert str(test_config_file) == file_arg.args[0].name
29 |
30 |
31 | def test_deploy_failed(runner: CliRunner, data_path: Path) -> None:
32 | test_config_file = data_path / "deployment.yaml"
33 | with mock.patch("llama_deploy.cli.deploy.Client") as mocked_client:
34 | mocked_client.return_value.sync.apiserver.deployments.create.side_effect = (
35 | httpx.HTTPStatusError(
36 | "Unauthorized!", response=mock.MagicMock(), request=mock.MagicMock()
37 | )
38 | )
39 |
40 | result = runner.invoke(llamactl, ["deploy", str(test_config_file)])
41 | assert result.exit_code == 1
42 | assert result.output == "Error: Unauthorized!\n"
43 |
--------------------------------------------------------------------------------
/llama_deploy/cli/run.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 | import click
4 | import httpx
5 |
6 | from llama_deploy.client import Client
7 | from llama_deploy.types import TaskDefinition
8 |
9 | from .internal.config import ConfigProfile
10 |
11 |
12 | @click.command()
13 | @click.pass_obj # config_profile
14 | @click.option(
15 | "-d", "--deployment", required=True, is_flag=False, help="Deployment name"
16 | )
17 | @click.option(
18 | "-a",
19 | "--arg",
20 | multiple=True,
21 | is_flag=False,
22 | type=(str, str),
23 | help="'key value' argument to pass to the task, e.g. '-a age 30'",
24 | )
25 | @click.option("-s", "--service", is_flag=False, help="Service name")
26 | @click.option("-i", "--session-id", is_flag=False, help="Session ID")
27 | @click.pass_context
28 | def run(
29 | ctx: click.Context,
30 | config_profile: ConfigProfile,
31 | deployment: str,
32 |     arg: tuple[tuple[str, str], ...],
33 | service: str,
34 | session_id: str,
35 | ) -> None:
36 | """Run tasks from a given service."""
37 | client = Client(
38 | api_server_url=config_profile.server,
39 | disable_ssl=config_profile.insecure,
40 | timeout=config_profile.timeout,
41 | )
42 |
43 | payload = {"input": json.dumps(dict(arg))}
44 | if service:
45 | payload["service_id"] = service
46 | if session_id:
47 | payload["session_id"] = session_id
48 |
49 | try:
50 | d = client.sync.apiserver.deployments.get(deployment)
51 | result = d.tasks.run(TaskDefinition(**payload))
52 | except Exception as e:
53 | extra_info = ""
54 | if isinstance(e, httpx.HTTPStatusError):
55 | extra_info = f" {e.response.text}"
56 |
57 | raise click.ClickException(f"{str(e)}{extra_info}")
58 |
59 | click.echo(result)
60 |
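To make the argument handling above concrete, here is a quick sketch of how repeated `-a key value` options end up in the task payload (the deployment and argument names are made up):

```python
import json

# `llamactl run -d MyDeployment -a age 30 -a name Alice` reaches the command
# above as a tuple of (key, value) pairs collected by click:
arg = (("age", "30"), ("name", "Alice"))

# The command serializes them into the task input exactly as in the code above
payload = {"input": json.dumps(dict(arg))}
assert payload["input"] == '{"age": "30", "name": "Alice"}'
```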
--------------------------------------------------------------------------------
/docs/docs/javascript/llms_example.js:
--------------------------------------------------------------------------------
1 | var exampleTemplate = `Framework Usage
2 | `;
6 |
7 | var exampleMarkdown = `\`\`\`python
8 | from llama_index.core import Settings
9 |
10 | # global default
11 | Settings.llm = llm
12 |
13 | # per-component
14 | # objects that use an LLM accept it as a kwarg
15 | index.as_query_engine(llm=llm)
16 |
17 | index.as_chat_engine(llm=llm)
18 | \`\`\``;
19 |
20 | function addToggleToExample() {
21 | const toggleExample = document.querySelector(".toggle-example");
22 | const usageExamples = document.querySelector(".usage-examples");
23 |
24 | toggleExample.addEventListener("click", function () {
25 | console.log("clicked!");
26 | console.log(usageExamples);
27 | usageExamples.classList.toggle("hidden");
28 | });
29 | }
30 |
31 | // Add marked package as