├── docs ├── docs │ ├── index.md │ ├── api_reference │ │ └── llama_deploy │ │ │ ├── types.md │ │ │ ├── python_sdk.md │ │ │ └── apiserver.md │ ├── _static │ │ ├── assets │ │ │ └── LlamaLogoBrowserTab.png │ │ ├── css │ │ │ └── custom.css │ │ └── js │ │ │ ├── leadfeeder.js │ │ │ └── mendablesearch.js │ ├── css │ │ ├── custom.css │ │ └── style.css │ ├── module_guides │ │ └── llama_deploy │ │ │ ├── 40_llamactl.md │ │ │ ├── 20_core_components.md │ │ │ ├── 30_python_sdk.md │ │ │ ├── index.md │ │ │ └── 10_getting_started.md │ └── javascript │ │ ├── llms_example.js │ │ └── mendablesearch.js ├── overrides │ ├── main.html │ └── partials │ │ ├── copyright.html │ │ └── search.html ├── pyproject.toml ├── README.md └── mkdocs.yml ├── tests ├── __init__.py ├── cli │ ├── __init__.py │ ├── internal │ │ ├── __init__.py │ │ └── test_config.py │ ├── data │ │ ├── deployment.yaml │ │ └── config.yaml │ ├── test_cli.py │ ├── test_sessions.py │ ├── test_deploy.py │ ├── conftest.py │ ├── test_init.py │ ├── test_status.py │ └── test_run.py ├── client │ ├── models │ │ ├── __init__.py │ │ ├── conftest.py │ │ └── test_model.py │ └── test_client.py └── apiserver │ ├── routers │ ├── __init__.py │ └── test_status.py │ ├── data │ ├── .env │ ├── workflow │ │ ├── __init__.py │ │ └── workflow_test.py │ ├── local.yaml │ ├── git_service.yaml │ ├── env_variables.yaml │ ├── python_dependencies.yaml │ ├── service_ports.yaml │ ├── python_dependencies_kitchen_sink.yaml │ ├── with_ui.yaml │ └── example.yaml │ ├── source_managers │ ├── __init__.py │ ├── test_git.py │ └── test_local.py │ ├── test_app.py │ ├── test_settings.py │ ├── conftest.py │ ├── test_config_parser.py │ └── test_server.py ├── e2e_tests ├── __init__.py ├── apiserver │ ├── __init__.py │ ├── rc │ │ ├── src │ │ │ ├── __init__.py │ │ │ └── workflow.py │ │ └── deployment.yml │ ├── deployments │ │ ├── src │ │ │ ├── .env │ │ │ ├── __init__.py │ │ │ ├── workflow_reload.py │ │ │ ├── workflow_hitl.py │ │ │ ├── workflow.py │ │ │ └── workflow_env.py │ │ ├── deployment_hitl.yml │ │ ├── deployment2.yml │ │ ├── deployment_streaming.yml │ │ ├── deployment1.yml │ │ ├── deployment_reload1.yml │ │ ├── deployment_reload2.yml │ │ ├── deployment_env_local.yml │ │ └── deployment_env_git.yml │ ├── test_autodeploy.py │ ├── test_status.py │ ├── test_env_vars_local.py │ ├── test_env_vars_git.py │ ├── test_service_entrypoint.py │ ├── test_streaming.py │ ├── test_hitl.py │ ├── test_reload.py │ ├── test_deploy.py │ └── conftest.py └── README.md ├── templates └── basic │ ├── src │ ├── __init__.py │ └── workflow.py │ └── ui │ ├── app │ ├── favicon.ico │ ├── globals.css │ └── layout.tsx │ ├── postcss.config.mjs │ ├── public │ ├── file.svg │ └── logo-dark-light.svg │ ├── next.config.ts │ ├── eslint.config.mjs │ ├── .gitignore │ ├── tsconfig.json │ └── package.json ├── llama_deploy ├── apiserver │ ├── __init__.py │ ├── routers │ │ ├── __init__.py │ │ └── status.py │ ├── source_managers │ │ ├── __init__.py │ │ ├── base.py │ │ ├── git.py │ │ └── local.py │ ├── __main__.py │ ├── stats.py │ ├── app.py │ ├── server.py │ └── settings.py ├── client │ ├── __init__.py │ ├── models │ │ ├── __init__.py │ │ └── model.py │ ├── base.py │ └── client.py ├── cli │ ├── __main__.py │ ├── internal │ │ ├── utils.py │ │ └── config.py │ ├── sessions.py │ ├── status.py │ ├── deploy.py │ ├── run.py │ ├── serve.py │ └── __init__.py ├── __init__.py └── types │ ├── apiserver.py │ ├── __init__.py │ └── core.py ├── examples ├── python_dependencies │ ├── .python-version │ ├── requirements.txt │ ├── pyproject.toml │ ├── 
deployment.yaml │ ├── workflow.py │ ├── uv.lock │ └── README.md ├── python_fullstack │ ├── frontend │ │ ├── frontend │ │ │ ├── __init__.py │ │ │ ├── session_list │ │ │ │ ├── __init__.py │ │ │ │ ├── component.py │ │ │ │ └── state.py │ │ │ ├── style.py │ │ │ ├── frontend.py │ │ │ └── state.py │ │ ├── requirements.txt │ │ ├── .gitignore │ │ ├── assets │ │ │ └── favicon.ico │ │ ├── rxconfig.py │ │ └── dockerfile │ ├── llama_deploy_frontend.png │ ├── workflows │ │ ├── data │ │ │ └── attention.pdf │ │ ├── __init__.py │ │ ├── requirements.txt │ │ ├── dockerfile │ │ └── agent_workflow.py │ ├── python_fullstack.yaml │ ├── docker-compose.yml │ └── README.md ├── quick_start │ ├── ui │ │ ├── app │ │ │ ├── favicon.ico │ │ │ ├── globals.css │ │ │ ├── layout.tsx │ │ │ └── confetti │ │ │ │ └── page.tsx │ │ ├── postcss.config.mjs │ │ ├── public │ │ │ ├── file.svg │ │ │ └── logo-dark-light.svg │ │ ├── next.config.ts │ │ ├── eslint.config.mjs │ │ ├── .gitignore │ │ ├── tsconfig.json │ │ └── package.json │ ├── quick_start.yml │ └── src │ │ └── workflow.py ├── llamacloud │ └── google_drive │ │ ├── src │ │ ├── config.yml │ │ └── workflow.py │ │ └── deployment.yml └── google_cloud_run │ ├── deployment.yml │ ├── src │ └── workflow.py │ └── Dockerfile ├── system_diagram.png ├── .taplo.toml ├── .gitignore ├── .github ├── release.yml └── workflows │ ├── gh_project.yml │ ├── lint.yml │ ├── e2e_test.yml │ ├── publish_release.yml │ ├── unit_test.yml │ ├── docker_release.yml │ └── codeql.yml ├── docker ├── run_apiserver.py ├── README.md ├── Dockerfile.base └── docker-bake.hcl ├── LICENSE ├── CONTRIBUTING.md ├── .pre-commit-config.yaml └── pyproject.toml /docs/docs/index.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /e2e_tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/cli/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /e2e_tests/apiserver/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /templates/basic/src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/cli/internal/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/client/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /e2e_tests/apiserver/rc/src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /llama_deploy/apiserver/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /tests/apiserver/routers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/apiserver/data/.env: -------------------------------------------------------------------------------- 1 | API_KEY=123 2 | -------------------------------------------------------------------------------- /tests/apiserver/source_managers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/python_dependencies/.python-version: -------------------------------------------------------------------------------- 1 | 3.13 2 | -------------------------------------------------------------------------------- /examples/python_fullstack/frontend/frontend/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /e2e_tests/apiserver/deployments/src/.env: -------------------------------------------------------------------------------- 1 | VAR_1=z 2 | API_KEY=123 3 | -------------------------------------------------------------------------------- /examples/python_fullstack/frontend/frontend/session_list/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/python_fullstack/frontend/requirements.txt: -------------------------------------------------------------------------------- 1 | llama-deploy 2 | reflex 3 | -------------------------------------------------------------------------------- /docs/docs/api_reference/llama_deploy/types.md: -------------------------------------------------------------------------------- 1 | # `types` 2 | 3 | ::: llama_deploy.types 4 | -------------------------------------------------------------------------------- /llama_deploy/client/__init__.py: -------------------------------------------------------------------------------- 1 | from .client import Client 2 | 3 | __all__ = ["Client"] 4 | -------------------------------------------------------------------------------- /system_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/run-llama/llama_deploy/HEAD/system_diagram.png -------------------------------------------------------------------------------- /docs/overrides/main.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} {% block header %} {{ super() }} {% endblock %} 2 | -------------------------------------------------------------------------------- /docs/overrides/partials/copyright.html: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /examples/python_dependencies/requirements.txt: -------------------------------------------------------------------------------- 1 | # This is a test of a version range 2 | fortune-python>1,<1.1 3 | -------------------------------------------------------------------------------- /examples/python_fullstack/frontend/.gitignore: -------------------------------------------------------------------------------- 1 | 
*.db 2 | *.py[cod] 3 | .web 4 | __pycache__/ 5 | assets/external/ 6 | -------------------------------------------------------------------------------- /templates/basic/ui/app/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/run-llama/llama_deploy/HEAD/templates/basic/ui/app/favicon.ico -------------------------------------------------------------------------------- /examples/quick_start/ui/app/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/run-llama/llama_deploy/HEAD/examples/quick_start/ui/app/favicon.ico -------------------------------------------------------------------------------- /templates/basic/ui/postcss.config.mjs: -------------------------------------------------------------------------------- 1 | const config = { 2 | plugins: ["@tailwindcss/postcss"], 3 | }; 4 | 5 | export default config; 6 | -------------------------------------------------------------------------------- /examples/quick_start/ui/postcss.config.mjs: -------------------------------------------------------------------------------- 1 | const config = { 2 | plugins: ["@tailwindcss/postcss"], 3 | }; 4 | 5 | export default config; 6 | -------------------------------------------------------------------------------- /tests/cli/data/deployment.yaml: -------------------------------------------------------------------------------- 1 | name: TestDeployment 2 | 3 | control-plane: {} 4 | 5 | services: 6 | test-workflow: 7 | name: Test Workflow 8 | -------------------------------------------------------------------------------- /docs/docs/_static/assets/LlamaLogoBrowserTab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/run-llama/llama_deploy/HEAD/docs/docs/_static/assets/LlamaLogoBrowserTab.png -------------------------------------------------------------------------------- /examples/python_fullstack/llama_deploy_frontend.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/run-llama/llama_deploy/HEAD/examples/python_fullstack/llama_deploy_frontend.png -------------------------------------------------------------------------------- /docs/overrides/partials/search.html: -------------------------------------------------------------------------------- 1 | {% import "partials/language.html" as lang with context %} 2 | 3 | 4 |
5 | -------------------------------------------------------------------------------- /examples/python_fullstack/frontend/assets/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/run-llama/llama_deploy/HEAD/examples/python_fullstack/frontend/assets/favicon.ico -------------------------------------------------------------------------------- /examples/python_fullstack/workflows/data/attention.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/run-llama/llama_deploy/HEAD/examples/python_fullstack/workflows/data/attention.pdf -------------------------------------------------------------------------------- /tests/apiserver/data/workflow/__init__.py: -------------------------------------------------------------------------------- 1 | from .workflow_test import MyWorkflow, _TestEnvWorkflow 2 | 3 | my_workflow = MyWorkflow() 4 | env_reader_workflow = _TestEnvWorkflow() 5 | -------------------------------------------------------------------------------- /llama_deploy/apiserver/routers/__init__.py: -------------------------------------------------------------------------------- 1 | from .deployments import deployments_router 2 | from .status import status_router 3 | 4 | __all__ = ["deployments_router", "status_router"] 5 | -------------------------------------------------------------------------------- /examples/llamacloud/google_drive/src/config.yml: -------------------------------------------------------------------------------- 1 | llamacloud: 2 | index_name: "" 3 | project_name: "" 4 | organization_id: "" 5 | -------------------------------------------------------------------------------- /llama_deploy/client/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .apiserver import ApiServer 2 | from .model import Collection, Model, make_sync 3 | 4 | __all__ = ["ApiServer", "Collection", "Model", "make_sync"] 5 | -------------------------------------------------------------------------------- /examples/python_fullstack/frontend/rxconfig.py: -------------------------------------------------------------------------------- 1 | import reflex as rx 2 | 3 | config = rx.Config( 4 | app_name="frontend", 5 | api_url="http://localhost:9000", 6 | backend_port=9000, 7 | deployment_name="deployment", 8 | ) 9 | -------------------------------------------------------------------------------- /examples/python_fullstack/workflows/__init__.py: -------------------------------------------------------------------------------- 1 | from .agent_workflow import build_agentic_workflow 2 | from .rag_workflow import build_rag_workflow 3 | 4 | 5 | rag_w = build_rag_workflow() 6 | agentic_w = build_agentic_workflow(rag_w) 7 | -------------------------------------------------------------------------------- /llama_deploy/apiserver/source_managers/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import SourceManager 2 | from .git import GitSourceManager 3 | from .local import LocalSourceManager 4 | 5 | __all__ = ["GitSourceManager", "LocalSourceManager", "SourceManager"] 6 | -------------------------------------------------------------------------------- /.taplo.toml: -------------------------------------------------------------------------------- 1 | [formatting] 2 | align_comments = false 3 | reorder_keys = false 4 | # Following are to be consistent with toml-sort 5 | 
indent_string = " " 6 | array_trailing_comma = false 7 | compact_arrays = true 8 | compact_inline_tables = true 9 | -------------------------------------------------------------------------------- /e2e_tests/apiserver/test_autodeploy.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | @pytest.mark.asyncio 5 | async def test_autodeploy(client, apiserver_with_rc): 6 | status = await client.apiserver.status() 7 | assert "AutoDeployed" in status.deployments 8 | -------------------------------------------------------------------------------- /llama_deploy/cli/__main__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from llama_deploy.cli import llamactl 3 | 4 | 5 | def main() -> None: 6 | """CLI entrypoint.""" 7 | sys.exit(llamactl()) 8 | 9 | 10 | if __name__ == "__main__": # pragma: no cover 11 | main() 12 | -------------------------------------------------------------------------------- /docs/docs/api_reference/llama_deploy/python_sdk.md: -------------------------------------------------------------------------------- 1 | # Python SDK 2 | 3 | ## Client 4 | 5 | ::: llama_deploy.client.Client 6 | options: 7 | show_bases: false 8 | 9 | 10 | ## API Server functionalities 11 | 12 | ::: llama_deploy.client.models.apiserver 13 | -------------------------------------------------------------------------------- /examples/python_dependencies/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "uv-requirements" 3 | version = "0.1.0" 4 | description = "Add your description here" 5 | readme = "README.md" 6 | requires-python = ">=3.10, <4.0" 7 | dependencies = [ 8 | "pyfiglet>=1.0.3" 9 | ] 10 | -------------------------------------------------------------------------------- /docs/docs/css/custom.css: -------------------------------------------------------------------------------- 1 | #my-component-root *, 2 | #headlessui-portal-root * { 3 | z-index: 1000000000000; 4 | font-size: 100%; 5 | } 6 | 7 | textarea { 8 | border: 0; 9 | padding: 0; 10 | } 11 | 12 | article p { 13 | margin-bottom: 10px !important; 14 | } 15 | -------------------------------------------------------------------------------- /tests/cli/data/config.yaml: -------------------------------------------------------------------------------- 1 | current_profile: default 2 | profiles: 3 | default: 4 | insecure: false 5 | server: http://localhost:4501 6 | timeout: 120.0 7 | 8 | test: 9 | insecure: false 10 | server: http://localhost:4501 11 | timeout: 120.0 12 | -------------------------------------------------------------------------------- /docs/docs/_static/css/custom.css: -------------------------------------------------------------------------------- 1 | #my-component-root *, 2 | #headlessui-portal-root * { 3 | z-index: 1000000000000; 4 | font-size: 100%; 5 | } 6 | 7 | textarea { 8 | border: 0; 9 | padding: 0; 10 | } 11 | 12 | article p { 13 | margin-bottom: 10px !important; 14 | } 15 | -------------------------------------------------------------------------------- /tests/apiserver/test_app.py: -------------------------------------------------------------------------------- 1 | from fastapi.testclient import TestClient 2 | 3 | 4 | def test_read_main(http_client: TestClient) -> None: 5 | response = http_client.get("/") 6 | assert response.status_code == 200 7 | assert set(response.json().keys()) == {"swagger_docs", "status"} 8 | 
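
# Illustrative sketch, not part of the original suite: it reuses the same
# http_client fixture to show how the JSON payload of the root endpoint can be
# inspected further. Only the presence of the "status" field is asserted,
# since its concrete value is not pinned down by the test above.
def test_read_main_has_status_field(http_client: TestClient) -> None:
    payload = http_client.get("/").json()
    assert "status" in payload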
-------------------------------------------------------------------------------- /docs/docs/api_reference/llama_deploy/apiserver.md: -------------------------------------------------------------------------------- 1 | # `apiserver` 2 | 3 | ::: llama_deploy.apiserver.deployment 4 | 5 | ::: llama_deploy.apiserver.deployment_config_parser 6 | options: 7 | members: 8 | - DeploymentConfig 9 | 10 | ::: llama_deploy.apiserver.source_managers 11 | -------------------------------------------------------------------------------- /e2e_tests/apiserver/deployments/src/__init__.py: -------------------------------------------------------------------------------- 1 | from .workflow import EchoWorkflow 2 | from .workflow_reload import EchoWithPrompt 3 | 4 | my_workflow = EchoWorkflow() 5 | echo_workflow_en = EchoWithPrompt(prompt_msg="I have received:") 6 | echo_workflow_it = EchoWithPrompt(prompt_msg="Ho ricevuto:") 7 | -------------------------------------------------------------------------------- /examples/python_fullstack/workflows/requirements.txt: -------------------------------------------------------------------------------- 1 | llama-deploy[rabbitmq, kafka, redis]==0.1.1 2 | llama-index-postprocessor-rankgpt-rerank>=0.2.0 3 | llama-index-vector-stores-qdrant>=0.3.0 4 | llama-index-llms-openai>=0.2.2 5 | llama-index-embeddings-openai>=0.2.4 6 | llama-index-readers-file>=0.2.0 7 | -------------------------------------------------------------------------------- /examples/google_cloud_run/deployment.yml: -------------------------------------------------------------------------------- 1 | name: CloudRunExample 2 | 3 | control-plane: 4 | port: 8000 5 | 6 | default-service: dummy_workflow 7 | 8 | services: 9 | dummy_workflow: 10 | name: Dummy Workflow 11 | source: 12 | type: local 13 | name: . 14 | path: workflow:echo_workflow 15 | -------------------------------------------------------------------------------- /tests/apiserver/data/local.yaml: -------------------------------------------------------------------------------- 1 | name: LocalDeploymentRelativePath 2 | 3 | control-plane: {} 4 | 5 | services: 6 | test-workflow: 7 | name: Test Workflow 8 | port: 8002 9 | host: localhost 10 | source: 11 | type: local 12 | location: workflow 13 | import-path: workflow:my_workflow 14 | -------------------------------------------------------------------------------- /docs/docs/module_guides/llama_deploy/40_llamactl.md: -------------------------------------------------------------------------------- 1 | # CLI 2 | 3 | `llamactl` is a command line interface that ships with LlamaDeploy and has the main goal to easily interact with a 4 | running [API Server](./20_core_components.md#api-server). 
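
For illustration, a typical session might look like the snippet below; the exact commands and options are listed in the generated reference that follows:

```sh
# Check that the API server is up and reachable
$ llamactl status

# Create a deployment from a configuration file
$ llamactl deploy deployment.yml
```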
5 | 6 | ::: mkdocs-click 7 | :module: llama_deploy.cli 8 | :command: llamactl 9 | -------------------------------------------------------------------------------- /e2e_tests/apiserver/deployments/deployment_hitl.yml: -------------------------------------------------------------------------------- 1 | name: HumanInTheLoop 2 | 3 | control-plane: 4 | port: 8000 5 | 6 | default-service: hitl_workflow 7 | 8 | services: 9 | hitl_workflow: 10 | name: HITL Workflow 11 | source: 12 | type: local 13 | name: src 14 | path: src/workflow_hitl:workflow 15 | -------------------------------------------------------------------------------- /e2e_tests/apiserver/rc/deployment.yml: -------------------------------------------------------------------------------- 1 | name: AutoDeployed 2 | 3 | control-plane: {} 4 | 5 | default-service: test-workflow 6 | 7 | services: 8 | test-workflow: 9 | name: Test Workflow 10 | port: 8002 11 | host: localhost 12 | source: 13 | type: local 14 | name: src 15 | path: src/workflow:echo_workflow 16 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # Unit test / coverage reports 6 | .coverage 7 | .coverage.* 8 | coverage.* 9 | .pytest_cache/ 10 | 11 | # Build artifacts 12 | dist/ 13 | 14 | # Project related 15 | .tool-versions 16 | 17 | # IDEs 18 | .idea 19 | .DS_Store 20 | .vscode 21 | .zed 22 | .claude 23 | -------------------------------------------------------------------------------- /e2e_tests/apiserver/deployments/deployment2.yml: -------------------------------------------------------------------------------- 1 | name: TestDeployment2 2 | 3 | control-plane: {} 4 | 5 | default-service: dummy_workflow 6 | 7 | services: 8 | test-workflow: 9 | name: Test Workflow 10 | port: 8002 11 | host: localhost 12 | source: 13 | type: local 14 | name: src 15 | path: src:my_workflow 16 | -------------------------------------------------------------------------------- /tests/client/models/conftest.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Iterator 2 | from unittest import mock 3 | 4 | import pytest 5 | 6 | from llama_deploy.client import Client 7 | 8 | 9 | @pytest.fixture 10 | def client(monkeypatch: Any) -> Iterator[Client]: 11 | monkeypatch.setattr(Client, "request", mock.AsyncMock()) 12 | yield Client() 13 | -------------------------------------------------------------------------------- /e2e_tests/apiserver/deployments/deployment_streaming.yml: -------------------------------------------------------------------------------- 1 | name: Streaming 2 | 3 | control-plane: 4 | port: 8000 5 | 6 | default-service: streaming_workflow 7 | 8 | services: 9 | streaming_workflow: 10 | name: Streaming Workflow 11 | source: 12 | type: local 13 | name: src 14 | path: src/workflow:streaming_workflow 15 | -------------------------------------------------------------------------------- /tests/apiserver/data/git_service.yaml: -------------------------------------------------------------------------------- 1 | name: TestDeployment 2 | 3 | control-plane: {} 4 | 5 | services: 6 | test-workflow: 7 | name: Test Workflow 8 | port: 8002 9 | host: localhost 10 | source: 11 | type: git 12 | location: https://github.com/run-llama/llama_deploy.git 13 | import-path: tests/apiserver/data/workflow:my_workflow 14 | 
-------------------------------------------------------------------------------- /.github/release.yml: -------------------------------------------------------------------------------- 1 | changelog: 2 | categories: 3 | - title: Breaking Changes ⚠️ 4 | labels: 5 | - breaking-change 6 | - title: New Features 🎉 7 | labels: 8 | - '*' 9 | - title: Bug Fixes 🐛 10 | labels: 11 | - bug 12 | - title: Documentation 📚 13 | labels: 14 | - documentation 15 | - example 16 | -------------------------------------------------------------------------------- /llama_deploy/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | root_logger = logging.getLogger("llama_deploy") 4 | 5 | formatter = logging.Formatter("%(levelname)s:%(name)s - %(message)s") 6 | console_handler = logging.StreamHandler() 7 | console_handler.setFormatter(formatter) 8 | root_logger.addHandler(console_handler) 9 | 10 | root_logger.setLevel(logging.INFO) 11 | root_logger.propagate = True 12 | -------------------------------------------------------------------------------- /e2e_tests/apiserver/deployments/deployment1.yml: -------------------------------------------------------------------------------- 1 | name: TestDeployment1 2 | 3 | control-plane: {} 4 | 5 | default-service: dummy_workflow 6 | 7 | services: 8 | test-workflow: 9 | name: Test Workflow 10 | port: 8002 11 | host: localhost 12 | source: 13 | type: git 14 | name: https://github.com/run-llama/llama_deploy.git 15 | path: e2e_tests/apiserver/deployments/src:my_workflow 16 | -------------------------------------------------------------------------------- /e2e_tests/apiserver/deployments/deployment_reload1.yml: -------------------------------------------------------------------------------- 1 | name: ReloadMe 2 | 3 | control-plane: {} 4 | 5 | default-service: test-workflow 6 | 7 | services: 8 | test-workflow: 9 | name: Test Workflow 10 | port: 8002 11 | host: localhost 12 | source: 13 | type: git 14 | name: https://github.com/run-llama/llama_deploy.git 15 | path: e2e_tests/apiserver/deployments/src:echo_workflow_en 16 | -------------------------------------------------------------------------------- /e2e_tests/apiserver/deployments/deployment_reload2.yml: -------------------------------------------------------------------------------- 1 | name: ReloadMe 2 | 3 | control-plane: {} 4 | 5 | default-service: test-workflow 6 | 7 | services: 8 | test-workflow: 9 | name: Test Workflow 10 | port: 8002 11 | host: localhost 12 | source: 13 | type: git 14 | name: https://github.com/run-llama/llama_deploy.git 15 | path: e2e_tests/apiserver/deployments/src:echo_workflow_it 16 | -------------------------------------------------------------------------------- /examples/python_dependencies/deployment.yaml: -------------------------------------------------------------------------------- 1 | name: dependencies 2 | 3 | control-plane: 4 | port: 8000 5 | 6 | default-service: echo_workflow 7 | 8 | services: 9 | echo_workflow: 10 | name: Pretty Echo Workflow 11 | source: 12 | type: local 13 | name: src 14 | path: workflow:echo_workflow 15 | python-dependencies: 16 | - cowpy 17 | - "." 
18 | - "requirements.txt" 19 | -------------------------------------------------------------------------------- /templates/basic/ui/public/file.svg: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /examples/quick_start/quick_start.yml: -------------------------------------------------------------------------------- 1 | name: QuickStart 2 | 3 | control-plane: 4 | port: 8000 5 | 6 | default-service: echo_workflow 7 | 8 | services: 9 | echo_workflow: 10 | name: Echo Workflow 11 | source: 12 | type: local 13 | name: src 14 | path: src/workflow:echo_workflow 15 | 16 | ui: 17 | name: My Nextjs App 18 | port: 3001 19 | source: 20 | type: local 21 | name: ui 22 | -------------------------------------------------------------------------------- /examples/quick_start/ui/public/file.svg: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /llama_deploy/apiserver/__main__.py: -------------------------------------------------------------------------------- 1 | import uvicorn 2 | from prometheus_client import start_http_server 3 | 4 | from .settings import settings 5 | 6 | if __name__ == "__main__": 7 | if settings.prometheus_enabled: 8 | start_http_server(settings.prometheus_port) 9 | 10 | uvicorn.run( 11 | "llama_deploy.apiserver.app:app", 12 | host=settings.host, 13 | port=settings.port, 14 | ) 15 | -------------------------------------------------------------------------------- /llama_deploy/cli/internal/utils.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from platformdirs import user_config_dir 4 | 5 | DEFAULT_PROFILE_NAME = "default" 6 | DEFAULT_CONFIG_FILE_NAME = "config.yaml" 7 | DEFAULT_CONFIG_FOLDER_NAME = "llamactl" 8 | 9 | 10 | def _default_config_path() -> Path: 11 | base = user_config_dir(DEFAULT_CONFIG_FOLDER_NAME, appauthor=False) 12 | return Path(base) / DEFAULT_CONFIG_FILE_NAME 13 | -------------------------------------------------------------------------------- /templates/basic/ui/next.config.ts: -------------------------------------------------------------------------------- 1 | import type { NextConfig } from "next"; 2 | 3 | const nextConfig: NextConfig = { 4 | basePath: process.env.LLAMA_DEPLOY_NEXTJS_BASE_PATH, 5 | env: { 6 | NEXT_PUBLIC_LLAMA_DEPLOY_NEXTJS_DEPLOYMENT_NAME: process.env.LLAMA_DEPLOY_NEXTJS_DEPLOYMENT_NAME || "default", 7 | NEXT_PUBLIC_BASE_PATH: process.env.LLAMA_DEPLOY_NEXTJS_BASE_PATH, 8 | }, 9 | }; 10 | 11 | export default nextConfig; 12 | -------------------------------------------------------------------------------- /docker/run_apiserver.py: -------------------------------------------------------------------------------- 1 | import uvicorn 2 | from prometheus_client import start_http_server 3 | 4 | from llama_deploy.apiserver.settings import settings 5 | 6 | if __name__ == "__main__": 7 | if settings.prometheus_enabled: 8 | start_http_server(settings.prometheus_port) 9 | 10 | uvicorn.run( 11 | "llama_deploy.apiserver.app:app", 12 | host=settings.host, 13 | port=settings.port, 14 | ) 15 | -------------------------------------------------------------------------------- /tests/apiserver/data/env_variables.yaml: -------------------------------------------------------------------------------- 1 | name: MyDeployment 2 | 3 | control-plane: 4 | port: 8000 5 
| 6 | message-queue: 7 | type: simple 8 | host: "127.0.0.1" 9 | port: 8001 10 | 11 | default-service: myworkflow 12 | 13 | services: 14 | myworkflow: 15 | name: My Python Workflow 16 | env: 17 | VAR_1: x 18 | VAR_2: y 19 | env-files: 20 | - .env 21 | source: 22 | type: local 23 | location: workflow 24 | -------------------------------------------------------------------------------- /examples/quick_start/ui/next.config.ts: -------------------------------------------------------------------------------- 1 | import type { NextConfig } from "next"; 2 | const nextConfig: NextConfig = { 3 | basePath: process.env.LLAMA_DEPLOY_NEXTJS_BASE_PATH, 4 | env: { 5 | NEXT_PUBLIC_LLAMA_DEPLOY_NEXTJS_DEPLOYMENT_NAME: 6 | process.env.LLAMA_DEPLOY_NEXTJS_DEPLOYMENT_NAME || "default", 7 | NEXT_PUBLIC_BASE_PATH: 8 | process.env.LLAMA_DEPLOY_NEXTJS_BASE_PATH || "", 9 | }, 10 | }; 11 | 12 | export default nextConfig; 13 | -------------------------------------------------------------------------------- /.github/workflows/gh_project.yml: -------------------------------------------------------------------------------- 1 | name: Add issues to GitHub project 2 | 3 | on: 4 | issues: 5 | types: 6 | - opened 7 | 8 | jobs: 9 | add-to-project: 10 | name: Add new issues to project for triage 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/add-to-project@v1.0.2 14 | with: 15 | project-url: https://github.com/orgs/run-llama/projects/8 16 | github-token: ${{ secrets.ADD_TO_PROJECT_PAT }} 17 | -------------------------------------------------------------------------------- /tests/apiserver/data/python_dependencies.yaml: -------------------------------------------------------------------------------- 1 | name: MyDeployment 2 | 3 | control-plane: 4 | port: 8000 5 | 6 | message-queue: 7 | type: simple 8 | host: "127.0.0.1" 9 | port: 8001 10 | 11 | default-service: myworkflow 12 | 13 | services: 14 | myworkflow: 15 | name: My Python Workflow 16 | python-dependencies: 17 | - "llama-index-core<1" 18 | - "llama-index-llms-openai" 19 | source: 20 | type: local 21 | location: test 22 | -------------------------------------------------------------------------------- /e2e_tests/apiserver/deployments/src/workflow_reload.py: -------------------------------------------------------------------------------- 1 | from workflows import Context, Workflow, step 2 | from workflows.events import StartEvent, StopEvent 3 | 4 | 5 | class EchoWithPrompt(Workflow): 6 | def __init__(self, prompt_msg): 7 | super().__init__() 8 | self._prompt_msg = prompt_msg 9 | 10 | @step 11 | def do_something(self, ctx: Context, ev: StartEvent) -> StopEvent: 12 | return StopEvent(result=f"{self._prompt_msg}{ev.data}") 13 | -------------------------------------------------------------------------------- /llama_deploy/types/apiserver.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | from pydantic import BaseModel 4 | 5 | 6 | class StatusEnum(Enum): 7 | HEALTHY = "Healthy" 8 | UNHEALTHY = "Unhealthy" 9 | DOWN = "Down" 10 | 11 | 12 | class Status(BaseModel): 13 | status: StatusEnum 14 | status_message: str 15 | max_deployments: int | None = None 16 | deployments: list[str] | None = None 17 | 18 | 19 | class DeploymentDefinition(BaseModel): 20 | name: str 21 | -------------------------------------------------------------------------------- /tests/apiserver/data/service_ports.yaml: -------------------------------------------------------------------------------- 1 | name: TestDeployment 2 | 3 | 
control-plane: {} 4 | 5 | services: 6 | no-port: 7 | name: No Port 8 | source: 9 | type: local 10 | location: workflow 11 | 12 | has-port: 13 | name: Has Port 14 | port: 9999 15 | source: 16 | type: local 17 | location: workflow 18 | 19 | no-port-again: 20 | name: Again no Port 21 | source: 22 | type: local 23 | location: workflow 24 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Linting 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | 9 | jobs: 10 | lint: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v3 14 | 15 | - name: Install uv 16 | uses: astral-sh/setup-uv@v5 17 | 18 | - name: Set up Python 19 | run: uv python install 20 | 21 | - name: Run linter 22 | shell: bash 23 | run: uv run -- pre-commit run -a 24 | -------------------------------------------------------------------------------- /templates/basic/ui/eslint.config.mjs: -------------------------------------------------------------------------------- 1 | import { dirname } from "path"; 2 | import { fileURLToPath } from "url"; 3 | import { FlatCompat } from "@eslint/eslintrc"; 4 | 5 | const __filename = fileURLToPath(import.meta.url); 6 | const __dirname = dirname(__filename); 7 | 8 | const compat = new FlatCompat({ 9 | baseDirectory: __dirname, 10 | }); 11 | 12 | const eslintConfig = [ 13 | ...compat.extends("next/core-web-vitals", "next/typescript"), 14 | ]; 15 | 16 | export default eslintConfig; 17 | -------------------------------------------------------------------------------- /examples/quick_start/ui/eslint.config.mjs: -------------------------------------------------------------------------------- 1 | import { dirname } from "path"; 2 | import { fileURLToPath } from "url"; 3 | import { FlatCompat } from "@eslint/eslintrc"; 4 | 5 | const __filename = fileURLToPath(import.meta.url); 6 | const __dirname = dirname(__filename); 7 | 8 | const compat = new FlatCompat({ 9 | baseDirectory: __dirname, 10 | }); 11 | 12 | const eslintConfig = [ 13 | ...compat.extends("next/core-web-vitals", "next/typescript"), 14 | ]; 15 | 16 | export default eslintConfig; 17 | -------------------------------------------------------------------------------- /tests/apiserver/data/python_dependencies_kitchen_sink.yaml: -------------------------------------------------------------------------------- 1 | name: MyDeployment 2 | 3 | control-plane: 4 | port: 8000 5 | 6 | message-queue: 7 | type: simple 8 | host: "127.0.0.1" 9 | port: 8001 10 | 11 | default-service: myworkflow 12 | 13 | services: 14 | myworkflow: 15 | name: My Python Workflow 16 | python-dependencies: 17 | - "test<1" 18 | - "./bar/requirements.txt" 19 | - "./foo/bar/" # pyproject.toml 20 | source: 21 | type: local 22 | location: test 23 | -------------------------------------------------------------------------------- /llama_deploy/types/__init__.py: -------------------------------------------------------------------------------- 1 | from .apiserver import DeploymentDefinition, Status, StatusEnum 2 | from .core import ( 3 | ChatMessage, 4 | EventDefinition, 5 | SessionDefinition, 6 | TaskDefinition, 7 | TaskResult, 8 | generate_id, 9 | ) 10 | 11 | __all__ = [ 12 | "ChatMessage", 13 | "EventDefinition", 14 | "SessionDefinition", 15 | "TaskDefinition", 16 | "TaskResult", 17 | "generate_id", 18 | "DeploymentDefinition", 19 | "Status", 20 | "StatusEnum", 21 | ] 22 | 
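
# Example (illustrative only): constructing the task type re-exported above,
# mirroring how the e2e tests call deployment.tasks.run(). Kept as a comment
# so that importing this package stays side-effect free.
#
#   from llama_deploy.types import TaskDefinition
#   task = TaskDefinition(service_id="echo_workflow", input='{"message": "Hello!"}')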
-------------------------------------------------------------------------------- /tests/apiserver/data/with_ui.yaml: -------------------------------------------------------------------------------- 1 | name: test-deployment 2 | 3 | control-plane: 4 | port: 8000 5 | 6 | default-service: echo_workflow 7 | 8 | services: 9 | test-workflow: 10 | name: Test Workflow 11 | port: 8002 12 | host: localhost 13 | source: 14 | type: local 15 | location: workflow 16 | import-path: workflow:my_workflow 17 | 18 | ui: 19 | name: My Nextjs App 20 | source: 21 | type: git 22 | location: https://github.com/run-llama/llama_deploy.git 23 | import-path: src/ui 24 | -------------------------------------------------------------------------------- /e2e_tests/apiserver/deployments/deployment_env_local.yml: -------------------------------------------------------------------------------- 1 | name: EnvironmentVariablesLocal 2 | 3 | control-plane: 4 | port: 8000 5 | 6 | default-service: test_env_workflow 7 | 8 | services: 9 | test_env_workflow: 10 | name: Workflow 11 | source: 12 | type: local 13 | name: src 14 | env: 15 | VAR_1: x # this gets overwritten because VAR_1 also exists in the provided .env 16 | VAR_2: y 17 | env-files: 18 | - src/.env # relative to source path 19 | path: src/workflow_env:workflow 20 | -------------------------------------------------------------------------------- /tests/apiserver/test_settings.py: -------------------------------------------------------------------------------- 1 | from llama_deploy.apiserver.settings import ApiserverSettings 2 | 3 | 4 | def test_settings_url() -> None: 5 | s = ApiserverSettings() 6 | assert s.url == "http://127.0.0.1:4501" 7 | 8 | s = ApiserverSettings(use_tls=True) 9 | assert s.url == "https://127.0.0.1:4501" 10 | 11 | s = ApiserverSettings(host="example.com", port=8080) 12 | assert s.url == "http://example.com:8080" 13 | 14 | s = ApiserverSettings(host="example.com", port=80) 15 | assert s.url == "http://example.com" 16 | -------------------------------------------------------------------------------- /e2e_tests/apiserver/deployments/deployment_env_git.yml: -------------------------------------------------------------------------------- 1 | name: EnvironmentVariablesGit 2 | 3 | control-plane: 4 | port: 8000 5 | 6 | services: 7 | workflow_git: 8 | name: Git Workflow 9 | source: 10 | type: git 11 | name: https://github.com/run-llama/llama_deploy.git 12 | env: 13 | VAR_1: x # this gets overwritten because VAR_1 also exists in the provided .env 14 | VAR_2: y 15 | env-files: 16 | - tests/apiserver/data/.env # relative to source path 17 | path: tests/apiserver/data/workflow:env_reader_workflow 18 | -------------------------------------------------------------------------------- /e2e_tests/apiserver/deployments/src/workflow_hitl.py: -------------------------------------------------------------------------------- 1 | from workflows import Workflow, step 2 | from workflows.events import ( 3 | HumanResponseEvent, 4 | InputRequiredEvent, 5 | StartEvent, 6 | StopEvent, 7 | ) 8 | 9 | 10 | class HumanInTheLoopWorkflow(Workflow): 11 | @step 12 | async def step1(self, ev: StartEvent) -> InputRequiredEvent: 13 | return InputRequiredEvent(prefix="Enter a number: ") 14 | 15 | @step 16 | async def step2(self, ev: HumanResponseEvent) -> StopEvent: 17 | return StopEvent(result=ev.response) 18 | 19 | 20 | workflow = HumanInTheLoopWorkflow(timeout=3) 21 | -------------------------------------------------------------------------------- /templates/basic/ui/app/globals.css: 
-------------------------------------------------------------------------------- 1 | @import "tailwindcss"; 2 | 3 | :root { 4 | --background: #ffffff; 5 | --foreground: #171717; 6 | } 7 | 8 | @theme inline { 9 | --color-background: var(--background); 10 | --color-foreground: var(--foreground); 11 | --font-sans: var(--font-geist-sans); 12 | --font-mono: var(--font-geist-mono); 13 | } 14 | 15 | @media (prefers-color-scheme: dark) { 16 | :root { 17 | --background: #0a0a0a; 18 | --foreground: #ededed; 19 | } 20 | } 21 | 22 | body { 23 | background: var(--background); 24 | color: var(--foreground); 25 | font-family: Arial, Helvetica, sans-serif; 26 | } 27 | -------------------------------------------------------------------------------- /examples/quick_start/ui/app/globals.css: -------------------------------------------------------------------------------- 1 | @import "tailwindcss"; 2 | 3 | :root { 4 | --background: #ffffff; 5 | --foreground: #171717; 6 | } 7 | 8 | @theme inline { 9 | --color-background: var(--background); 10 | --color-foreground: var(--foreground); 11 | --font-sans: var(--font-geist-sans); 12 | --font-mono: var(--font-geist-mono); 13 | } 14 | 15 | @media (prefers-color-scheme: dark) { 16 | :root { 17 | --background: #0a0a0a; 18 | --foreground: #ededed; 19 | } 20 | } 21 | 22 | body { 23 | background: var(--background); 24 | color: var(--foreground); 25 | font-family: Arial, Helvetica, sans-serif; 26 | } 27 | -------------------------------------------------------------------------------- /examples/python_fullstack/workflows/dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10-slim 2 | 3 | EXPOSE 8002 4 | EXPOSE 8003 5 | 6 | WORKDIR /app 7 | 8 | # Copy source code 9 | COPY . . 10 | 11 | # Install libraries for necessary python package builds 12 | RUN apt-get clean && apt-get update \ 13 | && apt-get --no-install-recommends install build-essential python3-dev libpq-dev curl -y \ 14 | && pip install --no-cache-dir --upgrade pip \ 15 | && apt-get -yq update && apt-get -yqq install ssh \ 16 | && apt-get clean \ 17 | && pip install -r requirements.txt 18 | 19 | # Run the deploy.py script 20 | ENTRYPOINT ["python", "./deploy.py"] 21 | -------------------------------------------------------------------------------- /docs/docs/_static/js/leadfeeder.js: -------------------------------------------------------------------------------- 1 | (function (ss, ex) { 2 | window.ldfdr = 3 | window.ldfdr || 4 | function () { 5 | (ldfdr._q = ldfdr._q || []).push([].slice.call(arguments)); 6 | }; 7 | (function (d, s) { 8 | fs = d.getElementsByTagName(s)[0]; 9 | function ce(src) { 10 | var cs = d.createElement(s); 11 | cs.src = src; 12 | cs.async = 1; 13 | fs.parentNode.insertBefore(cs, fs); 14 | } 15 | ce( 16 | "https://sc.lfeeder.com/lftracker_v1_" + 17 | ss + 18 | (ex ? 
"_" + ex : "") + 19 | ".js", 20 | ); 21 | })(document, "script"); 22 | })("Xbp1oaEnqwn8EdVj"); 23 | -------------------------------------------------------------------------------- /tests/apiserver/data/workflow/workflow_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from workflows import Context, Workflow, step 4 | from workflows.events import StartEvent, StopEvent 5 | 6 | 7 | class MyWorkflow(Workflow): 8 | @step 9 | def do_something(self, ctx: Context, ev: StartEvent) -> StopEvent: 10 | return StopEvent(result=f"Received: {ev.data}") 11 | 12 | 13 | class _TestEnvWorkflow(Workflow): 14 | @step() 15 | async def read_env_vars(self, ctx: Context, ev: StartEvent) -> StopEvent: 16 | env_vars = [f"{v}: {os.environ.get(v)}" for v in ev.get("env_vars_to_read")] 17 | return StopEvent(result=", ".join(env_vars)) 18 | -------------------------------------------------------------------------------- /e2e_tests/apiserver/test_status.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | @pytest.mark.asyncio 5 | async def test_status_down(client): 6 | res = await client.apiserver.status() 7 | assert res.status.value == "Down" 8 | 9 | 10 | def test_status_down_sync(client): 11 | res = client.sync.apiserver.status() 12 | assert res.status.value == "Down" 13 | 14 | 15 | @pytest.mark.asyncio 16 | async def test_status_up(apiserver, client): 17 | res = await client.apiserver.status() 18 | assert res.status.value == "Healthy" 19 | 20 | 21 | def test_status_up_sync(apiserver, client): 22 | res = client.sync.apiserver.status() 23 | assert res.status.value == "Healthy" 24 | -------------------------------------------------------------------------------- /templates/basic/ui/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.* 7 | .yarn/* 8 | !.yarn/patches 9 | !.yarn/plugins 10 | !.yarn/releases 11 | !.yarn/versions 12 | 13 | # testing 14 | /coverage 15 | 16 | # next.js 17 | /.next/ 18 | /out/ 19 | 20 | # production 21 | /build 22 | 23 | # misc 24 | .DS_Store 25 | *.pem 26 | 27 | # debug 28 | npm-debug.log* 29 | yarn-debug.log* 30 | yarn-error.log* 31 | .pnpm-debug.log* 32 | 33 | # env files (can opt-in for committing if needed) 34 | .env* 35 | 36 | # vercel 37 | .vercel 38 | 39 | # typescript 40 | *.tsbuildinfo 41 | next-env.d.ts 42 | -------------------------------------------------------------------------------- /examples/quick_start/ui/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 
2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.* 7 | .yarn/* 8 | !.yarn/patches 9 | !.yarn/plugins 10 | !.yarn/releases 11 | !.yarn/versions 12 | 13 | # testing 14 | /coverage 15 | 16 | # next.js 17 | /.next/ 18 | /out/ 19 | 20 | # production 21 | /build 22 | 23 | # misc 24 | .DS_Store 25 | *.pem 26 | 27 | # debug 28 | npm-debug.log* 29 | yarn-debug.log* 30 | yarn-error.log* 31 | .pnpm-debug.log* 32 | 33 | # env files (can opt-in for committing if needed) 34 | .env* 35 | 36 | # vercel 37 | .vercel 38 | 39 | # typescript 40 | *.tsbuildinfo 41 | next-env.d.ts 42 | -------------------------------------------------------------------------------- /examples/quick_start/src/workflow.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | from workflows import Workflow, step 4 | from workflows.events import StartEvent, StopEvent 5 | 6 | 7 | # create a dummy workflow 8 | class EchoWorkflow(Workflow): 9 | """A dummy workflow with only one step sending back the input given.""" 10 | 11 | @step() 12 | async def run_step(self, ev: StartEvent) -> StopEvent: 13 | message = str(ev.get("message", "")) 14 | return StopEvent(result=f"Message received: {message}") 15 | 16 | 17 | echo_workflow = EchoWorkflow() 18 | 19 | 20 | async def main(): 21 | print(await echo_workflow.run(message="Hello!")) 22 | 23 | 24 | if __name__ == "__main__": 25 | asyncio.run(main()) 26 | -------------------------------------------------------------------------------- /e2e_tests/apiserver/test_env_vars_local.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | 5 | from llama_deploy.types.core import TaskDefinition 6 | 7 | 8 | @pytest.mark.asyncio 9 | async def test_read_env_vars_local(apiserver, client): 10 | here = Path(__file__).parent 11 | deployment_fp = here / "deployments" / "deployment_env_local.yml" 12 | with open(deployment_fp) as f: 13 | deployment = await client.apiserver.deployments.create( 14 | f, base_path=deployment_fp.parent 15 | ) 16 | 17 | result = await deployment.tasks.run( 18 | TaskDefinition(service_id="test_env_workflow", input="") 19 | ) 20 | 21 | assert result == "var_1: z, var_2: y, api_key: 123" 22 | -------------------------------------------------------------------------------- /examples/google_cloud_run/src/workflow.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | from workflows import Workflow, step 4 | from workflows.events import StartEvent, StopEvent 5 | 6 | 7 | # create a dummy workflow 8 | class EchoWorkflow(Workflow): 9 | """A dummy workflow with only one step sending back the input given.""" 10 | 11 | @step() 12 | async def run_step(self, ev: StartEvent) -> StopEvent: 13 | message = str(ev.get("message", "")) 14 | return StopEvent(result=f"Message received: {message}") 15 | 16 | 17 | echo_workflow = EchoWorkflow() 18 | 19 | 20 | async def main(): 21 | print(await echo_workflow.run(message="Hello!")) 22 | 23 | 24 | if __name__ == "__main__": 25 | asyncio.run(main()) 26 | -------------------------------------------------------------------------------- /templates/basic/ui/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2017", 4 | "lib": ["dom", "dom.iterable", "esnext"], 5 | "allowJs": true, 6 | "skipLibCheck": true, 7 | "strict": true, 8 | "noEmit": true, 9 | "esModuleInterop": true, 10 | 
"module": "esnext", 11 | "moduleResolution": "bundler", 12 | "resolveJsonModule": true, 13 | "isolatedModules": true, 14 | "jsx": "preserve", 15 | "incremental": true, 16 | "plugins": [ 17 | { 18 | "name": "next" 19 | } 20 | ], 21 | "paths": { 22 | "@/*": ["./*"] 23 | } 24 | }, 25 | "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], 26 | "exclude": ["node_modules"] 27 | } 28 | -------------------------------------------------------------------------------- /examples/quick_start/ui/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2017", 4 | "lib": ["dom", "dom.iterable", "esnext"], 5 | "allowJs": true, 6 | "skipLibCheck": true, 7 | "strict": true, 8 | "noEmit": true, 9 | "esModuleInterop": true, 10 | "module": "esnext", 11 | "moduleResolution": "bundler", 12 | "resolveJsonModule": true, 13 | "isolatedModules": true, 14 | "jsx": "preserve", 15 | "incremental": true, 16 | "plugins": [ 17 | { 18 | "name": "next" 19 | } 20 | ], 21 | "paths": { 22 | "@/*": ["./*"] 23 | } 24 | }, 25 | "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], 26 | "exclude": ["node_modules"] 27 | } 28 | -------------------------------------------------------------------------------- /templates/basic/ui/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "ui", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "next dev --turbopack", 7 | "build": "next build", 8 | "start": "next start", 9 | "lint": "next lint" 10 | }, 11 | "dependencies": { 12 | "@heroicons/react": "^2.2.0", 13 | "next": "15.4.7", 14 | "react": "^19.0.0", 15 | "react-dom": "^19.0.0" 16 | }, 17 | "devDependencies": { 18 | "@eslint/eslintrc": "^3", 19 | "@tailwindcss/postcss": "^4", 20 | "@types/node": "^20", 21 | "@types/react": "^19", 22 | "@types/react-dom": "^19", 23 | "eslint": "^9", 24 | "eslint-config-next": "15.3.2", 25 | "tailwindcss": "^4", 26 | "typescript": "^5" 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /e2e_tests/README.md: -------------------------------------------------------------------------------- 1 | # E2E Tests 2 | 3 | E2E tests are run in the `e2e-tests` workflow in `.github/workflows/e2e-tests.yml`. 4 | 5 | Each folder in this directory contains a Python package representing a set of tests for a simple scenario. 6 | 7 | When new folders are added, they will be executed automatically in the CI/CD pipeline by `pytest`. 
8 | 9 | To run all the tests: 10 | 11 | ```sh 12 | $ uv run -- pytest -m"e2e" 13 | ``` 14 | or 15 | ```sh 16 | $ uv run -- pytest ./e2e_tests 17 | ``` 18 | 19 | To run a specific scenario: 20 | 21 | ```sh 22 | $ uv run -- pytest e2e_tests/apiserver 23 | ``` 24 | 25 | If you want to see the output of the different services running, pass the `-s` flag to pytest: 26 | 27 | ```sh 28 | $ uv run -- pytest e2e_tests/apiserver/test_deploy.py -s 29 | ``` 30 | -------------------------------------------------------------------------------- /examples/google_cloud_run/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM llamaindex/llama-deploy:main 2 | 3 | # This will be passed at build time 4 | ARG SOURCE_DIR="" 5 | 6 | # Copy the application code (for example, your workflow) 7 | COPY ${SOURCE_DIR} /app/code 8 | WORKDIR /app/code 9 | 10 | # Deploy automatically what's in /app/code when the container starts 11 | ENV LLAMA_DEPLOY_APISERVER_RC_PATH=/app/code 12 | COPY deployment.yml /app/code 13 | 14 | # Cloud Run requires the container to listen to port 8080 15 | ENV LLAMA_DEPLOY_APISERVER_HOST=0.0.0.0 16 | ENV LLAMA_DEPLOY_APISERVER_PORT=8080 17 | EXPOSE 8080 18 | 19 | # In Cloud Run localhost resolves to the public URI on port 80, 20 | # let's override the default and point explicitly to the internal 21 | # host and port 22 | ENV LLAMA_DEPLOY_API_SERVER_URL="http://127.0.0.1:8080" 23 | -------------------------------------------------------------------------------- /templates/basic/ui/app/layout.tsx: -------------------------------------------------------------------------------- 1 | import type { Metadata } from "next"; 2 | import { Geist, Geist_Mono } from "next/font/google"; 3 | import "./globals.css"; 4 | 5 | const geistSans = Geist({ 6 | variable: "--font-geist-sans", 7 | }); 8 | 9 | const geistMono = Geist_Mono({ 10 | variable: "--font-geist-mono", 11 | }); 12 | 13 | export const metadata: Metadata = { 14 | title: "Create Next App", 15 | description: "Generated by create next app", 16 | }; 17 | 18 | export default function RootLayout({ 19 | children, 20 | }: Readonly<{ 21 | children: React.ReactNode; 22 | }>) { 23 | return ( 24 | 25 | 28 | {children} 29 | 30 | 31 | ); 32 | } 33 | -------------------------------------------------------------------------------- /e2e_tests/apiserver/test_env_vars_git.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | 4 | import pytest 5 | 6 | from llama_deploy.types.core import TaskDefinition 7 | 8 | 9 | @pytest.mark.asyncio 10 | async def test_read_env_vars_git(apiserver, client): 11 | here = Path(__file__).parent 12 | deployment_fp = here / "deployments" / "deployment_env_git.yml" 13 | with open(deployment_fp) as f: 14 | deployment = await client.apiserver.deployments.create( 15 | f, base_path=deployment_fp.parent 16 | ) 17 | 18 | input_str = json.dumps({"env_vars_to_read": ["VAR_1", "VAR_2", "API_KEY"]}) 19 | result = await deployment.tasks.run( 20 | TaskDefinition(service_id="workflow_git", input=input_str) 21 | ) 22 | 23 | assert result == "VAR_1: x, VAR_2: y, API_KEY: 123" 24 | -------------------------------------------------------------------------------- /examples/quick_start/ui/app/layout.tsx: -------------------------------------------------------------------------------- 1 | import type { Metadata } from "next"; 2 | import { Geist, Geist_Mono } from "next/font/google"; 3 | import "./globals.css"; 4 | 5 | const geistSans = 
Geist({ 6 | variable: "--font-geist-sans", 7 | }); 8 | 9 | const geistMono = Geist_Mono({ 10 | variable: "--font-geist-mono", 11 | }); 12 | 13 | export const metadata: Metadata = { 14 | title: "Create Next App", 15 | description: "Generated by create next app", 16 | }; 17 | 18 | export default function RootLayout({ 19 | children, 20 | }: Readonly<{ 21 | children: React.ReactNode; 22 | }>) { 23 | return ( 24 | 25 | 28 | {children} 29 | 30 | 31 | ); 32 | } 33 | -------------------------------------------------------------------------------- /templates/basic/src/workflow.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | from llama_index.llms.openai import OpenAI 4 | from workflows import Workflow, step 5 | from workflows.events import StartEvent, StopEvent 6 | 7 | 8 | # create a completion workflow 9 | class CompletionWorkflow(Workflow): 10 | """A completion workflow with a single step.""" 11 | 12 | llm: OpenAI = OpenAI(model="gpt-4.1-nano") 13 | 14 | @step() 15 | async def run_step(self, ev: StartEvent) -> StopEvent: 16 | message = str(ev.get("message", "")) 17 | response = await self.llm.acomplete(message) 18 | return StopEvent(result=response.text) 19 | 20 | 21 | workflow = CompletionWorkflow() 22 | 23 | 24 | async def main() -> None: 25 | print(await workflow.run(message="Hello!")) 26 | 27 | 28 | if __name__ == "__main__": 29 | asyncio.run(main()) 30 | -------------------------------------------------------------------------------- /examples/python_fullstack/frontend/dockerfile: -------------------------------------------------------------------------------- 1 | # This Dockerfile is used to deploy a simple single-container Reflex app instance. 2 | FROM python:3.10-slim 3 | 4 | # Copy local context to `/app` inside container (see .dockerignore) 5 | WORKDIR /app 6 | COPY . . 7 | 8 | # Install app requirements and reflex in the container 9 | # Deploy templates and prepare app 10 | # Download all npm dependencies and compile frontend 11 | RUN apt-get clean && apt-get update \ 12 | && apt-get --no-install-recommends install zip unzip curl -y \ 13 | && pip install -r requirements.txt \ 14 | && reflex export --frontend-only --no-zip 15 | 16 | # Needed until Reflex properly passes SIGTERM on backend. 17 | STOPSIGNAL SIGKILL 18 | 19 | # Always apply migrations before starting the backend. 20 | CMD [ -d alembic ] && reflex db migrate; reflex run --env prod 21 | -------------------------------------------------------------------------------- /examples/llamacloud/google_drive/deployment.yml: -------------------------------------------------------------------------------- 1 | name: LlamaCloud_LlamaDeploy_GoogleDrive 2 | 3 | control-plane: 4 | port: 8000 5 | 6 | default-service: llamacloud_workflow 7 | 8 | services: 9 | llamacloud_workflow: 10 | name: LlamaCloud GoogleDrive Data Source Workflow 11 | # We tell LlamaDeploy where to look for our workflow 12 | source: 13 | # In this case, we instruct LlamaDeploy to look in the local filesystem 14 | type: local 15 | # The path relative to this deployment config file where to look for the code. 
This assumes 16 | # there's an src folder along with the config file containing the file workflow.py we created previously 17 | name: ./src 18 | # This assumes the file workflow.py contains a variable called `echo_workflow` containing our workflow instance 19 | path: workflow:llamacloud_workflow 20 | -------------------------------------------------------------------------------- /docs/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "docs" 7 | version = "0.1.0" 8 | description = "" 9 | authors = [{name = "Your Name", email = "you@example.com"}] 10 | requires-python = "~=3.11" 11 | readme = "README.md" 12 | dependencies = [ 13 | "llama_deploy[awssqs, rabbitmq, kafka, redis]", 14 | "mkdocs>=1.6.1,<2", 15 | "mkdocstrings[python]>=0.26.1,<0.27", 16 | "mkdocs-include-dir-to-nav>=1.2.0,<2", 17 | "mkdocs-material>=9.5.39,<10", 18 | "mkdocs-redirects>=1.2.1,<2", 19 | "mkdocs-click>=0.8.1,<0.9", 20 | "mkdocs-render-swagger-plugin>=0.1.2,<0.2", 21 | "griffe-fieldz>=0.2.0,<0.3", 22 | "mkdocs-github-admonitions-plugin>=0.0.3,<0.0.4" 23 | ] 24 | 25 | [tool.uv] 26 | package = false 27 | 28 | [tool.uv.sources] 29 | llama_deploy = {path = "../", editable = true} 30 | -------------------------------------------------------------------------------- /.github/workflows/e2e_test.yml: -------------------------------------------------------------------------------- 1 | name: E2E Testing 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | 9 | jobs: 10 | e2e-tests: 11 | runs-on: ubuntu-latest 12 | # E2E tests might get stuck, timeout aggressively for faster feedback 13 | timeout-minutes: 10 14 | strategy: 15 | # Let the matrix finish to see if the failure was transient 16 | fail-fast: false 17 | matrix: 18 | python-version: ["3.10", "3.11", "3.12"] 19 | test-package: ["apiserver"] 20 | steps: 21 | - uses: actions/checkout@v3 22 | 23 | - name: Install uv and set the python version 24 | uses: astral-sh/setup-uv@v5 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | 28 | - name: Run All E2E Tests 29 | run: uv run -- pytest e2e_tests/${{ matrix.test-package }} -s 30 | -------------------------------------------------------------------------------- /llama_deploy/apiserver/stats.py: -------------------------------------------------------------------------------- 1 | from prometheus_client import Enum 2 | 3 | apiserver_state = Enum( 4 | "apiserver_state", 5 | "Current state of the API server", 6 | states=[ 7 | "starting", 8 | "running", 9 | "stopped", 10 | ], 11 | ) 12 | 13 | deployment_state = Enum( 14 | "deployment_state", 15 | "Current state of a deployment", 16 | ["deployment_name"], 17 | states=[ 18 | "loading_services", 19 | "ready", 20 | "starting_services", 21 | "running", 22 | "stopped", 23 | ], 24 | ) 25 | 26 | service_state = Enum( 27 | "service_state", 28 | "Current state of a service attached to a deployment", 29 | ["deployment_name", "service_name"], 30 | states=[ 31 | "loading", 32 | "syncing", 33 | "installing", 34 | "ready", 35 | ], 36 | ) 37 | -------------------------------------------------------------------------------- /tests/cli/test_cli.py: -------------------------------------------------------------------------------- 1 | from unittest import mock 2 | 3 | from click.testing import CliRunner 4 | 5 | from llama_deploy.cli import llamactl 6 | from llama_deploy.cli.__main__ import main 7 | 8 | 9 | 
@mock.patch("llama_deploy.cli.__main__.sys") 10 | @mock.patch("llama_deploy.cli.__main__.llamactl") 11 | def test_main(mocked_cli, mocked_sys) -> None: # type: ignore 12 | mocked_cli.return_value = 0 13 | main() 14 | mocked_sys.exit.assert_called_with(0) 15 | 16 | 17 | def test_root_command(runner: CliRunner) -> None: 18 | result = runner.invoke(llamactl) 19 | assert result.exit_code == 0 20 | # Ensure invoking the root command outputs the help 21 | assert "Usage: llamactl" in result.output 22 | 23 | 24 | def test_wrong_profile(runner: CliRunner) -> None: 25 | result = runner.invoke(llamactl, ["-p", "foo"]) 26 | assert result.exit_code == 1 27 | -------------------------------------------------------------------------------- /examples/python_fullstack/python_fullstack.yaml: -------------------------------------------------------------------------------- 1 | name: MyDeployment 2 | 3 | control-plane: 4 | port: 8000 5 | 6 | message-queue: 7 | type: redis 8 | # what follows depends on what's in the docker compose file 9 | host: redis 10 | port: 6379 11 | 12 | default-service: agentic_workflow 13 | 14 | services: 15 | agentic_workflow: 16 | name: Agentic Workflow 17 | source: 18 | type: local 19 | name: . 20 | path: workflows:agentic_w 21 | python-dependencies: 22 | - llama-index-postprocessor-rankgpt-rerank>=0.2.0 23 | - llama-index-vector-stores-qdrant>=0.3.0 24 | - llama-index-llms-openai>=0.2.2 25 | - llama-index-embeddings-openai>=0.2.4 26 | - llama-index-readers-file>=0.2.0 27 | 28 | rag_workflow: 29 | name: RAG Workflow 30 | source: 31 | type: local 32 | name: . 33 | path: workflows:rag_w 34 | -------------------------------------------------------------------------------- /.github/workflows/publish_release.yml: -------------------------------------------------------------------------------- 1 | name: Publish llama-index to PyPI / GitHub 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | tags: 7 | - "v*" 8 | 9 | jobs: 10 | build-n-publish: 11 | name: Build and publish to PyPI 12 | if: github.repository == 'run-llama/llama_deploy' 13 | runs-on: ubuntu-latest 14 | permissions: 15 | contents: write 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | 20 | - name: Install uv 21 | uses: astral-sh/setup-uv@v5 22 | 23 | - name: Build and publish 24 | env: 25 | UV_PUBLISH_TOKEN: ${{ secrets.LLAMA_AGENTS_PYPI_TOKEN }} 26 | run: | 27 | uv build 28 | uv publish 29 | 30 | - name: Create GitHub Release 31 | uses: ncipollo/release-action@v1 32 | with: 33 | artifacts: "dist/*" 34 | generateReleaseNotes: true 35 | -------------------------------------------------------------------------------- /e2e_tests/apiserver/test_service_entrypoint.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import time 4 | 5 | import requests 6 | 7 | 8 | def test_apiserver_entrypoint(): 9 | # Customize host and port 10 | env = os.environ.copy() 11 | env["LLAMA_DEPLOY_APISERVER_HOST"] = "localhost" 12 | env["LLAMA_DEPLOY_APISERVER_PORT"] = "4502" 13 | # Start the API server as a subprocess 14 | process = subprocess.Popen( 15 | ["python", "-m", "llama_deploy.apiserver"], 16 | stdout=subprocess.PIPE, 17 | stderr=subprocess.PIPE, 18 | env=env, 19 | ) 20 | 21 | try: 22 | # Wait a bit for the server to start 23 | time.sleep(2) 24 | 25 | response = requests.get("http://localhost:4502/status") 26 | assert response.status_code == 200 27 | finally: 28 | # Clean up: terminate the server process 29 | process.terminate() 30 | process.wait() 31 | 
-------------------------------------------------------------------------------- /e2e_tests/apiserver/rc/src/workflow.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | from workflows import Context, Workflow, step 4 | from workflows.events import Event, StartEvent, StopEvent 5 | 6 | 7 | class Message(Event): 8 | text: str 9 | 10 | 11 | class EchoWorkflow(Workflow): 12 | """A dummy workflow streaming three events.""" 13 | 14 | @step() 15 | async def run_step(self, ctx: Context, ev: StartEvent) -> StopEvent: 16 | for i in range(3): 17 | ctx.write_event_to_stream(Message(text=f"message number {i + 1}")) 18 | await asyncio.sleep(0.5) 19 | 20 | return StopEvent(result="Done.") 21 | 22 | 23 | echo_workflow = EchoWorkflow() 24 | 25 | 26 | async def main(): 27 | h = echo_workflow.run(message="Hello!") 28 | async for ev in h.stream_events(): 29 | if type(ev) is Message: 30 | print(ev.text) 31 | print(await h) 32 | 33 | 34 | if __name__ == "__main__": 35 | asyncio.run(main()) 36 | -------------------------------------------------------------------------------- /.github/workflows/unit_test.yml: -------------------------------------------------------------------------------- 1 | name: Unit Testing 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | 9 | jobs: 10 | unit-tests: 11 | runs-on: ubuntu-latest 12 | strategy: 13 | # You can use PyPy versions in python-version. 14 | # For example, pypy-2.7 and pypy-3.8 15 | matrix: 16 | python-version: ["3.10", "3.11", "3.12"] 17 | steps: 18 | - uses: actions/checkout@v3 19 | 20 | - name: Install uv and set the python version 21 | uses: astral-sh/setup-uv@v5 22 | with: 23 | python-version: ${{ matrix.python-version }} 24 | 25 | - name: Run testing 26 | shell: bash 27 | run: uv run -- pytest --cov --cov-report=xml tests 28 | 29 | - if: matrix.python-version == '3.12' 30 | name: Report Coveralls 31 | uses: coverallsapp/github-action@v2 32 | env: 33 | COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }} 34 | -------------------------------------------------------------------------------- /examples/quick_start/ui/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "ui", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "next dev --turbopack", 7 | "build": "next build", 8 | "start": "next start", 9 | "lint": "next lint" 10 | }, 11 | "dependencies": { 12 | "@types/canvas-confetti": "^1.9.0", 13 | "canvas-confetti": "^1.9.3", 14 | "next": "15.4.7", 15 | "react": "^19.0.0", 16 | "react-dom": "^19.0.0" 17 | }, 18 | "devDependencies": { 19 | "@eslint/eslintrc": "^3", 20 | "@tailwindcss/postcss": "^4", 21 | "@types/node": "^20", 22 | "@types/react": "^19", 23 | "@types/react-dom": "^19", 24 | "eslint": "^9", 25 | "eslint-config-next": "15.3.2", 26 | "tailwindcss": "^4", 27 | "typescript": "^5" 28 | }, 29 | "packageManager": "pnpm@10.11.0+sha512.6540583f41cc5f628eb3d9773ecee802f4f9ef9923cc45b69890fb47991d4b092964694ec3a4f738a420c918a333062c8b925d312f42e4f0c263eb603551f977" 30 | } 31 | -------------------------------------------------------------------------------- /e2e_tests/apiserver/test_streaming.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | 5 | from llama_deploy.types import TaskDefinition 6 | 7 | 8 | @pytest.mark.asyncio 9 | async def test_stream(apiserver, client): 10 | here = Path(__file__).parent 11 | 
deployment_fp = here / "deployments" / "deployment_streaming.yml" 12 | with open(deployment_fp) as f: 13 | deployment = await client.apiserver.deployments.create( 14 | f, base_path=deployment_fp.parent 15 | ) 16 | 17 | task = await deployment.tasks.create(TaskDefinition(input='{"a": "b"}')) 18 | 19 | read_events = [] 20 | async for ev in task.events(): 21 | if ev and "text" in ev: 22 | read_events.append(ev) 23 | assert len(read_events) == 3 24 | # the workflow produces events sequentially, so here we can assume events arrived in order 25 | for i, ev in enumerate(read_events): 26 | assert ev["text"] == f"message number {i + 1}" 27 | -------------------------------------------------------------------------------- /e2e_tests/apiserver/deployments/src/workflow.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | from workflows import Context, Workflow, step 4 | from workflows.events import Event, StartEvent, StopEvent 5 | 6 | 7 | class Message(Event): 8 | text: str 9 | 10 | 11 | class EchoWorkflow(Workflow): 12 | """A dummy workflow streaming three events.""" 13 | 14 | @step() 15 | async def run_step(self, ctx: Context, ev: StartEvent) -> StopEvent: 16 | for i in range(3): 17 | ctx.write_event_to_stream(Message(text=f"message number {i + 1}")) 18 | await asyncio.sleep(0.5) 19 | 20 | return StopEvent(result="Done.") 21 | 22 | 23 | streaming_workflow = EchoWorkflow() 24 | 25 | 26 | async def main(): 27 | h = streaming_workflow.run(message="Hello!") 28 | async for ev in h.stream_events(): 29 | if type(ev) is Message: 30 | print(ev.text) 31 | print(await h) 32 | 33 | 34 | if __name__ == "__main__": 35 | asyncio.run(main()) 36 | -------------------------------------------------------------------------------- /examples/python_fullstack/frontend/frontend/style.py: -------------------------------------------------------------------------------- 1 | # style.py 2 | import reflex as rx 3 | 4 | # Common styles for questions and answers. 5 | shadow = "rgba(0, 0, 0, 0.15) 0px 2px 8px" 6 | chat_margin = "20%" 7 | message_style = dict( 8 | padding="1em", 9 | border_radius="5px", 10 | margin_y="0.5em", 11 | box_shadow=shadow, 12 | max_width="30em", 13 | display="inline-block", 14 | ) 15 | 16 | # Set specific styles for questions and answers. 17 | question_style = message_style | dict( 18 | background_color=rx.color("gray", 4), 19 | margin_left=chat_margin, 20 | ) 21 | answer_style = message_style | dict( 22 | background_color=rx.color("accent", 8), 23 | margin_right=chat_margin, 24 | ) 25 | 26 | # Styles for the action bar. 
27 | input_style = dict( 28 | border_width="1px", 29 | padding="0.25em", 30 | box_shadow=shadow, 31 | width="350px", 32 | ) 33 | button_style = dict( 34 | background_color=rx.color("accent", 10), 35 | box_shadow=shadow, 36 | ) 37 | -------------------------------------------------------------------------------- /examples/python_dependencies/workflow.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | from cowpy import cow 4 | from fortune import fortune 5 | from pyfiglet import Figlet 6 | from workflows import Workflow, step 7 | from workflows.events import StartEvent, StopEvent 8 | 9 | 10 | # create a dummy workflow 11 | class EchoWorkflow(Workflow): 12 | """A dummy workflow with only one step sending back the input given.""" 13 | 14 | @step() 15 | async def run_step(self, ev: StartEvent) -> StopEvent: 16 | msg = str(ev.get("message", "")) 17 | font = str(ev.get("font", "blocky")) 18 | fortune_msg = fortune() 19 | f = Figlet(font=font) 20 | ascii_art_message = f.renderText(msg) 21 | ascii_art_message += cow.Stegosaurus().milk(fortune_msg) 22 | return StopEvent(result=ascii_art_message) 23 | 24 | 25 | echo_workflow = EchoWorkflow() 26 | 27 | 28 | async def main(): 29 | print(await echo_workflow.run(message="Hello!")) 30 | 31 | 32 | if __name__ == "__main__": 33 | asyncio.run(main()) 34 | -------------------------------------------------------------------------------- /e2e_tests/apiserver/test_hitl.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from pathlib import Path 3 | 4 | import pytest 5 | from workflows.events import HumanResponseEvent 6 | 7 | from llama_deploy.types import TaskDefinition 8 | 9 | 10 | @pytest.mark.asyncio 11 | async def test_hitl(apiserver, client): 12 | here = Path(__file__).parent 13 | deployment_fp = here / "deployments" / "deployment_hitl.yml" 14 | with open(deployment_fp) as f: 15 | deployment = await client.apiserver.deployments.create( 16 | f, base_path=deployment_fp.parent 17 | ) 18 | 19 | task_handler = await deployment.tasks.create(TaskDefinition(input="{}")) 20 | ev_def = await task_handler.send_event( 21 | ev=HumanResponseEvent(response="42"), service_name="hitl_workflow" 22 | ) 23 | 24 | # wait for workflow to finish 25 | await asyncio.sleep(0.1) 26 | 27 | result = await task_handler.results() 28 | assert ev_def.service_id == "hitl_workflow" 29 | assert result.result == "42", "The human's response is not consistent." 
30 | -------------------------------------------------------------------------------- /e2e_tests/apiserver/deployments/src/workflow_env.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | 4 | from workflows import Context, Workflow, step 5 | from workflows.events import StartEvent, StopEvent 6 | 7 | 8 | class MyWorkflow(Workflow): 9 | @step() 10 | async def run_step(self, ctx: Context, ev: StartEvent) -> StopEvent: 11 | var_1 = os.environ.get("VAR_1") 12 | var_2 = os.environ.get("VAR_2") 13 | api_key = os.environ.get("API_KEY") 14 | return StopEvent( 15 | # result depends on variables read from environment 16 | result=(f"var_1: {var_1}, var_2: {var_2}, api_key: {api_key}") 17 | ) 18 | 19 | 20 | workflow = MyWorkflow() 21 | 22 | 23 | async def main(w: Workflow): 24 | h = w.run() 25 | print(await h) 26 | 27 | 28 | if __name__ == "__main__": 29 | import os 30 | 31 | # set env variables 32 | os.environ["VAR_1"] = "x" 33 | os.environ["VAR_1"] = "y" 34 | os.environ["API_KEY"] = "123" 35 | 36 | w = MyWorkflow() 37 | 38 | asyncio.run(main(w)) 39 | -------------------------------------------------------------------------------- /e2e_tests/apiserver/test_reload.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | 5 | from llama_deploy.types import TaskDefinition 6 | 7 | 8 | @pytest.mark.asyncio 9 | async def test_reload(apiserver, client): 10 | here = Path(__file__).parent 11 | deployment_fp = here / "deployments" / "deployment_reload1.yml" 12 | with open(deployment_fp) as f: 13 | deployment = await client.apiserver.deployments.create( 14 | f, base_path=deployment_fp.parent 15 | ) 16 | 17 | tasks = deployment.tasks 18 | res = await tasks.run(TaskDefinition(input='{"data": "bar"}')) 19 | assert res == "I have received:bar" 20 | 21 | deployment_fp = here / "deployments" / "deployment_reload2.yml" 22 | with open(deployment_fp) as f: 23 | deployment = await client.apiserver.deployments.create( 24 | f, base_path=deployment_fp.parent, reload=True 25 | ) 26 | 27 | tasks = deployment.tasks 28 | res = await tasks.run(TaskDefinition(input='{"data": "bar"}')) 29 | assert res == "Ho ricevuto:bar" 30 | -------------------------------------------------------------------------------- /llama_deploy/cli/sessions.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from llama_deploy.client import Client 4 | 5 | from .internal.config import ConfigProfile 6 | 7 | 8 | @click.group() 9 | def sessions() -> None: 10 | """Manage sessions for a given deployment.""" 11 | pass 12 | 13 | 14 | @click.command() 15 | @click.pass_obj # config_profile 16 | @click.option( 17 | "-d", "--deployment", required=True, is_flag=False, help="Deployment name" 18 | ) 19 | @click.pass_context 20 | def create( 21 | ctx: click.Context, 22 | config_profile: ConfigProfile, 23 | deployment: str, 24 | ) -> None: 25 | client = Client( 26 | api_server_url=config_profile.server, 27 | disable_ssl=config_profile.insecure, 28 | timeout=config_profile.timeout, 29 | ) 30 | 31 | try: 32 | d = client.sync.apiserver.deployments.get(deployment) 33 | session_def = d.sessions.create() 34 | except Exception as e: 35 | raise click.ClickException(str(e)) 36 | 37 | click.echo(session_def) 38 | 39 | 40 | sessions.add_command(create) 41 | -------------------------------------------------------------------------------- /examples/python_dependencies/uv.lock: 
-------------------------------------------------------------------------------- 1 | version = 1 2 | revision = 2 3 | requires-python = ">=3.10, <4.0" 4 | 5 | [[package]] 6 | name = "pyfiglet" 7 | version = "1.0.3" 8 | source = { registry = "https://pypi.org/simple" } 9 | sdist = { url = "https://files.pythonhosted.org/packages/94/49/2554c0b7fef12c0b9633352bbd8751cc616f8e8880e0ebab7732c1535564/pyfiglet-1.0.3.tar.gz", hash = "sha256:bad3b55d2eccb30d4693ccfd94573c2a3477dd75f86a0e5465cea51bdbfe2875", size = 833445, upload-time = "2025-06-02T12:13:29.357Z" } 10 | wheels = [ 11 | { url = "https://files.pythonhosted.org/packages/51/1d/f2cb03dd71a4dba891f808333fa505a6ed2762a8514d8ead7e423fa77e1b/pyfiglet-1.0.3-py3-none-any.whl", hash = "sha256:671bd101ca6a08dc2d94c6a2cda75a862c5e162b980af47d0ba4023837e36489", size = 1087203, upload-time = "2025-06-02T12:13:27.393Z" }, 12 | ] 13 | 14 | [[package]] 15 | name = "uv-requirements" 16 | version = "0.1.0" 17 | source = { virtual = "." } 18 | dependencies = [ 19 | { name = "pyfiglet" }, 20 | ] 21 | 22 | [package.metadata] 23 | requires-dist = [{ name = "pyfiglet", specifier = ">=1.0.3" }] 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 LlamaIndex 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /llama_deploy/cli/status.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from llama_deploy.client import Client 4 | from llama_deploy.types.apiserver import StatusEnum 5 | 6 | from .internal.config import ConfigProfile 7 | 8 | 9 | @click.command() 10 | @click.pass_obj # config_profile 11 | def status(config_profile: ConfigProfile) -> None: 12 | """Print the API Server status.""" 13 | client = Client( 14 | api_server_url=config_profile.server, 15 | disable_ssl=config_profile.insecure, 16 | timeout=config_profile.timeout, 17 | ) 18 | 19 | try: 20 | status = client.sync.apiserver.status() 21 | except Exception as e: 22 | raise click.ClickException(str(e)) 23 | 24 | if status.status == StatusEnum.HEALTHY: 25 | click.echo("LlamaDeploy is up and running.") 26 | if status.deployments: 27 | click.echo("\nActive deployments:") 28 | for d in status.deployments: 29 | click.echo(f"- {d}") 30 | else: 31 | click.echo("\nCurrently there are no active deployments") 32 | else: 33 | click.echo(f"LlamaDeploy is unhealthy: {status.status_message}") 34 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # LlamaDeploy Documentation 2 | 3 | This repository contains the documentation for LlamaDeploy, built using MkDocs with Material theme. 4 | 5 | ## Setup 6 | 7 | ### Prerequisites 8 | - Python 3.10 or higher 9 | - uv (for dependency management) 10 | 11 | ### Installation 12 | 13 | 1. Clone the repository 14 | 2. Install dependencies using uv: 15 | ```bash 16 | uv sync 17 | ``` 18 | 19 | ## Development 20 | 21 | To start the documentation server locally: 22 | ```bash 23 | uv run mkdocs serve 24 | ``` 25 | 26 | This will start a development server at `http://127.0.0.1:8000`. 27 | 28 | ## Building 29 | 30 | LlamaDeploy is part of LlamaIndex [documentation portal](https://docs.llamaindex.ai/) 31 | so the build is performed from the [main repository](https://github.com/run-llama/llama_index). 32 | 33 | > [!WARNING] 34 | > When a documentation change is merged here, the change won't be visible until a new 35 | > build is triggered from the LlamaIndex repository. 36 | 37 | 38 | ## Contributing 39 | 40 | Contributions are very welcome! 41 | 42 | 1. Create a new branch for your changes 43 | 2. Make your changes to the documentation 44 | 3. Test locally using `uv run mkdocs serve` 45 | 4. 
Submit a pull request 46 | -------------------------------------------------------------------------------- /docs/docs/css/style.css: -------------------------------------------------------------------------------- 1 | .md-container .jp-Cell-outputWrapper .jp-OutputPrompt.jp-OutputArea-prompt, 2 | .md-container .jp-Cell-inputWrapper .jp-InputPrompt.jp-InputArea-prompt { 3 | display: none !important; 4 | } 5 | 6 | /* CSS styles for side-by-side layout */ 7 | .container { 8 | display: flex-col; 9 | justify-content: space-between; 10 | margin-bottom: 20px; /* Adjust spacing between sections */ 11 | position: sticky; 12 | top: 2.4rem; 13 | z-index: 1000; /* Ensure it's above other content */ 14 | background-color: white; /* Match your page background */ 15 | padding: 0.2rem; 16 | } 17 | 18 | .example-heading { 19 | margin: 0.2rem !important; 20 | } 21 | 22 | .usage-examples { 23 | width: 100%; /* Adjust the width as needed */ 24 | border: 1px solid var(--md-default-fg-color--light); 25 | border-radius: 2px; 26 | padding: 0.2rem; 27 | } 28 | 29 | /* Additional styling for the toggle */ 30 | .toggle-example { 31 | cursor: pointer; 32 | color: white; 33 | text-decoration: underline; 34 | background-color: var(--md-primary-fg-color); 35 | padding: 0.2rem; 36 | border-radius: 2px; 37 | } 38 | 39 | .hidden { 40 | display: none; 41 | } 42 | 43 | /* mendable search styling */ 44 | #my-component-root > div { 45 | bottom: 100px; 46 | } 47 | -------------------------------------------------------------------------------- /llama_deploy/client/base.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | import httpx 4 | from pydantic_settings import BaseSettings, SettingsConfigDict 5 | 6 | 7 | class _BaseClient(BaseSettings): 8 | """Base type for clients, to be used in Pydantic models to avoid circular imports. 9 | 10 | Settings can be passed to the Client constructor when creating an instance, or defined with environment variables 11 | having names prefixed with the string `LLAMA_DEPLOY_`, e.g. `LLAMA_DEPLOY_DISABLE_SSL`. 
12 | """ 13 | 14 | model_config = SettingsConfigDict(env_prefix="LLAMA_DEPLOY_") 15 | 16 | api_server_url: str = "http://localhost:4501" 17 | disable_ssl: bool = False 18 | timeout: float | None = 120.0 19 | poll_interval: float = 0.5 20 | 21 | async def request( 22 | self, method: str, url: str | httpx.URL, **kwargs: Any 23 | ) -> httpx.Response: 24 | """Performs an async HTTP request using httpx.""" 25 | verify = kwargs.pop("verify", True) 26 | timeout = kwargs.pop("timeout", self.timeout) 27 | async with httpx.AsyncClient(verify=verify) as client: 28 | response = await client.request(method, url, timeout=timeout, **kwargs) 29 | response.raise_for_status() 30 | return response 31 | -------------------------------------------------------------------------------- /llama_deploy/apiserver/app.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | from fastapi import FastAPI 5 | from fastapi.middleware.cors import CORSMiddleware 6 | from fastapi.requests import Request 7 | from fastapi.responses import JSONResponse 8 | 9 | from .routers import deployments_router, status_router 10 | from .server import lifespan 11 | from .settings import settings 12 | from .tracing import configure_tracing 13 | 14 | logger = logging.getLogger("uvicorn.info") 15 | 16 | 17 | app = FastAPI(lifespan=lifespan) 18 | 19 | # Setup tracing 20 | configure_tracing(settings) 21 | 22 | # Configure CORS middleware if the environment variable is set 23 | if not os.environ.get("DISABLE_CORS", False): 24 | app.add_middleware( 25 | CORSMiddleware, 26 | allow_origins=["*"], # Allows all origins 27 | allow_credentials=True, 28 | allow_methods=["GET", "POST"], 29 | allow_headers=["Content-Type", "Authorization"], 30 | ) 31 | 32 | app.include_router(deployments_router) 33 | app.include_router(status_router) 34 | 35 | 36 | @app.get("/") 37 | async def root(request: Request) -> JSONResponse: 38 | return JSONResponse( 39 | { 40 | "swagger_docs": f"{request.base_url}docs", 41 | "status": f"{request.base_url}status", 42 | } 43 | ) 44 | -------------------------------------------------------------------------------- /e2e_tests/apiserver/test_deploy.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | 5 | 6 | @pytest.mark.asyncio 7 | async def test_deploy(apiserver, client): 8 | here = Path(__file__).parent 9 | deployment_fp = here / "deployments" / "deployment1.yml" 10 | with open(deployment_fp) as f: 11 | await client.apiserver.deployments.create(f, base_path=deployment_fp.parent) 12 | 13 | status = await client.apiserver.status() 14 | assert "TestDeployment1" in status.deployments 15 | 16 | 17 | def test_deploy_sync(apiserver, client): 18 | here = Path(__file__).parent 19 | deployment_fp = here / "deployments" / "deployment1.yml" 20 | with open(deployment_fp) as f: 21 | client.sync.apiserver.deployments.create(f, base_path=deployment_fp.parent) 22 | 23 | assert "TestDeployment1" in client.sync.apiserver.status().deployments 24 | 25 | 26 | @pytest.mark.asyncio 27 | async def test_deploy_local(apiserver, client): 28 | here = Path(__file__).parent 29 | deployment_fp = here / "deployments" / "deployment2.yml" 30 | with open(deployment_fp) as f: 31 | await client.apiserver.deployments.create( 32 | f, base_path=str(deployment_fp.parent.resolve()) 33 | ) 34 | 35 | status = await client.apiserver.status() 36 | assert "TestDeployment2" in status.deployments 37 | 
-------------------------------------------------------------------------------- /tests/apiserver/data/example.yaml: -------------------------------------------------------------------------------- 1 | name: MyDeployment 2 | 3 | control-plane: 4 | port: 8000 5 | 6 | message-queue: 7 | type: simple 8 | host: "127.0.0.1" 9 | port: 8001 10 | 11 | default-service: myworkflow 12 | 13 | services: 14 | myworkflow: 15 | # A python workflow available in a git repo 16 | name: My Python Workflow 17 | source: 18 | type: git 19 | location: git@github.com/myorg/myrepo 20 | import-path: src/python/app # relative to root of the repo 21 | port: 1313 22 | python-dependencies: 23 | # this is a list to match the requirements.txt format 24 | - "llama-index-core<1" 25 | - "llama-index-llms-openai" 26 | # we can also support installing a req file relative to `path` 27 | # if source is a git repository 28 | - "requirements.txt" 29 | env: 30 | VAR_1: x 31 | VAR_2: y 32 | env-files: 33 | - ./.env 34 | 35 | another-workflow: 36 | # A LITS workflow available in a git repo (might be the same) 37 | name: My LITS Workflow 38 | source: 39 | type: git 40 | location: git@github.com/myorg/myrepo 41 | import-path: src/ts/app 42 | port: 1313 43 | ts-dependencies: 44 | # this is a mapping to match the package.json format 45 | "@llamaindex/core": "^0.2.0" 46 | "@notionhq/client": "^2.2.15" 47 | -------------------------------------------------------------------------------- /.github/workflows/docker_release.yml: -------------------------------------------------------------------------------- 1 | name: Docker image release 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | tags: 9 | - "v[0-9]+.[0-9]+.[0-9]+*" 10 | 11 | env: 12 | DOCKER_REPO_NAME: llamaindex/llama-deploy 13 | 14 | jobs: 15 | build-and-push: 16 | name: Build base image 17 | runs-on: ubuntu-latest 18 | # don't run from forks 19 | if: github.repository_owner == 'run-llama' 20 | 21 | steps: 22 | - name: Checkout 23 | uses: actions/checkout@v4 24 | 25 | - name: Set up QEMU 26 | uses: docker/setup-qemu-action@v3 27 | 28 | - name: Set up Docker Buildx 29 | uses: docker/setup-buildx-action@v3 30 | 31 | - name: Login to DockerHub 32 | uses: docker/login-action@v3 33 | with: 34 | username: ${{ secrets.DOCKER_HUB_USER }} 35 | password: ${{ secrets.DOCKER_HUB_TOKEN }} 36 | 37 | - name: Docker meta 38 | id: meta 39 | uses: docker/metadata-action@v5 40 | with: 41 | images: $DOCKER_REPO_NAME 42 | 43 | - name: Build images 44 | uses: docker/bake-action@v5 45 | env: 46 | IMAGE_TAG_SUFFIX: ${{ steps.meta.outputs.version }} 47 | LLAMA_DEPLOY_VERSION: ${{ steps.meta.outputs.version }} 48 | with: 49 | workdir: docker 50 | targets: all 51 | push: true 52 | -------------------------------------------------------------------------------- /e2e_tests/apiserver/conftest.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | from pathlib import Path 3 | 4 | import httpx 5 | import pytest 6 | import uvicorn 7 | from tenacity import retry, wait_exponential 8 | 9 | from llama_deploy.client import Client 10 | 11 | 12 | def run_apiserver(): 13 | uvicorn.run("llama_deploy.apiserver.app:app", host="127.0.0.1", port=4501) 14 | 15 | 16 | @retry(wait=wait_exponential(min=1, max=10)) 17 | def wait_for_healthcheck(): 18 | response = httpx.get("http://127.0.0.1:4501/status/") 19 | response.raise_for_status() 20 | 21 | 22 | @pytest.fixture(scope="function") 23 | def apiserver(): 24 | ctx = 
multiprocessing.get_context("spawn") 25 | p = ctx.Process(target=run_apiserver) 26 | p.start() 27 | wait_for_healthcheck() 28 | 29 | yield 30 | 31 | p.terminate() 32 | p.join(timeout=3) 33 | if p.is_alive(): 34 | p.kill() 35 | 36 | 37 | @pytest.fixture(scope="function") 38 | def apiserver_with_rc(monkeypatch): 39 | here = Path(__file__).parent 40 | rc_path = here / "rc" 41 | monkeypatch.setenv("LLAMA_DEPLOY_APISERVER_RC_PATH", str(rc_path)) 42 | 43 | p = multiprocessing.Process(target=run_apiserver) 44 | p.start() 45 | wait_for_healthcheck() 46 | 47 | yield 48 | 49 | p.terminate() 50 | p.join(timeout=3) 51 | if p.is_alive(): 52 | p.kill() 53 | p.close() 54 | 55 | 56 | @pytest.fixture 57 | def client(): 58 | return Client(api_server_url="http://127.0.0.1:4501") 59 | -------------------------------------------------------------------------------- /llama_deploy/cli/deploy.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import click 4 | 5 | from llama_deploy.client import Client 6 | 7 | from .internal.config import ConfigProfile 8 | 9 | 10 | @click.command() 11 | @click.pass_obj # config_profile 12 | @click.option("--reload", is_flag=True) 13 | @click.option( 14 | "--base-path", 15 | required=False, 16 | type=click.Path(file_okay=False, resolve_path=True, path_type=Path), # type: ignore 17 | ) 18 | @click.argument( 19 | "deployment_config_file", 20 | type=click.Path(dir_okay=False, resolve_path=True, path_type=Path), # type: ignore 21 | ) 22 | def deploy( 23 | config_profile: ConfigProfile, 24 | reload: bool, 25 | deployment_config_file: Path, 26 | base_path: Path | None, 27 | ) -> None: 28 | """Create or reload a deployment.""" 29 | client = Client( 30 | api_server_url=config_profile.server, 31 | disable_ssl=config_profile.insecure, 32 | timeout=config_profile.timeout, 33 | ) 34 | 35 | final_base_path = base_path or deployment_config_file.parent 36 | 37 | try: 38 | with open(deployment_config_file, "rb") as f: 39 | deployment = client.sync.apiserver.deployments.create( 40 | f, 41 | base_path=final_base_path, 42 | reload=reload, 43 | ) 44 | except Exception as e: 45 | raise click.ClickException(str(e)) 46 | 47 | click.echo(f"Deployment successful: {deployment.id}") 48 | -------------------------------------------------------------------------------- /llama_deploy/client/client.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from typing import Any 3 | 4 | from .base import _BaseClient 5 | from .models import ApiServer, make_sync 6 | 7 | 8 | class Client(_BaseClient): 9 | """The LlamaDeploy Python client. 10 | 11 | The client is gives access to both the asyncio and non-asyncio APIs. To access the sync 12 | API just use methods of `client.sync`. 13 | 14 | Example usage: 15 | ```py 16 | from llama_deploy.client import Client 17 | 18 | # Use the same client instance 19 | c = Client() 20 | 21 | async def an_async_function(): 22 | status = await client.apiserver.status() 23 | 24 | def normal_function(): 25 | status = client.sync.apiserver.status() 26 | ``` 27 | """ 28 | 29 | @property 30 | def sync(self) -> "_SyncClient": 31 | """Returns the sync version of the client API.""" 32 | try: 33 | asyncio.get_running_loop() 34 | except RuntimeError: 35 | return _SyncClient(**self.model_dump()) 36 | 37 | msg = "You cannot use the sync client within an async event loop - just await the async methods directly." 
38 | raise RuntimeError(msg) 39 | 40 | @property 41 | def apiserver(self) -> ApiServer: 42 | """Access the API Server functionalities.""" 43 | return ApiServer(client=self, id="apiserver") 44 | 45 | 46 | class _SyncClient(_BaseClient): 47 | @property 48 | def apiserver(self) -> Any: 49 | return make_sync(ApiServer)(client=self, id="apiserver") 50 | -------------------------------------------------------------------------------- /llama_deploy/apiserver/source_managers/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from pathlib import Path 3 | 4 | from llama_deploy.apiserver.deployment_config_parser import DeploymentConfig, SyncPolicy 5 | 6 | 7 | class SourceManager(ABC): 8 | """Protocol to be implemented by classes responsible for managing Deployment sources.""" 9 | 10 | def __init__(self, config: DeploymentConfig, base_path: Path | None = None) -> None: 11 | self._config = config 12 | self._base_path = base_path 13 | 14 | @abstractmethod 15 | def sync( 16 | self, 17 | source: str, 18 | destination: str | None = None, 19 | sync_policy: SyncPolicy = SyncPolicy.REPLACE, 20 | ) -> None: # pragma: no cover 21 | """Fetches resources from `source` so they can be used in a deployment. 22 | 23 | Optionally uses `destination` to store data when this makes sense for the 24 | specific source type. 25 | """ 26 | 27 | def relative_path(self, source: str) -> str: 28 | """Unfortunately, there's a difference in behavior of how the source managers sync. 29 | The local source manager syncs the source into the /, whereas 30 | the git source manager just syncs the source into the . This is a temporary shim, since 31 | changing this behavior is a breaking change to deployment.yaml configurations. Local source manager 32 | overrides it. In a future major version, this behavior will be made consistent""" 33 | return "" 34 | -------------------------------------------------------------------------------- /llama_deploy/apiserver/routers/status.py: -------------------------------------------------------------------------------- 1 | import httpx 2 | from fastapi import APIRouter 3 | from fastapi.exceptions import HTTPException 4 | from fastapi.responses import PlainTextResponse 5 | 6 | from llama_deploy.apiserver.server import manager 7 | from llama_deploy.apiserver.settings import settings 8 | from llama_deploy.types.apiserver import Status, StatusEnum 9 | 10 | status_router = APIRouter( 11 | prefix="/status", 12 | ) 13 | 14 | 15 | @status_router.get("/") 16 | async def status() -> Status: 17 | return Status( 18 | status=StatusEnum.HEALTHY, 19 | max_deployments=manager._max_deployments, 20 | deployments=list(manager._deployments.keys()), 21 | status_message="", 22 | ) 23 | 24 | 25 | @status_router.get("/metrics") 26 | async def metrics() -> PlainTextResponse: 27 | """Proxies the Prometheus metrics endpoint through the API Server. 28 | 29 | This endpoint is mostly used in serverless environments where the LlamaDeploy 30 | container cannot expose more than one port (e.g. Knative, Google Cloud Run). 31 | If Prometheus is not enabled, this endpoint returns an empty HTTP-204 response. 
32 | """ 33 | if not settings.prometheus_enabled: 34 | return PlainTextResponse(status_code=204) 35 | 36 | try: 37 | async with httpx.AsyncClient() as client: 38 | response = await client.get(f"http://127.0.0.1:{settings.prometheus_port}/") 39 | return PlainTextResponse(content=response.text) 40 | except httpx.RequestError as exc: 41 | raise HTTPException(status_code=500, detail=str(exc)) 42 | -------------------------------------------------------------------------------- /tests/apiserver/conftest.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Any, Iterator 3 | from unittest import mock 4 | 5 | import pytest 6 | from fastapi.testclient import TestClient 7 | from workflows import Workflow, step 8 | from workflows.events import StartEvent, StopEvent 9 | 10 | from llama_deploy.apiserver.app import app 11 | from llama_deploy.apiserver.deployment import Deployment 12 | from llama_deploy.apiserver.deployment_config_parser import DeploymentConfig 13 | 14 | 15 | class SmallWorkflow(Workflow): 16 | @step() 17 | async def run_step(self, ev: StartEvent) -> StopEvent: 18 | return StopEvent(result="Hello, world!") 19 | 20 | 21 | @pytest.fixture 22 | def mock_importlib() -> Iterator[None]: 23 | with mock.patch("llama_deploy.apiserver.deployment.importlib") as importlib: 24 | importlib.import_module.return_value = mock.MagicMock( 25 | my_workflow=SmallWorkflow() 26 | ) 27 | yield 28 | 29 | 30 | @pytest.fixture 31 | def data_path() -> Path: 32 | data_p = Path(__file__).parent / "data" 33 | return data_p.resolve() 34 | 35 | 36 | @pytest.fixture 37 | def mocked_deployment(data_path: Path, mock_importlib: Any) -> Iterator[Deployment]: 38 | config = DeploymentConfig.from_yaml(data_path / "git_service.yaml") 39 | with mock.patch("llama_deploy.apiserver.deployment.SOURCE_MANAGERS") as sm_dict: 40 | sm_dict["git"] = mock.MagicMock() 41 | yield Deployment(config=config, base_path=data_path, deployment_path=Path(".")) 42 | 43 | 44 | @pytest.fixture 45 | def http_client() -> TestClient: 46 | return TestClient(app) 47 | -------------------------------------------------------------------------------- /tests/cli/test_sessions.py: -------------------------------------------------------------------------------- 1 | from unittest import mock 2 | 3 | import httpx 4 | from click.testing import CliRunner 5 | 6 | from llama_deploy.cli import llamactl 7 | 8 | 9 | def test_session_create(runner: CliRunner) -> None: 10 | with mock.patch("llama_deploy.cli.sessions.Client") as mocked_client: 11 | mocked_deployment = mock.MagicMock() 12 | mocked_deployment.sessions.create.return_value = mock.MagicMock( 13 | id="test_session" 14 | ) 15 | mocked_client.return_value.sync.apiserver.deployments.get.return_value = ( 16 | mocked_deployment 17 | ) 18 | 19 | result = runner.invoke( 20 | llamactl, 21 | ["sessions", "create", "-d", "deployment_name"], 22 | ) 23 | 24 | mocked_client.assert_called_with( 25 | api_server_url="http://localhost:4501", disable_ssl=False, timeout=120.0 26 | ) 27 | 28 | mocked_deployment.sessions.create.assert_called_once() 29 | assert result.exit_code == 0 30 | 31 | 32 | def test_sessions_create_error(runner: CliRunner) -> None: 33 | with mock.patch("llama_deploy.cli.sessions.Client") as mocked_client: 34 | mocked_client.return_value.sync.apiserver.deployments.get.side_effect = ( 35 | httpx.HTTPStatusError( 36 | "test error", response=mock.MagicMock(), request=mock.MagicMock() 37 | ) 38 | ) 39 | 40 | result = runner.invoke( 41 
| llamactl, ["sessions", "create", "-d", "deployment_name"] 42 | ) 43 | 44 | assert result.exit_code == 1 45 | assert result.output == "Error: test error\n" 46 | -------------------------------------------------------------------------------- /tests/cli/internal/test_config.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from unittest import mock 3 | 4 | from llama_deploy.cli.internal.config import ( 5 | Config, 6 | ConfigProfile, 7 | _default_config_path, 8 | load_config, 9 | ) 10 | from llama_deploy.cli.internal.utils import DEFAULT_CONFIG_FILE_NAME 11 | 12 | 13 | def test_load_config(data_path: Path) -> None: 14 | test_config_file = data_path / DEFAULT_CONFIG_FILE_NAME 15 | config = load_config(path=test_config_file) 16 | assert "test" in config.profiles 17 | 18 | 19 | def test_load_config_no_path(tmp_path: Path) -> None: 20 | with mock.patch("llama_deploy.cli.internal.utils.user_config_dir") as mock_dir: 21 | mock_dir.return_value = tmp_path 22 | config = load_config(path=None) 23 | assert len(config.profiles) == 1 24 | assert "default" in config.profiles 25 | 26 | 27 | def test__default_config_path() -> None: 28 | assert str(_default_config_path()).endswith(DEFAULT_CONFIG_FILE_NAME) 29 | 30 | 31 | def test_config_write(tmp_path: Path) -> None: 32 | config_path = tmp_path / "test.yaml" 33 | assert not config_path.exists() 34 | config = Config( 35 | current_profile="test", profiles={"test": ConfigProfile()}, path=config_path 36 | ) 37 | config.write() 38 | assert config_path.exists() 39 | 40 | 41 | def test_config_dir_doesnt_exist(tmp_path: Path) -> None: 42 | with mock.patch("llama_deploy.cli.internal.utils.user_config_dir") as mock_dir: 43 | mock_dir.return_value = tmp_path / "config" / "folder" 44 | config = load_config(path=None) 45 | assert len(config.profiles) == 1 46 | assert "default" in config.profiles 47 | -------------------------------------------------------------------------------- /examples/python_fullstack/frontend/frontend/frontend.py: -------------------------------------------------------------------------------- 1 | import reflex as rx 2 | 3 | from frontend import style 4 | from frontend.state import State 5 | from frontend.session_list.component import session_list 6 | from frontend.session_list.state import SessionState 7 | 8 | 9 | def qa(content: str, idx: int) -> rx.Component: 10 | return rx.box( 11 | rx.text(content, style=style.answer_style), 12 | text_align=rx.cond(idx % 2 == 0, "right", "left"), 13 | margin_left="1em", 14 | ) 15 | 16 | 17 | def chat() -> rx.Component: 18 | return rx.box( 19 | rx.foreach(State.chat_history, lambda messages, idx: qa(messages, idx)) 20 | ) 21 | 22 | 23 | def action_bar() -> rx.Component: 24 | return rx.hstack( 25 | rx.input( 26 | value=State.question, 27 | placeholder="Ask a question", 28 | on_change=State.set_question, 29 | on_key_down=lambda key: State.handle_key_down( 30 | key, SessionState.selected_session_id 31 | ), 32 | style=style.input_style, 33 | ), 34 | rx.button( 35 | "Ask", 36 | on_click=lambda: State.answer(SessionState.selected_session_id), 37 | style=style.button_style, 38 | ), 39 | ) 40 | 41 | 42 | def index() -> rx.Component: 43 | return rx.center( 44 | rx.hstack( 45 | session_list(), 46 | rx.vstack( 47 | chat(), 48 | action_bar(), 49 | align="center", 50 | ), 51 | margin_left="4", 52 | ), 53 | ) 54 | 55 | 56 | app = rx.App() 57 | app.add_page(index, on_load=SessionState.create_default_session) 58 | 
-------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- 1 | # Docker build system 2 | 3 | This folder contains the files needed to build the base `llama_deploy` images that 4 | can be used to simplify deployments by reducing boiler plate code. 5 | 6 | ## Image Development 7 | 8 | Images are built with [BuildKit](https://docs.docker.com/build/buildkit/) and we use 9 | `bake` to orchestrate the process. To build all the available images run: 10 | 11 | ```sh 12 | docker buildx bake all 13 | ``` 14 | 15 | You can override any `variable` defined in the `docker-bake.hcl` file and build custom 16 | images, for example if you want to use a branch from the llama_deploy repo instead of 17 | an official release, run: 18 | 19 | ```sh 20 | LLAMA_DEPLOY_VERSION=mybranch_or_tag docker buildx bake 21 | ``` 22 | 23 | ### Multi-Platform Builds 24 | 25 | `llama_deploy` images support multiple architectures. Depending on your operating 26 | system and Docker environment, you might not be able to build all of them locally. 27 | 28 | This is the error you might encounter: 29 | 30 | ``` 31 | multiple platforms feature is currently not supported for docker driver. Please switch to a different driver 32 | (eg. “docker buildx create --use”) 33 | ``` 34 | 35 | Make sure `containerd` image store is enabled, following the instruction in the [Docker documentation](https://docs.docker.com/build/building/multi-platform/#enable-the-containerd-image-store). 36 | 37 | If the problem persists, one solution is to override the `platform` option and 38 | limit local builds to the same architecture as your computer's. For example, on an Apple M1 you can limit the builds 39 | to ARM only by invoking `bake` like this: 40 | 41 | ```sh 42 | docker buildx bake control_plane --set "*.platform=linux/arm64" 43 | ``` 44 | -------------------------------------------------------------------------------- /examples/python_fullstack/docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | qdrant: 3 | # Used by the RAG workflow 4 | image: qdrant/qdrant:latest 5 | ports: 6 | - "6333:6333" 7 | - "6334:6334" 8 | volumes: 9 | - qdrant_data:/qdrant/storage 10 | 11 | apiserver: 12 | # LlamaDeploy API server, will run the workflows 13 | image: llamaindex/llama-deploy:main 14 | environment: 15 | QDRANT_HOST: qdrant 16 | OPENAI_API_KEY: $OPENAI_API_KEY 17 | ports: 18 | - "4501:4501" 19 | depends_on: 20 | redis: 21 | condition: service_healthy 22 | healthcheck: 23 | test: llamactl status 24 | interval: 5s 25 | timeout: 3s 26 | retries: 5 27 | volumes: 28 | - ./:/opt/app 29 | working_dir: /opt/app 30 | 31 | deploy_workflows: 32 | # Init container, it deploys python_fullstack.yaml and exits 33 | image: llamaindex/llama-deploy:main 34 | volumes: 35 | - ./python_fullstack.yaml:/opt/python_fullstack.yaml 36 | working_dir: /opt/ 37 | depends_on: 38 | apiserver: 39 | condition: service_healthy 40 | entrypoint: llamactl -s http://apiserver:4501 -t 60 deploy python_fullstack.yaml 41 | 42 | frontend: 43 | # UI for this deployment, running at http://localhost:3000 44 | environment: 45 | APISERVER_URL: http://apiserver:4501 46 | DEPLOYMENT_NAME: MyDeployment 47 | build: 48 | context: ./frontend 49 | dockerfile: dockerfile 50 | ports: 51 | - "3000:3000" 52 | - "9000:9000" 53 | volumes: 54 | - ./frontend:/app 55 | depends_on: 56 | deploy_workflows: 57 | condition: 
service_completed_successfully 58 | 59 | volumes: 60 | qdrant_data: 61 | -------------------------------------------------------------------------------- /tests/apiserver/routers/test_status.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | from unittest import mock 3 | 4 | import httpx 5 | from fastapi.testclient import TestClient 6 | 7 | from llama_deploy.apiserver.settings import settings 8 | 9 | 10 | def test_read_main(http_client: TestClient) -> None: 11 | response = http_client.get("/status") 12 | assert response.status_code == 200 13 | assert response.json() == { 14 | "max_deployments": 10, 15 | "deployments": [], 16 | "status": "Healthy", 17 | "status_message": "", 18 | } 19 | 20 | 21 | def test_prom_proxy_off(http_client: TestClient, monkeypatch: Any) -> None: 22 | monkeypatch.setattr(settings, "prometheus_enabled", False) 23 | response = http_client.get("/status/metrics/") 24 | assert response.status_code == 204 25 | assert response.text == "" 26 | 27 | 28 | def test_prom_proxy(http_client: TestClient) -> None: 29 | mock_metrics_response = 'metric1{label="value"} 1.0\nmetric2{label="value"} 2.0' 30 | mock_response = httpx.Response(200, text=mock_metrics_response) 31 | 32 | with mock.patch("httpx.AsyncClient.get", return_value=mock_response): 33 | response = http_client.get("/status/metrics") 34 | assert response.status_code == 200 35 | assert response.text == mock_metrics_response 36 | 37 | 38 | def test_prom_proxy_failure(http_client: TestClient) -> None: 39 | # Mock the HTTP client to raise an exception 40 | with mock.patch( 41 | "httpx.AsyncClient.get", side_effect=httpx.RequestError("Connection failed") 42 | ): 43 | response = http_client.get("/status/metrics") 44 | assert response.status_code == 500 45 | assert response.json()["detail"] == "Connection failed" 46 | -------------------------------------------------------------------------------- /tests/apiserver/test_config_parser.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from llama_deploy.apiserver.deployment_config_parser import DeploymentConfig 4 | 5 | 6 | def do_assert(config: DeploymentConfig) -> None: 7 | assert config.name == "MyDeployment" 8 | assert config.default_service == "myworkflow" 9 | 10 | wf_config = config.services["myworkflow"] 11 | assert wf_config.name == "My Python Workflow" 12 | assert wf_config.source 13 | assert wf_config.source.type == "git" 14 | assert wf_config.source.location == "git@github.com/myorg/myrepo" 15 | assert wf_config.import_path == "src/python/app" 16 | assert wf_config.port == 1313 17 | assert wf_config.python_dependencies 18 | assert len(wf_config.python_dependencies) == 3 19 | assert wf_config.env == {"VAR_1": "x", "VAR_2": "y"} 20 | assert wf_config.env_files == ["./.env"] 21 | 22 | wf_config = config.services["another-workflow"] 23 | assert wf_config.name == "My LITS Workflow" 24 | assert wf_config.source 25 | assert wf_config.source.type == "git" 26 | assert wf_config.source.location == "git@github.com/myorg/myrepo" 27 | assert wf_config.import_path == "src/ts/app" 28 | assert wf_config.port == 1313 29 | assert wf_config.ts_dependencies 30 | assert len(wf_config.ts_dependencies) == 2 31 | assert wf_config.ts_dependencies["@llamaindex/core"] == "^0.2.0" 32 | 33 | 34 | def test_load_config_file(data_path: Path) -> None: 35 | config = DeploymentConfig.from_yaml(data_path / "example.yaml") 36 | do_assert(config) 37 | 38 | 39 | def 
test_from_yaml_bytes(data_path: Path) -> None: 40 | with open(data_path / "example.yaml", "rb") as config_f: 41 | config = DeploymentConfig.from_yaml_bytes(config_f.read()) 42 | do_assert(config) 43 | -------------------------------------------------------------------------------- /llama_deploy/apiserver/source_managers/git.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | from pathlib import Path 3 | from typing import Any 4 | 5 | from git import Repo 6 | 7 | from .base import SourceManager, SyncPolicy 8 | 9 | 10 | class GitSourceManager(SourceManager): 11 | """A SourceManager specialized for sources of type `git`.""" 12 | 13 | def sync( 14 | self, 15 | source: str, 16 | destination: str | None = None, 17 | sync_policy: SyncPolicy = SyncPolicy.REPLACE, 18 | ) -> None: 19 | """Clones the repository at URL `source` into a local path `destination`. 20 | 21 | Args: 22 | source: The URL of the git repository. It can optionally contain a branch target using the name convention 23 | `git_repo_url@branch_name`. For example, "https://example.com/llama_deploy.git@branch_name". 24 | destination: The path in the local filesystem where to clone the git repository. 25 | """ 26 | if not destination: 27 | raise ValueError("Destination cannot be empty") 28 | 29 | if Path(destination).exists(): 30 | # FIXME: pull when SyncPolicy is MERGE 31 | shutil.rmtree(destination) 32 | 33 | url, branch_name = self._parse_source(source) 34 | kwargs: dict[str, Any] = {"url": url, "to_path": destination} 35 | if branch_name: 36 | kwargs["multi_options"] = [f"-b {branch_name}", "--single-branch"] 37 | 38 | Repo.clone_from(**kwargs) 39 | 40 | @staticmethod 41 | def _parse_source(source: str) -> tuple[str, str | None]: 42 | branch_name = None 43 | toks = source.split("@") 44 | url = toks[0] 45 | if len(toks) > 1: 46 | branch_name = toks[1] 47 | 48 | return url, branch_name 49 | -------------------------------------------------------------------------------- /tests/cli/test_deploy.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from unittest import mock 3 | 4 | import httpx 5 | from click.testing import CliRunner 6 | 7 | from llama_deploy.cli import llamactl 8 | 9 | 10 | def test_deploy(runner: CliRunner, data_path: Path) -> None: 11 | test_config_file = data_path / "deployment.yaml" 12 | mocked_result = mock.MagicMock(id="test_deployment") 13 | with mock.patch("llama_deploy.cli.deploy.Client") as mocked_client: 14 | mocked_client.return_value.sync.apiserver.deployments.create.return_value = ( 15 | mocked_result 16 | ) 17 | 18 | result = runner.invoke(llamactl, ["-t", "5.0", "deploy", str(test_config_file)]) 19 | 20 | assert result.exit_code == 0 21 | assert result.output == "Deployment successful: test_deployment\n" 22 | mocked_client.assert_called_with( 23 | api_server_url="http://localhost:4501", disable_ssl=False, timeout=5.0 24 | ) 25 | file_arg = ( 26 | mocked_client.return_value.sync.apiserver.deployments.create.call_args 27 | ) 28 | assert str(test_config_file) == file_arg.args[0].name 29 | 30 | 31 | def test_deploy_failed(runner: CliRunner, data_path: Path) -> None: 32 | test_config_file = data_path / "deployment.yaml" 33 | with mock.patch("llama_deploy.cli.deploy.Client") as mocked_client: 34 | mocked_client.return_value.sync.apiserver.deployments.create.side_effect = ( 35 | httpx.HTTPStatusError( 36 | "Unauthorized!", response=mock.MagicMock(), request=mock.MagicMock() 37 | ) 38 | ) 39 | 
40 | result = runner.invoke(llamactl, ["deploy", str(test_config_file)]) 41 | assert result.exit_code == 1 42 | assert result.output == "Error: Unauthorized!\n" 43 | -------------------------------------------------------------------------------- /llama_deploy/cli/run.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import click 4 | import httpx 5 | 6 | from llama_deploy.client import Client 7 | from llama_deploy.types import TaskDefinition 8 | 9 | from .internal.config import ConfigProfile 10 | 11 | 12 | @click.command() 13 | @click.pass_obj # config_profile 14 | @click.option( 15 | "-d", "--deployment", required=True, is_flag=False, help="Deployment name" 16 | ) 17 | @click.option( 18 | "-a", 19 | "--arg", 20 | multiple=True, 21 | is_flag=False, 22 | type=(str, str), 23 | help="'key value' argument to pass to the task, e.g. '-a age 30'", 24 | ) 25 | @click.option("-s", "--service", is_flag=False, help="Service name") 26 | @click.option("-i", "--session-id", is_flag=False, help="Session ID") 27 | @click.pass_context 28 | def run( 29 | ctx: click.Context, 30 | config_profile: ConfigProfile, 31 | deployment: str, 32 | arg: tuple[tuple[str, str], ...], 33 | service: str, 34 | session_id: str, 35 | ) -> None: 36 | """Run a task on a deployment, optionally targeting a specific service.""" 37 | client = Client( 38 | api_server_url=config_profile.server, 39 | disable_ssl=config_profile.insecure, 40 | timeout=config_profile.timeout, 41 | ) 42 | 43 | payload = {"input": json.dumps(dict(arg))} 44 | if service: 45 | payload["service_id"] = service 46 | if session_id: 47 | payload["session_id"] = session_id 48 | 49 | try: 50 | d = client.sync.apiserver.deployments.get(deployment) 51 | result = d.tasks.run(TaskDefinition(**payload)) 52 | except Exception as e: 53 | extra_info = "" 54 | if isinstance(e, httpx.HTTPStatusError): 55 | extra_info = f" {e.response.text}" 56 | 57 | raise click.ClickException(f"{str(e)}{extra_info}") 58 | 59 | click.echo(result) 60 | -------------------------------------------------------------------------------- /docs/docs/javascript/llms_example.js: -------------------------------------------------------------------------------- 1 | var exampleTemplate = `
Framework Usage
2 | `; 6 | 7 | var exampleMarkdown = `\`\`\`python 8 | from llama_index.core import Settings 9 | 10 | # global default 11 | Settings.llm = llm 12 | 13 | # per-component 14 | # objects that use an LLM accept it as a kwarg 15 | index.as_query_engine(llm=llm) 16 | 17 | index.as_chat_engine(llm=llm) 18 | \`\`\``; 19 | 20 | function addToggleToExample() { 21 | const toggleExample = document.querySelector(".toggle-example"); 22 | const usageExamples = document.querySelector(".usage-examples"); 23 | 24 | toggleExample.addEventListener("click", function () { 25 | console.log("clicked!"); 26 | console.log(usageExamples); 27 | usageExamples.classList.toggle("hidden"); 28 | }); 29 | } 30 | 31 | // Add marked package as