├── CLAUDE.md
├── src
│ ├── art
│ │ ├── py.typed
│ │ ├── unsloth
│ │ │ └── __init__.py
│ │ ├── preprocessing
│ │ │ └── __init__.py
│ │ ├── torchtune
│ │ │ ├── __init__.py
│ │ │ ├── batch.py
│ │ │ └── train.sh
│ │ ├── transformers
│ │ │ ├── __init__.py
│ │ │ └── patches.py
│ │ ├── local
│ │ │ ├── __init__.py
│ │ │ ├── service.py
│ │ │ └── checkpoints.py
│ │ ├── skypilot
│ │ │ ├── __init__.py
│ │ │ └── stop_server.py
│ │ ├── serverless
│ │ │ └── __init__.py
│ │ ├── rewards
│ │ │ └── __init__.py
│ │ ├── langgraph
│ │ │ ├── __init__.py
│ │ │ └── logging.py
│ │ ├── utils
│ │ │ ├── benchmarking
│ │ │ │ ├── charts
│ │ │ │ │ └── __init__.py
│ │ │ │ ├── types.py
│ │ │ │ ├── filter_model_split.py
│ │ │ │ └── pull_model_trajectories.py
│ │ │ ├── old_benchmarking
│ │ │ │ ├── display_image_grid.py
│ │ │ │ ├── calculate_step_metrics.py
│ │ │ │ ├── generate_comparison_table.py
│ │ │ │ └── types.py
│ │ │ ├── __init__.py
│ │ │ ├── get_repo_root_path.py
│ │ │ ├── deployment
│ │ │ │ ├── legacy.py
│ │ │ │ └── __init__.py
│ │ │ ├── format_message.py
│ │ │ ├── limit_concurrency.py
│ │ │ ├── log_http_errors.py
│ │ │ ├── get_model_step.py
│ │ │ ├── benchmark_rollout.py
│ │ │ ├── logging.py
│ │ │ ├── output_dirs.py
│ │ │ ├── strip_logprobs.py
│ │ │ └── deploy_model.py
│ │ ├── mcp
│ │ │ ├── default_tools.py
│ │ │ └── __init__.py
│ │ ├── dev
│ │ │ ├── __init__.py
│ │ │ ├── train.py
│ │ │ └── torchtune.py
│ │ ├── types.py
│ │ ├── yield_trajectory.py
│ │ ├── vllm
│ │ │ └── __init__.py
│ │ ├── errors.py
│ │ ├── batches.py
│ │ └── __init__.py
│ └── mp_actors
│   ├── __init__.py
│   └── traceback.py
├── .python-version
├── dev
│ ├── swebench
│ │ ├── __init__.py
│ │ ├── tools
│ │ │ ├── registry
│ │ │ │ ├── lib
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── registry.py
│ │ │ │ ├── config.yaml
│ │ │ │ ├── install.sh
│ │ │ │ └── bin
│ │ │ │   ├── _write_env
│ │ │ │   └── _read_env
│ │ │ ├── review_on_submit_m
│ │ │ │ ├── install.sh
│ │ │ │ ├── README.md
│ │ │ │ ├── config.yaml
│ │ │ │ └── bin
│ │ │ │   └── submit
│ │ │ └── edit_anthropic
│ │ │   ├── install.sh
│ │ │   └── bin
│ │ │     └── _state_anthropic
│ │ ├── sandbox
│ │ │ ├── __init__.py
│ │ │ ├── daytona.py
│ │ │ ├── modal.py
│ │ │ └── new.py
│ │ ├── sandboxes.py
│ │ ├── run.py
│ │ ├── pyproject.toml
│ │ └── trl.ipynb
│ ├── tau-bench
│ │ ├── .python-version
│ │ ├── tau_bench
│ │ │ ├── model_utils
│ │ │ │ ├── api
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── types.py
│ │ │ │ │ ├── _model_methods.py
│ │ │ │ │ ├── exception.py
│ │ │ │ │ └── logging.py
│ │ │ │ ├── model
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── exception.py
│ │ │ │ │ ├── vllm_utils.py
│ │ │ │ │ └── outlines_completion.py
│ │ │ │ ├── func_tools
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── filter.py
│ │ │ │ │ └── map.py
│ │ │ │ └── args.py
│ │ │ ├── agents
│ │ │ │ ├── __init__.py
│ │ │ │ └── base.py
│ │ │ ├── envs
│ │ │ │ ├── airline
│ │ │ │ │ ├── rules.py
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── wiki.py
│ │ │ │ │ ├── data
│ │ │ │ │ │ └── __init__.py
│ │ │ │ │ ├── tools
│ │ │ │ │ │ ├── think.py
│ │ │ │ │ │ ├── get_user_details.py
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── get_reservation_details.py
│ │ │ │ │ │ ├── transfer_to_human_agents.py
│ │ │ │ │ │ ├── calculate.py
│ │ │ │ │ │ ├── cancel_reservation.py
│ │ │ │ │ │ ├── list_all_airports.py
│ │ │ │ │ │ ├── send_certificate.py
│ │ │ │ │ │ ├── search_direct_flight.py
│ │ │ │ │ │ └── update_reservation_passengers.py
│ │ │ │ │ └── env.py
│ │ │ │ ├── retail
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── wiki.py
│ │ │ │ │ ├── data
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ └── readme.md
│ │ │ │ │ ├── rules.py
│ │ │ │ │ ├── tools
│ │ │ │ │ │ ├── get_user_details.py
│ │ │ │ │ │ ├── list_all_product_types.py
│ │ │ │ │ │ ├── get_order_details.py
│ │ │ │ │ │ ├── get_product_details.py
│ │ │ │ │ │ ├── find_user_id_by_email.py
│ │ │ │ │ │ ├── think.py
│ │ │ │ │ │ ├── transfer_to_human_agents.py
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── calculate.py
│ │ │ │ │ │ └── find_user_id_by_name_zip.py
│ │ │ │ │ └── env.py
│ │ │ │ ├── tool.py
│ │ │ │ └── __init__.py
│ │ │ └── __init__.py
│ │ ├── .gitignore
│ │ ├── MANIFEST.in
│ │ ├── packed_tensor_images
│ │ │ ├── packed_tensors_plot_1752190878.png
│ │ │ ├── packed_tensors_plot_1752193757.png
│ │ │ ├── packed_tensors_plot_1752196743.png
│ │ │ ├── packed_tensors_plot_1752199731.png
│ │ │ ├── packed_tensors_plot_1752202622.png
│ │ │ ├── packed_tensors_plot_1752205600.png
│ │ │ ├── packed_tensors_plot_1752208547.png
│ │ │ ├── packed_tensors_plot_1752211467.png
│ │ │ ├── packed_tensors_plot_1752214557.png
│ │ │ └── packed_tensors_plot_1752217461.png
│ │ ├── check.py
│ │ ├── setup.py
│ │ ├── pyproject.toml
│ │ └── LICENSE
│ ├── playwright_agent
│ │ ├── pyproject.toml
│ │ └── job_desc_dataset.json
│ ├── test_skypilot
│ │ ├── launch.py
│ │ ├── launch_tail.py
│ │ └── register_model.py
│ └── new_models
│   ├── prompts.json
│   ├── qwen3_try.py
│   └── gemma3.py
├── examples
│ ├── 2048
│ │ ├── generate_benchmarks.py
│ │ └── train.py
│ ├── mcp-rl
│ │ ├── README.md
│ │ ├── servers
│ │ │ └── python
│ │ │   ├── mcp_balldontlie
│ │ │   │ ├── __init__.py
│ │ │   │ ├── server_params.py
│ │ │   │ ├── scenarios
│ │ │   │ │ └── val.jsonl
│ │ │   │ └── README.md
│ │ │   ├── mcp_googlemaps
│ │ │   │ ├── __init__.py
│ │ │   │ ├── server_params.py
│ │ │   │ └── pyproject.toml
│ │ │   └── mcp_alphavantage
│ │ │     ├── __init__.py
│ │ │     ├── server_params.py
│ │ │     ├── README.md
│ │ │     └── scenarios
│ │ │       └── val.jsonl
│ │ ├── mcp_rl
│ │ │ ├── __init__.py
│ │ │ └── utils.py
│ │ ├── pyproject.toml
│ │ └── all_experiments.py
│ ├── just-the-facts
│ │ ├── README.md
│ │ ├── .gitignore
│ │ ├── just_the_facts
│ │ │ ├── __init__.py
│ │ │ ├── find_articles.py
│ │ │ └── experiments.py
│ │ ├── main.py
│ │ ├── test_scraper.py
│ │ └── pyproject.toml
│ ├── roflbot
│ │ └── .gitignore
│ ├── hn_title_generator
│ │ ├── skypilot.yaml
│ │ └── skypilot-reference-grpo-trainer.yaml
│ └── tic_tac_toe_self_play
│   ├── gather_trajectory_groups_by_index.py
│   └── deploy_step.py
├── docs
│ ├── .gitignore
│ ├── images
│ │ ├── forked-run.webp
│ │ ├── faq
│ │ │ └── art-loop.webp
│ │ ├── ruler-results.png
│ │ ├── site-assets
│ │ │ └── favicon.webp
│ │ └── open-deep-research-progress.png
│ ├── package.json
│ ├── analytics.js
│ ├── README.md
│ ├── style.css
│ ├── getting-started
│ │ └── quick-start.mdx
│ ├── resources
│ │ ├── glossary.mdx
│ │ └── models.mdx
│ ├── experimental
│ │ └── gspo.mdx
│ ├── fundamentals
│ │ └── training-loop.mdx
│ └── docs.json
├── assets
│ ├── ART_logo.png
│ ├── ART_pill.png
│ ├── Discord.png
│ ├── ART_E_pill.png
│ ├── ART_header.png
│ ├── Colab_pill.png
│ ├── Train_pill.png
│ ├── ART_E_graphs.png
│ ├── Header_separator.png
│ ├── Documentation_pill.png
│ └── benchmarks
│   └── codenames
│     └── win_rate_over_time.png
├── scripts
│ ├── kill-gpu-processes.sh
│ ├── publish.sh
│ ├── setup.sh
│ ├── launch-cluster.sh
│ ├── migrate-s3-checkpoints.py
│ └── bump_version.py
├── .skyignore
├── .dockerignore
├── .gitignore
├── requirements
│ └── backend.vcs.txt
├── .env.example
├── .github
│ └── workflows
│   ├── ruff.yml
│   └── release.yml
├── AGENT.md
├── THIRD-PARTY-NOTICES
└── pyproject.toml
/CLAUDE.md:
--------------------------------------------------------------------------------
1 | AGENT.md
--------------------------------------------------------------------------------
/src/art/py.typed:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.python-version:
--------------------------------------------------------------------------------
1 | 3.10
2 |
--------------------------------------------------------------------------------
/dev/swebench/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/examples/mcp-rl/README.md:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/art/unsloth/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/examples/just-the-facts/README.md:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/art/preprocessing/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/art/torchtune/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/art/transformers/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/dev/tau-bench/.python-version:
--------------------------------------------------------------------------------
1 | 3.11
2 |
--------------------------------------------------------------------------------
/dev/swebench/tools/registry/lib/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/dev/swebench/tools/registry/config.yaml:
--------------------------------------------------------------------------------
1 | tools: {}
--------------------------------------------------------------------------------
/dev/swebench/tools/review_on_submit_m/install.sh:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/examples/just-the-facts/.gitignore:
--------------------------------------------------------------------------------
1 | *.egg-info/
--------------------------------------------------------------------------------
/dev/tau-bench/tau_bench/model_utils/api/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/dev/tau-bench/tau_bench/model_utils/model/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | package-lock.json
--------------------------------------------------------------------------------
/examples/mcp-rl/servers/python/mcp_balldontlie/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/examples/mcp-rl/servers/python/mcp_googlemaps/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/dev/tau-bench/tau_bench/agents/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright Sierra
2 |
--------------------------------------------------------------------------------
/examples/roflbot/.gitignore:
--------------------------------------------------------------------------------
1 | *.db
2 | /data/
3 | .env
4 | .venv/
5 |
--------------------------------------------------------------------------------
/dev/tau-bench/.gitignore:
--------------------------------------------------------------------------------
1 | results/
2 | benchmark_results/
3 | error_analysis_results/
--------------------------------------------------------------------------------
/examples/just-the-facts/just_the_facts/__init__.py:
--------------------------------------------------------------------------------
1 | # Just the Facts package
2 |
--------------------------------------------------------------------------------
/assets/ART_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenPipe/ART/HEAD/assets/ART_logo.png
--------------------------------------------------------------------------------
/assets/ART_pill.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenPipe/ART/HEAD/assets/ART_pill.png
--------------------------------------------------------------------------------
/assets/Discord.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenPipe/ART/HEAD/assets/Discord.png
--------------------------------------------------------------------------------
/dev/tau-bench/tau_bench/envs/airline/rules.py:
--------------------------------------------------------------------------------
1 | # Copyright Sierra
2 |
3 | RULES = []
4 |
--------------------------------------------------------------------------------
/assets/ART_E_pill.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenPipe/ART/HEAD/assets/ART_E_pill.png
--------------------------------------------------------------------------------
/assets/ART_header.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenPipe/ART/HEAD/assets/ART_header.png
--------------------------------------------------------------------------------
/assets/Colab_pill.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenPipe/ART/HEAD/assets/Colab_pill.png
--------------------------------------------------------------------------------
/assets/Train_pill.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenPipe/ART/HEAD/assets/Train_pill.png
--------------------------------------------------------------------------------
/assets/ART_E_graphs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenPipe/ART/HEAD/assets/ART_E_graphs.png
--------------------------------------------------------------------------------
/assets/Header_separator.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenPipe/ART/HEAD/assets/Header_separator.png
--------------------------------------------------------------------------------
/dev/tau-bench/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include tau_bench *.json
2 | recursive-include tau_bench *.md
3 |
--------------------------------------------------------------------------------
/docs/images/forked-run.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenPipe/ART/HEAD/docs/images/forked-run.webp
--------------------------------------------------------------------------------
/examples/mcp-rl/servers/python/mcp_alphavantage/__init__.py:
--------------------------------------------------------------------------------
1 | """MCP AlphaVantage Python Server"""
2 |
--------------------------------------------------------------------------------
/src/art/local/__init__.py:
--------------------------------------------------------------------------------
1 | from .backend import LocalBackend
2 |
3 | __all__ = ["LocalBackend"]
4 |
--------------------------------------------------------------------------------
/assets/Documentation_pill.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenPipe/ART/HEAD/assets/Documentation_pill.png
--------------------------------------------------------------------------------
/docs/images/faq/art-loop.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenPipe/ART/HEAD/docs/images/faq/art-loop.webp
--------------------------------------------------------------------------------
/docs/images/ruler-results.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenPipe/ART/HEAD/docs/images/ruler-results.png
--------------------------------------------------------------------------------
/src/art/skypilot/__init__.py:
--------------------------------------------------------------------------------
1 | from .backend import SkyPilotBackend
2 |
3 | __all__ = ["SkyPilotBackend"]
4 |
--------------------------------------------------------------------------------
/dev/tau-bench/tau_bench/model_utils/api/types.py:
--------------------------------------------------------------------------------
1 | from typing import Any
2 |
3 | PartialObj = dict[str, Any]
4 |
--------------------------------------------------------------------------------
/dev/swebench/tools/edit_anthropic/install.sh:
--------------------------------------------------------------------------------
1 | pip install 'tree-sitter==0.21.3'
2 | pip install 'tree-sitter-languages'
--------------------------------------------------------------------------------
/src/art/serverless/__init__.py:
--------------------------------------------------------------------------------
1 | from .backend import ServerlessBackend
2 |
3 | __all__ = ["ServerlessBackend"]
4 |
--------------------------------------------------------------------------------
/docs/images/site-assets/favicon.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenPipe/ART/HEAD/docs/images/site-assets/favicon.webp
--------------------------------------------------------------------------------
/scripts/kill-gpu-processes.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | nvidia-smi --query-compute-apps=pid --format=csv,noheader | xargs -r kill -9
--------------------------------------------------------------------------------
/src/art/rewards/__init__.py:
--------------------------------------------------------------------------------
1 | from .ruler import ruler, ruler_score_group
2 |
3 | __all__ = ["ruler", "ruler_score_group"]
4 |
--------------------------------------------------------------------------------
/docs/images/open-deep-research-progress.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenPipe/ART/HEAD/docs/images/open-deep-research-progress.png
--------------------------------------------------------------------------------
/src/art/langgraph/__init__.py:
--------------------------------------------------------------------------------
1 | from .llm_wrapper import init_chat_model, wrap_rollout
2 |
3 | __all__ = ["wrap_rollout", "init_chat_model"]
4 |
--------------------------------------------------------------------------------
/src/mp_actors/__init__.py:
--------------------------------------------------------------------------------
1 | from .move import close_proxy, move_to_child_process
2 |
3 | __all__ = ["close_proxy", "move_to_child_process"]
4 |
--------------------------------------------------------------------------------
/assets/benchmarks/codenames/win_rate_over_time.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenPipe/ART/HEAD/assets/benchmarks/codenames/win_rate_over_time.png
--------------------------------------------------------------------------------
/examples/just-the-facts/main.py:
--------------------------------------------------------------------------------
1 | def main():
2 | print("Hello from just-the-facts!")
3 |
4 |
5 | if __name__ == "__main__":
6 | main()
7 |
--------------------------------------------------------------------------------
/dev/tau-bench/tau_bench/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright Sierra
2 |
3 | from tau_bench.agents.base import Agent as Agent
4 | from tau_bench.envs.base import Env as Env
5 |
--------------------------------------------------------------------------------
/dev/tau-bench/tau_bench/envs/retail/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright Sierra
2 |
3 | from tau_bench.envs.retail.env import MockRetailDomainEnv as MockRetailDomainEnv
4 |
--------------------------------------------------------------------------------
/dev/swebench/sandbox/__init__.py:
--------------------------------------------------------------------------------
1 | from .new import new_sandbox
2 | from .sandbox import Provider, Sandbox
3 |
4 | __all__ = ["new_sandbox", "Provider", "Sandbox"]
5 |
--------------------------------------------------------------------------------
/dev/tau-bench/tau_bench/envs/airline/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright Sierra
2 |
3 | from tau_bench.envs.airline.env import MockAirlineDomainEnv as MockAirlineDomainEnv
4 |
--------------------------------------------------------------------------------
/dev/tau-bench/packed_tensor_images/packed_tensors_plot_1752190878.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenPipe/ART/HEAD/dev/tau-bench/packed_tensor_images/packed_tensors_plot_1752190878.png
--------------------------------------------------------------------------------
/dev/tau-bench/packed_tensor_images/packed_tensors_plot_1752193757.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenPipe/ART/HEAD/dev/tau-bench/packed_tensor_images/packed_tensors_plot_1752193757.png
--------------------------------------------------------------------------------
/dev/tau-bench/packed_tensor_images/packed_tensors_plot_1752196743.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenPipe/ART/HEAD/dev/tau-bench/packed_tensor_images/packed_tensors_plot_1752196743.png
--------------------------------------------------------------------------------
/dev/tau-bench/packed_tensor_images/packed_tensors_plot_1752199731.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenPipe/ART/HEAD/dev/tau-bench/packed_tensor_images/packed_tensors_plot_1752199731.png
--------------------------------------------------------------------------------
/dev/tau-bench/packed_tensor_images/packed_tensors_plot_1752202622.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenPipe/ART/HEAD/dev/tau-bench/packed_tensor_images/packed_tensors_plot_1752202622.png
--------------------------------------------------------------------------------
/dev/tau-bench/packed_tensor_images/packed_tensors_plot_1752205600.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenPipe/ART/HEAD/dev/tau-bench/packed_tensor_images/packed_tensors_plot_1752205600.png
--------------------------------------------------------------------------------
/dev/tau-bench/packed_tensor_images/packed_tensors_plot_1752208547.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenPipe/ART/HEAD/dev/tau-bench/packed_tensor_images/packed_tensors_plot_1752208547.png
--------------------------------------------------------------------------------
/dev/tau-bench/packed_tensor_images/packed_tensors_plot_1752211467.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenPipe/ART/HEAD/dev/tau-bench/packed_tensor_images/packed_tensors_plot_1752211467.png
--------------------------------------------------------------------------------
/dev/tau-bench/packed_tensor_images/packed_tensors_plot_1752214557.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenPipe/ART/HEAD/dev/tau-bench/packed_tensor_images/packed_tensors_plot_1752214557.png
--------------------------------------------------------------------------------
/dev/tau-bench/packed_tensor_images/packed_tensors_plot_1752217461.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenPipe/ART/HEAD/dev/tau-bench/packed_tensor_images/packed_tensors_plot_1752217461.png
--------------------------------------------------------------------------------
/dev/tau-bench/tau_bench/model_utils/func_tools/__init__.py:
--------------------------------------------------------------------------------
1 | from tau_bench.model_utils.func_tools.filter import filter as filter
2 | from tau_bench.model_utils.func_tools.map import map as map
3 |
--------------------------------------------------------------------------------
/dev/tau-bench/tau_bench/model_utils/api/_model_methods.py:
--------------------------------------------------------------------------------
1 | MODEL_METHODS = [
2 | "classify",
3 | "binary_classify",
4 | "parse",
5 | "generate",
6 | "parse_force",
7 | "score",
8 | ]
9 |
--------------------------------------------------------------------------------
/dev/tau-bench/tau_bench/envs/airline/wiki.py:
--------------------------------------------------------------------------------
1 | # Copyright Sierra
2 |
3 | import os
4 |
5 | FOLDER_PATH = os.path.dirname(__file__)
6 |
7 | with open(os.path.join(FOLDER_PATH, "wiki.md"), "r") as f:
8 | WIKI = f.read()
9 |
--------------------------------------------------------------------------------
/dev/tau-bench/tau_bench/envs/retail/wiki.py:
--------------------------------------------------------------------------------
1 | # Copyright Sierra
2 |
3 | import os
4 |
5 | FOLDER_PATH = os.path.dirname(__file__)
6 |
7 | with open(os.path.join(FOLDER_PATH, "wiki.md"), "r") as f:
8 | WIKI = f.read()
9 |
--------------------------------------------------------------------------------
/dev/playwright_agent/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "playwright-agent"
3 | version = "0.1.0"
4 | requires-python = ">=3.10"
5 | dependencies = [
6 | "mcp>=1.13.1",
7 | "openpipe>=5.0.0",
8 | "panza>=0.1.0",
9 | ]
10 |
--------------------------------------------------------------------------------
/examples/mcp-rl/mcp_rl/__init__.py:
--------------------------------------------------------------------------------
1 | """ART MCP package."""
2 |
3 | from .mcp_server import AlphaMcpServer, McpServer
4 | from .rollout import McpScenario, rollout
5 |
6 | __all__ = ["rollout", "McpScenario", "McpServer", "AlphaMcpServer"]
7 |
--------------------------------------------------------------------------------
/dev/swebench/tools/registry/install.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # script_dir=$(dirname "$(readlink -f "$0")")
4 | bundle_dir=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
5 |
6 | export PYTHONPATH="$bundle_dir/lib":$PYTHONPATH
--------------------------------------------------------------------------------
/dev/swebench/tools/review_on_submit_m/README.md:
--------------------------------------------------------------------------------
1 | # Review on submit
2 |
3 | Provides an alternative to `submit` that does not submit immediately, but instead asks
4 | the agent to perform additional review steps first.
5 |
6 | Only `submit -f` triggers the real submit.
--------------------------------------------------------------------------------
/dev/tau-bench/check.py:
--------------------------------------------------------------------------------
1 | from langfuse import Langfuse
2 |
3 | langfuse = Langfuse(
4 | secret_key="sk-lf-22c352a1-945f-45fe-ae01-a4b3f67527c0",
5 | public_key="pk-lf-94184f77-f55b-4f4a-af05-1cc34b2f89bd",
6 | host="https://us.cloud.langfuse.com",
7 | )
8 |
--------------------------------------------------------------------------------
/dev/swebench/tools/review_on_submit_m/config.yaml:
--------------------------------------------------------------------------------
1 | tools:
2 | submit:
3 | signature: "submit"
4 | docstring: "submits the current file"
5 | # Do not actually show the -f argument to the model, only
6 | # use it from the agent for submission after error
7 |
--------------------------------------------------------------------------------
/src/art/utils/benchmarking/charts/__init__.py:
--------------------------------------------------------------------------------
1 | from .percentage_comparison_bar_chart import percentage_comparison_bar_chart
2 | from .training_progress_chart import training_progress_chart
3 |
4 | __all__ = ["percentage_comparison_bar_chart", "training_progress_chart"]
5 |
--------------------------------------------------------------------------------
/.skyignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | .art/
3 | # .env
4 | .venv/
5 | grpo_trainer_lora_model/
6 | logs/
7 | shared_cache.db
8 | streaming-chat-completions/
9 | unsloth_compiled_cache/
10 | wandb/
11 | docs/node_modules/
12 | dist/
13 | dev/art-e/data/
14 | replays/
15 | trajectories/
16 | .DS_Store
17 | # .local/
--------------------------------------------------------------------------------
/src/art/torchtune/batch.py:
--------------------------------------------------------------------------------
1 | from pydantic import BaseModel
2 |
3 | from .. import dev, types
4 | from ..preprocessing.pack import DiskPackedTensors
5 |
6 |
7 | class Batch(BaseModel):
8 | disk_packed_tensors: DiskPackedTensors
9 | config: types.TrainConfig
10 | dev_config: dev.TrainConfig
11 |
--------------------------------------------------------------------------------
/dev/swebench/tools/registry/bin/_write_env:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import sys
4 |
5 | from registry import registry # type: ignore
6 |
7 | if __name__ == "__main__":
8 | var_name = sys.argv[1]
9 | var_value = sys.argv[2] if len(sys.argv) > 2 else ""
10 | registry[var_name] = var_value
11 |
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | .art/
3 | # .env
4 | .venv/
5 | grpo_trainer_lora_model/
6 | logs/
7 | shared_cache.db
8 | streaming-chat-completions/
9 | unsloth_compiled_cache/
10 | wandb/
11 | docs/node_modules/
12 | dist/
13 | replays/
14 | trajectories/
15 | .DS_Store
16 | # .local/
17 | # .claude/
18 | .vscode/
--------------------------------------------------------------------------------
/dev/tau-bench/tau_bench/envs/tool.py:
--------------------------------------------------------------------------------
1 | import abc
2 | from typing import Any
3 |
4 |
5 | class Tool(abc.ABC):
6 | @staticmethod
7 | def invoke(*args, **kwargs):
8 | raise NotImplementedError
9 |
10 | @staticmethod
11 | def get_info() -> dict[str, Any]:
12 | raise NotImplementedError
13 |
--------------------------------------------------------------------------------
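
A note on the `Tool` ABC above: concrete tau-bench tools override both static methods, with `invoke` reading or mutating the environment's data dict and `get_info` returning an OpenAI-style function schema. A minimal hypothetical sketch (the `echo` tool is made up, not a file in this repo):

from typing import Any

from tau_bench.envs.tool import Tool


class Echo(Tool):
    @staticmethod
    def invoke(data: dict[str, Any], text: str) -> str:
        # Touches no environment state; just returns the input.
        return text

    @staticmethod
    def get_info() -> dict[str, Any]:
        return {
            "type": "function",
            "function": {
                "name": "echo",
                "description": "Echo the provided text back to the caller.",
                "parameters": {
                    "type": "object",
                    "properties": {"text": {"type": "string"}},
                    "required": ["text"],
                },
            },
        }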
/dev/swebench/tools/registry/bin/_read_env:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import sys
4 |
5 | from registry import registry # type: ignore
6 |
7 | if __name__ == "__main__":
8 | var_name = sys.argv[1]
9 | default_value = sys.argv[2] if len(sys.argv) > 2 else ""
10 | print(registry.get(var_name, default_value))
11 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | .art/
3 | .env
4 | .venv/
5 | grpo_trainer_lora_model/
6 | logs/
7 | shared_cache.db
8 | data/cache.db
9 | streaming-chat-completions/
10 | unsloth_compiled_cache/
11 | wandb/
12 | docs/node_modules/
13 | dist/
14 | replays/
15 | trajectories/
16 | .DS_Store
17 | .local/
18 | .claude/
19 | .vscode/
20 | .ruff_cache/
21 | !/src/art/wandb/
22 | !/src/art/wandb/**
23 | /src/art/wandb/__pycache__/
--------------------------------------------------------------------------------
/dev/swebench/sandboxes.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 |
3 | import modal
4 |
5 |
6 | async def terminate_sandboxes() -> None:
7 | sandboxes: list[modal.Sandbox] = []
8 | async for sandbox in modal.Sandbox.list.aio(
9 | app_id=modal.App.lookup("swe-rex", create_if_missing=True).app_id
10 | ):
11 | sandboxes.append(sandbox)
12 | _ = await asyncio.gather(*[sandbox.terminate.aio() for sandbox in sandboxes])
13 |
--------------------------------------------------------------------------------
/dev/tau-bench/tau_bench/agents/base.py:
--------------------------------------------------------------------------------
1 | # Copyright Sierra
2 |
3 | import abc
4 | from typing import Optional
5 |
6 | from tau_bench.envs.base import Env
7 | from tau_bench.types import SolveResult
8 |
9 |
10 | class Agent(abc.ABC):
11 | @abc.abstractmethod
12 | async def solve(
13 | self, env: Env, task_index: Optional[int] = None, max_num_steps: int = 30
14 | ) -> SolveResult:
15 | raise NotImplementedError
16 |
--------------------------------------------------------------------------------
/src/art/utils/benchmarking/types.py:
--------------------------------------------------------------------------------
1 | class BenchmarkModelKey:
2 | name: str
3 | display_name: str
4 | split: str
5 |
6 | def __init__(
7 | self, name: str, display_name: str | None = None, split: str | None = None
8 | ):
9 | self.name = name
10 | self.display_name = display_name or name
11 | self.split = split or "val"
12 |
13 | def __str__(self):
14 | return self.display_name
15 |
--------------------------------------------------------------------------------
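
Usage sketch for `BenchmarkModelKey` above (model names are hypothetical): `display_name` falls back to `name`, and `split` defaults to "val".

base = BenchmarkModelKey("gpt-4o-mini")
assert str(base) == "gpt-4o-mini" and base.split == "val"

trained = BenchmarkModelKey("agent-007", display_name="Agent 007", split="train")
assert str(trained) == "Agent 007"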
/dev/tau-bench/tau_bench/model_utils/args.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | from tau_bench.model_utils.model.model import Platform
4 |
5 |
6 | def api_parser() -> argparse.ArgumentParser:
7 | parser = argparse.ArgumentParser()
8 | parser.add_argument("--model", type=str)
9 | parser.add_argument("--base-url", type=str)
10 | parser.add_argument(
11 | "--platform", type=str, required=True, choices=[e.value for e in Platform]
12 | )
13 | return parser
14 |
--------------------------------------------------------------------------------
/docs/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "docs",
3 | "version": "1.0.0",
4 | "description": "",
5 | "main": "index.js",
6 | "scripts": {
7 | "dev": "mintlify dev --port 3001",
8 | "build": "mintlify build",
9 | "generate:routes": "npx @mintlify/scraping@latest openapi-file ./openapi.json --outDir ./api-reference"
10 | },
11 | "keywords": [],
12 | "author": "",
13 | "license": "ISC",
14 | "dependencies": {
15 | "mintlify": "^4.0.433"
16 | }
17 | }
18 |
--------------------------------------------------------------------------------
/src/art/mcp/default_tools.py:
--------------------------------------------------------------------------------
1 | from art.mcp.types import MCPTool
2 |
3 | complete_task_tool = MCPTool(
4 | name="complete_task",
5 | description="Complete a task",
6 | parameters={
7 | "type": "object",
8 | "properties": {
9 | "summary": {
10 | "type": "string",
11 | "description": "Summary of accomplishments",
12 | }
13 | },
14 | "required": ["summary"],
15 | },
16 | )
17 |
--------------------------------------------------------------------------------
/src/art/utils/old_benchmarking/display_image_grid.py:
--------------------------------------------------------------------------------
1 | from IPython.display import HTML, display
2 |
3 |
4 | def display_image_grid(image_paths: list[str], images_per_row: int = 2):
5 |     html = f"""
6 |     <div style="display: grid; grid-template-columns: repeat({images_per_row}, 1fr); gap: 10px;">
7 |     """
8 |     for path in image_paths:
9 |         # img/div markup reconstructed; the original tags were stripped in extraction
10 |         html += f'<img src="{path}" style="width: 100%;" />'
11 |     html += "</div>"
11 | display(HTML(html))
12 |
--------------------------------------------------------------------------------
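
A quick notebook usage sketch for the helper above (the paths are hypothetical):

display_image_grid(
    ["plots/step_100.png", "plots/step_200.png", "plots/step_300.png"],
    images_per_row=3,
)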
/examples/mcp-rl/servers/python/mcp_balldontlie/server_params.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from dotenv import load_dotenv
4 | from mcp import StdioServerParameters
5 |
6 | load_dotenv()
7 |
8 | server_params = StdioServerParameters(
9 | command="python",
10 | args=[
11 | "servers/python/mcp_balldontlie/server.py",
12 | "--api-key",
13 | os.getenv("BALLDONTLIE_API_KEY", ""),
14 | ],
15 | env={"BALLDONTLIE_API_KEY": os.getenv("BALLDONTLIE_API_KEY")},
16 | )
17 |
--------------------------------------------------------------------------------
/examples/mcp-rl/servers/python/mcp_googlemaps/server_params.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from dotenv import load_dotenv
4 | from mcp import StdioServerParameters
5 |
6 | load_dotenv()
7 |
8 | server_params = StdioServerParameters(
9 | command="python",
10 | args=[
11 | "servers/python/mcp_googlemaps/server.py",
12 | "--api-key",
13 | os.getenv("GOOGLE_MAPS_API_KEY", ""),
14 | ],
15 | env={"GOOGLE_MAPS_API_KEY": os.getenv("GOOGLE_MAPS_API_KEY")},
16 | )
17 |
--------------------------------------------------------------------------------
/examples/mcp-rl/servers/python/mcp_alphavantage/server_params.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from dotenv import load_dotenv
4 | from mcp import StdioServerParameters
5 |
6 | load_dotenv()
7 |
8 | server_params = StdioServerParameters(
9 | command="python",
10 | args=[
11 | "servers/python/mcp_alphavantage/server.py",
12 | "--api-key",
13 | os.getenv("ALPHAVANTAGE_API_KEY", "demo"),
14 | ],
15 | env={"ALPHAVANTAGE_API_KEY": os.getenv("ALPHAVANTAGE_API_KEY")},
16 | )
17 |
--------------------------------------------------------------------------------
/dev/swebench/tools/edit_anthropic/bin/_state_anthropic:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import json
4 | import os
5 | from pathlib import Path
6 |
7 |
8 | def main():
9 | state_path = Path("/root/state.json")
10 | if state_path.exists():
11 | state = json.loads(state_path.read_text())
12 | else:
13 | state = {}
14 |
15 | state["working_dir"] = os.getcwd()
16 |
17 | state_path.write_text(json.dumps(state))
18 |
19 |
20 | if __name__ == "__main__":
21 | main()
22 |
--------------------------------------------------------------------------------
/scripts/publish.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 |
4 | # Load the .env file
5 | set -o allexport
6 | source .env
7 |
8 | # Check if PYPI_ART_TOKEN is set
9 | if [[ -z "${PYPI_ART_TOKEN}" ]]; then
10 | echo "Error: PYPI_ART_TOKEN is not set."
11 | exit 1
12 | fi
13 |
14 | # Delete the dist directory
15 | rm -rf dist
16 |
17 | # Build the package
18 | uv run hatch build
19 |
20 | # Publish the package to PyPI
21 | uv publish --username=__token__ --password=$PYPI_ART_TOKEN
23 |
--------------------------------------------------------------------------------
/requirements/backend.vcs.txt:
--------------------------------------------------------------------------------
1 | # Pinned backend dependencies that must come from VCS (not allowed in PyPI metadata).
2 | # Install with:
3 | # uv pip install -r requirements/backend.vcs.txt
4 | # or
5 | # pip install -r requirements/backend.vcs.txt
6 |
7 | # Torchtune pinned to known-good commit
8 | torchtune @ git+https://github.com/pytorch/torchtune.git@2344509cf83bd886538fe3e8263e5145d1afb5c2
9 |
10 | # Unsloth Zoo pinned to known-good commit
11 | unsloth-zoo @ git+https://github.com/bradhilton/unsloth-zoo@323cf5e
12 |
--------------------------------------------------------------------------------
/src/art/mcp/__init__.py:
--------------------------------------------------------------------------------
1 | """MCP utilities for Agent Reinforcement Training."""
2 |
3 | from .default_tools import complete_task_tool
4 | from .generate_scenarios import generate_scenarios
5 | from .types import (
6 | GeneratedScenario,
7 | GeneratedScenarioCollection,
8 | MCPResource,
9 | MCPTool,
10 | )
11 |
12 | __all__ = [
13 | "MCPResource",
14 | "MCPTool",
15 | "GeneratedScenario",
16 | "GeneratedScenarioCollection",
17 | "complete_task_tool",
18 | "generate_scenarios",
19 | ]
20 |
--------------------------------------------------------------------------------
/src/art/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # Import all utilities to maintain the same interface
2 | from .format_message import format_message
3 | from .get_model_step import get_model_step
4 | from .iterate_dataset import iterate_dataset
5 | from .limit_concurrency import limit_concurrency
6 | from .log_http_errors import log_http_errors
7 | from .retry import retry
8 |
9 | __all__ = [
10 | "format_message",
11 | "retry",
12 | "iterate_dataset",
13 | "limit_concurrency",
14 | "log_http_errors",
15 | "get_model_step",
16 | ]
17 |
--------------------------------------------------------------------------------
/src/art/utils/get_repo_root_path.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 |
4 | def get_repo_root_path() -> str:
5 | try:
6 | # search through parent directories until we find a .git directory
7 | current_dir = os.path.dirname(os.path.abspath(__file__))
8 | while not os.path.exists(os.path.join(current_dir, ".git")):
9 | if current_dir == "/":
10 | raise Exception("Could not find .git directory")
11 | current_dir = os.path.dirname(current_dir)
12 | return current_dir
13 | except Exception:
14 | return "."
15 |
--------------------------------------------------------------------------------
/dev/tau-bench/tau_bench/model_utils/func_tools/filter.py:
--------------------------------------------------------------------------------
1 | from typing import Callable, Iterable, TypeVar
2 |
3 | from tau_bench.model_utils.func_tools.map import map
4 |
5 | T = TypeVar("T")
6 |
7 | builtin_filter = filter
8 |
9 |
10 | def filter(
11 |     func: Callable[[T], bool],
12 |     iterable: Iterable[T],
13 |     max_concurrency: int | None = None,
14 | ) -> Iterable[T]:
15 |     assert max_concurrency is None or max_concurrency > 0
16 |     # Materialize once so a one-shot iterator isn't consumed by map() and then
17 |     # again (already exhausted) by zip().
18 |     items = list(iterable)
19 |     bits = map(func, iterable=items, max_concurrency=max_concurrency)
20 |     return [x for x, y in zip(items, bits) if y]
18 |
--------------------------------------------------------------------------------
/src/art/utils/deployment/legacy.py:
--------------------------------------------------------------------------------
1 | """Legacy exports for backwards compatibility."""
2 |
3 | from enum import Enum
4 |
5 | from pydantic import BaseModel
6 |
7 | from .together import TogetherJobStatus
8 |
9 |
10 | class LoRADeploymentProvider(str, Enum):
11 | """Legacy enum for deployment providers."""
12 |
13 | TOGETHER = "together"
14 | WANDB = "wandb"
15 |
16 |
17 | class LoRADeploymentJob(BaseModel):
18 | """Legacy result class for deployment jobs."""
19 |
20 | status: TogetherJobStatus
21 | job_id: str
22 | model_name: str
23 | failure_reason: str | None
24 |
--------------------------------------------------------------------------------
/examples/mcp-rl/servers/python/mcp_googlemaps/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "mcp-googlemaps"
3 | version = "0.1.0"
4 | description = "Google Maps MCP Server - Provides access to Google Maps APIs including Geocoding and Places"
5 | readme = "README.md"
6 | requires-python = ">=3.10"
7 | dependencies = [
8 | "aiohttp>=3.9.0",
9 | "click>=8.1.0",
10 | "mcp>=1.0.0",
11 | "python-dotenv>=1.0.0",
12 | "tenacity>=8.0.0",
13 | ]
14 |
15 | [project.scripts]
16 | mcp-googlemaps = "mcp_googlemaps.server:main"
17 |
18 | [build-system]
19 | requires = ["hatchling"]
20 | build-backend = "hatchling.build"
--------------------------------------------------------------------------------
/src/art/dev/__init__.py:
--------------------------------------------------------------------------------
1 | from .engine import EngineArgs
2 | from .model import (
3 | InitArgs,
4 | InternalModelConfig,
5 | PeftArgs,
6 | TrainerArgs,
7 | )
8 | from .openai_server import OpenAIServerConfig, ServerArgs, get_openai_server_config
9 | from .torchtune import TorchtuneArgs
10 | from .train import TrainConfig
11 |
12 | __all__ = [
13 | "EngineArgs",
14 | "InternalModelConfig",
15 | "InitArgs",
16 | "PeftArgs",
17 | "TrainerArgs",
18 | "get_openai_server_config",
19 | "OpenAIServerConfig",
20 | "ServerArgs",
21 | "TorchtuneArgs",
22 | "TrainConfig",
23 | ]
24 |
--------------------------------------------------------------------------------
/dev/swebench/run.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from concurrent.futures import ThreadPoolExecutor
3 | from functools import partial
4 | from typing import Callable, ParamSpec, TypeVar
5 |
6 | executor = ThreadPoolExecutor(max_workers=1024)
7 |
8 | P = ParamSpec("P")
9 | R = TypeVar("R")
10 |
11 |
12 | async def run(
13 | func: Callable[P, R],
14 | in_thread: bool,
15 | *args: P.args,
16 | **kwargs: P.kwargs,
17 | ) -> R:
18 | if in_thread:
19 | return await asyncio.get_running_loop().run_in_executor(
20 | executor, partial(func, *args, **kwargs)
21 | )
22 | return func(*args, **kwargs)
23 |
--------------------------------------------------------------------------------
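
Usage sketch for `run` above: the `in_thread` flag decides whether a blocking callable is shipped to the shared 1024-worker pool or called inline on the event loop. The callable below is made up; assume `run` is imported from this module.

import asyncio
import time


def slow_add(a: int, b: int) -> int:
    time.sleep(1)  # stand-in for blocking I/O
    return a + b


async def main() -> None:
    print(await run(slow_add, True, 1, 2))   # off the event loop, on the pool
    print(await run(slow_add, False, 3, 4))  # inline; blocks the loop


asyncio.run(main())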
/dev/tau-bench/setup.py:
--------------------------------------------------------------------------------
1 | # Copyright Sierra
2 |
3 | from setuptools import find_packages, setup
4 |
5 | setup(
6 | name="tau_bench",
7 | version="0.1.0",
8 | description="The Tau-Bench package",
9 | long_description=open("README.md").read(),
10 | packages=find_packages(),
11 | include_package_data=True,
12 | install_requires=[
13 | "openai>=1.13.3",
14 | "mistralai>=0.4.0",
15 | "anthropic>=0.26.1",
16 | "google-generativeai>=0.5.4",
17 | "tenacity>=8.3.0",
18 | "termcolor>=2.4.0",
19 | "numpy>=1.26.4",
20 | "litellm>=1.41.0",
21 | ],
22 | )
23 |
--------------------------------------------------------------------------------
/dev/test_skypilot/launch.py:
--------------------------------------------------------------------------------
1 | """Training example for MCP agent using rollout with AlphaMcpServer in scenarios."""
2 |
3 | import asyncio
4 |
5 | from dotenv import load_dotenv
6 |
7 | from art.skypilot.backend import SkyPilotBackend
8 |
9 | load_dotenv()
10 |
11 |
12 | async def launch():
13 | backend = await SkyPilotBackend().initialize_cluster(
14 | cluster_name="test-skypilot",
15 | gpu="H100-SXM",
16 | env_path=".env",
17 | force_restart=True,
18 | )
19 |
20 | print("successfully initialized skypilot server")
21 |
22 |
23 | if __name__ == "__main__":
24 | asyncio.run(launch())
25 |
--------------------------------------------------------------------------------
/dev/swebench/sandbox/daytona.py:
--------------------------------------------------------------------------------
1 | import daytona_sdk
2 |
3 | from .sandbox import Provider, Sandbox
4 |
5 |
6 | class DaytonaSandbox(Sandbox):
7 | """
8 | Daytona sandbox.
9 |
10 | Wraps a Daytona sandbox with the shared Sandbox interface.
11 | """
12 |
13 | provider: Provider = "daytona"
14 |
15 | def __init__(self, sandbox: daytona_sdk.AsyncSandbox) -> None:
16 | self._sandbox = sandbox
17 |
18 | async def exec(self, command: str, timeout: int) -> tuple[int, str]:
19 | result = await self._sandbox.process.exec(command, timeout=timeout)
20 | return int(result.exit_code), result.result
21 |
--------------------------------------------------------------------------------
/dev/tau-bench/tau_bench/model_utils/model/exception.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 | from typing import Generic, TypeVar
3 |
4 | T = TypeVar("T")
5 |
6 |
7 | class ModelError(Exception):
8 | def __init__(
9 | self,
10 | short_message: str,
11 | prompt: str | list[dict[str, str]] | None = None,
12 | response: str | None = None,
13 | ) -> None:
14 | super().__init__(short_message)
15 | self.short_message = short_message
16 | self.prompt = prompt
17 | self.response = response
18 |
19 |
20 | @dataclass
21 | class Result(Generic[T]):
22 | value: T | None
23 | error: ModelError | None
24 |
--------------------------------------------------------------------------------
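
`Result` above pairs a value with an optional `ModelError`, so callers can branch without try/except at every call site. A hypothetical unwrap helper (not in the repo):

from tau_bench.model_utils.model.exception import Result


def unwrap(result: Result[str]) -> str:
    if result.error is not None:
        raise result.error
    assert result.value is not None
    return result.value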
/examples/hn_title_generator/skypilot.yaml:
--------------------------------------------------------------------------------
1 | # To launch, run the following command from the root directory of the art repository:
2 | # `uv run sky launch examples/hn_title_generator/skypilot.yaml --cluster=kyle-hn-title-generator-001 --env-file=.env --yes --retry-until-up --down --idle-minutes-to-autostop 10`
3 |
4 | workdir: .
5 | resources:
6 | accelerators: ["H100-SXM:1"]
7 | envs:
8 | HF_HUB_ENABLE_HF_TRANSFER: 1
9 |
10 | setup: |
11 | curl -LsSf https://astral.sh/uv/install.sh | sh
12 |
13 | source $HOME/.local/bin/env
14 |
15 | uv sync
16 |
17 | run: |
18 | echo "Running training script..."
19 | uv run python examples/hn_title_generator/train.py
20 |
--------------------------------------------------------------------------------
/dev/tau-bench/tau_bench/envs/retail/data/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright Sierra
2 |
3 | import json
4 | import os
5 | from typing import Any
6 |
7 | FOLDER_PATH = os.path.dirname(__file__)
8 |
9 |
10 | def load_data() -> dict[str, Any]:
11 | with open(os.path.join(FOLDER_PATH, "orders.json")) as f:
12 | order_data = json.load(f)
13 | with open(os.path.join(FOLDER_PATH, "products.json")) as f:
14 | product_data = json.load(f)
15 | with open(os.path.join(FOLDER_PATH, "users.json")) as f:
16 | user_data = json.load(f)
17 | return {
18 | "orders": order_data,
19 | "products": product_data,
20 | "users": user_data,
21 | }
22 |
--------------------------------------------------------------------------------
/dev/tau-bench/tau_bench/envs/airline/data/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright Sierra
2 |
3 | import json
4 | import os
5 | from typing import Any
6 |
7 | FOLDER_PATH = os.path.dirname(__file__)
8 |
9 |
10 | def load_data() -> dict[str, Any]:
11 | with open(os.path.join(FOLDER_PATH, "flights.json")) as f:
12 | flight_data = json.load(f)
13 | with open(os.path.join(FOLDER_PATH, "reservations.json")) as f:
14 | reservation_data = json.load(f)
15 | with open(os.path.join(FOLDER_PATH, "users.json")) as f:
16 | user_data = json.load(f)
17 | return {
18 | "flights": flight_data,
19 | "reservations": reservation_data,
20 | "users": user_data,
21 | }
22 |
--------------------------------------------------------------------------------
/dev/swebench/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "openpipe-art-swebench"
3 | version = "0.1.0"
4 | requires-python = ">=3.10"
5 | dependencies = [
6 | "aiolimiter>=1.2.1",
7 | "daytona-sdk>=0.21.5",
8 | "langfuse>=2.60.7",
9 | "modal>=1.0.1",
10 | "openpipe-art",
11 | "sweagent",
12 | "swebench>=4.0.3",
13 | ]
14 |
15 | [tool.uv.sources]
16 | openpipe-art = { path = "../../", editable = true }
17 | sweagent = { git = "https://github.com/bradhilton/SWE-agent" }
18 |
19 | [dependency-groups]
20 | dev = [
21 | "ipykernel>=6.29.5",
22 | "ipywidgets>=8.1.7",
23 | "pytest>=8.4.1",
24 | "pytest-asyncio>=1.0.0",
25 | "pytest-timeout>=2.4.0",
26 | "pytest-xdist>=3.8.0",
27 | ]
28 |
--------------------------------------------------------------------------------
/dev/tau-bench/tau_bench/model_utils/func_tools/map.py:
--------------------------------------------------------------------------------
1 | from concurrent.futures import ThreadPoolExecutor
2 | from typing import Callable, Iterable, TypeVar
3 |
4 | T = TypeVar("T")
5 | U = TypeVar("U")
6 |
7 |
8 | def map(
9 | func: Callable[[T], U],
10 | iterable: Iterable[T],
11 | max_concurrency: int | None = None,
12 | use_tqdm: bool = False,
13 | ) -> Iterable[U]:
14 | assert max_concurrency is None or max_concurrency > 0
15 | with ThreadPoolExecutor(max_workers=max_concurrency) as executor:
16 | if use_tqdm:
17 | from tqdm import tqdm
18 |
19 | return list(tqdm(executor.map(func, iterable), total=len(iterable)))
20 | return executor.map(func, iterable)
21 |
--------------------------------------------------------------------------------
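
Usage sketch for the thread-pooled `map` above (the workload is made up): results come back in input order, with at most `max_concurrency` calls in flight.

import time

from tau_bench.model_utils.func_tools import map


def slow_square(x: int) -> int:
    time.sleep(0.1)  # simulate I/O-bound work
    return x * x


print(list(map(slow_square, range(10), max_concurrency=4)))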
/dev/swebench/sandbox/modal.py:
--------------------------------------------------------------------------------
1 | import modal
2 |
3 | from .sandbox import Provider, Sandbox
4 |
5 |
6 | class ModalSandbox(Sandbox):
7 | """
8 | Modal sandbox.
9 |
10 | Wraps a Modal sandbox with the shared Sandbox interface.
11 | """
12 |
13 | provider: Provider = "modal"
14 |
15 | def __init__(self, sandbox: modal.Sandbox) -> None:
16 | self._sandbox = sandbox
17 |
18 | async def exec(self, command: str, timeout: int) -> tuple[int, str]:
19 | process = await self._sandbox.exec.aio(
20 | "/bin/sh", "-c", command, timeout=timeout
21 | )
22 | exit_code = await process.wait.aio()
23 | stdout = await process.stdout.read.aio()
24 | return exit_code, stdout
25 |
--------------------------------------------------------------------------------
/src/art/types.py:
--------------------------------------------------------------------------------
1 | from typing import Annotated, Literal
2 |
3 | import pydantic
4 | from openai.types.chat.chat_completion import Choice
5 | from openai.types.chat.chat_completion_message_param import ChatCompletionMessageParam
6 | from openai.types.chat.chat_completion_tool_param import ChatCompletionToolParam
7 | from pydantic import SkipValidation
8 |
9 | Message = Annotated[ChatCompletionMessageParam, SkipValidation]
10 | MessageOrChoice = Message | Choice
11 | Messages = list[Message]
12 | MessagesAndChoices = list[MessageOrChoice]
13 | Tools = list[ChatCompletionToolParam]
14 |
15 |
16 | class TrainConfig(pydantic.BaseModel):
17 | learning_rate: float = 5e-6
18 | beta: float = 0.0
19 |
20 |
21 | Verbosity = Literal[0, 1, 2]
22 |
--------------------------------------------------------------------------------
/src/art/utils/format_message.py:
--------------------------------------------------------------------------------
1 | from ..types import Message
2 |
3 |
4 | def format_message(message: Message) -> str:
5 | """Format a message into a readable string."""
6 | # Format the role and content
7 | role = message["role"].capitalize()
8 | content = message.get("content", message.get("refusal", "")) or ""
9 |
10 | # Format any tool calls
11 | tool_calls_text = "\n" if content else ""
12 | tool_calls_text += "\n".join(
13 | f"{tool_call['function']['name']}({tool_call['function']['arguments']})"
14 | for tool_call in message.get("tool_calls") or []
15 | )
16 |
17 | # Combine all parts
18 | formatted_message = f"{role}:\n{content}{tool_calls_text}"
19 | return formatted_message
20 |
--------------------------------------------------------------------------------
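
A quick sketch of `format_message` on a hypothetical assistant message with one tool call:

message = {
    "role": "assistant",
    "content": "Checking the weather now.",
    "tool_calls": [
        {"function": {"name": "get_weather", "arguments": '{"city": "Paris"}'}}
    ],
}
print(format_message(message))
# Assistant:
# Checking the weather now.
# get_weather({"city": "Paris"})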
/examples/mcp-rl/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "art-mcp"
3 | version = "0.1.0"
4 | description = "Add your description here"
5 | readme = "README.md"
6 | requires-python = ">=3.10"
7 | dependencies = [
8 | "aiohttp>=3.12.14",
9 | "asyncio>=3.4.3",
10 | "click>=8.1.8",
11 | "mcp>=1.11.0",
12 | "openai>=1.74.0",
13 | "openpipe-art[skypilot]",
14 | "python-dotenv>=1.1.1",
15 | "tenacity>=9.1.2",
16 | "weave>=0.51.56",
17 | ]
18 |
19 |
20 | [tool.uv.sources]
21 | openpipe-art = { path = "../../", editable = true }
22 |
23 | [dependency-groups]
24 | dev = [
25 | "polars>=1.31.0",
26 | "ipywidgets>=8.1.6",
27 | "ipykernel>=6.29.5",
28 | "matplotlib>=3.10.3",
29 | "seaborn>=0.13.2",
30 | ]
31 |
--------------------------------------------------------------------------------
/examples/just-the-facts/just_the_facts/find_articles.py:
--------------------------------------------------------------------------------
1 | import random
2 |
3 | import feedparser
4 |
5 | feeds = {
6 | "NBC News Top Stories": "http://feeds.nbcnews.com/feeds/topstories",
7 | "BBC News Top Stories": "https://feeds.bbci.co.uk/news/rss.xml",
8 | "CBS News Top Stories": "http://www.cbsnews.com/latest/rss/main",
9 | "Fox News Latest": "http://feeds.foxnews.com/foxnews/latest",
10 | }
11 |
12 | all_urls = []
13 |
14 | for name, url in feeds.items():
15 | print(f"\n=== {name} ===")
16 | feed = feedparser.parse(url)
17 |
18 | for entry in feed.entries[:25]:
19 | print(entry.link)
20 | all_urls.append(entry.link)
21 |
22 |
23 | # shuffle
24 | random.shuffle(all_urls)
25 |
26 | print(all_urls)
27 |
--------------------------------------------------------------------------------
/src/art/utils/limit_concurrency.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from functools import wraps
3 | from typing import Callable, Optional
4 |
5 |
6 | def limit_concurrency(n: int, derive_key: Optional[Callable[..., str]] = None):
7 | semaphores = {}
8 |
9 | def decorator(func):
10 | @wraps(func)
11 | async def wrapper(*args, **kwargs):
12 | if derive_key:
13 | key = derive_key(*args, **kwargs)
14 | else:
15 | key = "default"
16 |
17 | if key not in semaphores:
18 | semaphores[key] = asyncio.Semaphore(n)
19 |
20 | async with semaphores[key]:
21 | return await func(*args, **kwargs)
22 |
23 | return wrapper
24 |
25 | return decorator
26 |
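A usage sketch; `derive_key` receives the wrapped function's arguments, so per-key limits (here, per host) fall out naturally:

```python
import asyncio

@limit_concurrency(2, derive_key=lambda host, path: host)
async def fetch(host: str, path: str) -> str:
    await asyncio.sleep(0.1)  # stand-in for real I/O
    return f"{host}{path}"

async def main() -> None:
    # At most two fetches per host are in flight at any moment.
    print(await asyncio.gather(*(fetch("example.com", f"/{i}") for i in range(5))))

asyncio.run(main())
```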
--------------------------------------------------------------------------------
/dev/test_skypilot/launch_tail.py:
--------------------------------------------------------------------------------
  1 | """Launch a SkyPilot cluster and tail its logs until interrupted."""
2 |
3 | import asyncio
4 |
5 | from dotenv import load_dotenv
6 |
7 | from art.skypilot.backend import SkyPilotBackend
8 |
9 | load_dotenv()
10 |
11 |
12 | async def launch_tail():
13 | backend = await SkyPilotBackend().initialize_cluster(
14 | cluster_name="test-skypilot",
15 | gpu="H100-SXM",
16 | env_path=".env",
17 | force_restart=True,
18 | tail_logs=True,
19 | )
20 | print("successfully initialized skypilot server")
21 |
 22 |     # unfortunately, we can't cancel the task programmatically, so we have to ctrl+c
23 | # to exit
24 |
25 |
26 | if __name__ == "__main__":
27 | asyncio.run(launch_tail())
28 |
--------------------------------------------------------------------------------
/src/art/utils/old_benchmarking/calculate_step_metrics.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from art.trajectories import TrajectoryGroup
4 |
5 |
6 | # calculate the average standard deviation of rewards within groups
7 | def calculate_step_std_dev(trajectory_groups: list[TrajectoryGroup]) -> float:
8 | std_devs = []
9 | for group in trajectory_groups:
10 | group_rewards = []
11 |
12 | for trajectory in group.trajectories:
13 | if isinstance(trajectory, BaseException):
14 | continue
15 | group_rewards.append(trajectory.reward)
16 |
17 | if len(group_rewards) > 1:
18 | std_devs.append(np.std(group_rewards))
19 |
20 | if len(std_devs) == 0:
21 | return 0
22 |
23 | return sum(std_devs) / len(std_devs)
24 |
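For intuition, the metric is the mean of per-group population standard deviations; a small numeric check (bypassing TrajectoryGroup construction):

```python
import numpy as np

# Groups with rewards [0.0, 1.0] and [1.0, 1.0] have std devs 0.5 and 0.0,
# so the step-level metric is their mean: 0.25.
group_rewards = [[0.0, 1.0], [1.0, 1.0]]
std_devs = [np.std(rewards) for rewards in group_rewards if len(rewards) > 1]
print(sum(std_devs) / len(std_devs))  # 0.25
```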
--------------------------------------------------------------------------------
/src/art/torchtune/train.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | export MODEL_DIR=$(HF_HUB_ENABLE_HF_TRANSFER=1 uv run huggingface-cli download Qwen/Qwen3-32B | tail -n 1)
4 | export TORCHTUNE_DIR=$(uv run python -c "import torchtune; import os; print(os.path.dirname(torchtune.__file__))")
5 | uv run $TORCHTUNE_DIR/_cli/tune.py run \
6 | --nproc-per-node 8 \
7 | src/art/torchtune/recipe.py \
8 | --config ./src/art/torchtune/config.yaml \
9 | tokenizer.path=$MODEL_DIR/vocab.json \
10 | tokenizer.merges_file=$MODEL_DIR/merges.txt \
11 | checkpointer.checkpoint_dir=$MODEL_DIR \
12 | checkpointer.checkpoint_files="[$(ls $MODEL_DIR/*.safetensors | xargs -n1 basename | sed 's/^/"/;s/$/",/' | tr '\n' ' ' | sed 's/, $//' )]" \
13 | model._component_=torchtune.models.qwen3.qwen3_32b \
14 | "$@"
15 |
--------------------------------------------------------------------------------
/src/art/utils/log_http_errors.py:
--------------------------------------------------------------------------------
1 | from functools import wraps
2 |
3 | import httpx
4 |
5 |
6 | def log_http_errors(func):
7 | @wraps(func)
8 | async def wrapper(*args, **kwargs):
9 | try:
10 | return await func(*args, **kwargs)
11 | except httpx.HTTPStatusError as e:
12 | # raise a new exception with the status code, url, and "detail" key if it exists
13 | try:
14 | detail = e.response.json().get("detail", None)
15 | except Exception:
16 | # if we can't parse the response as json, just raise the original exception
17 | raise e
18 | raise Exception(
19 | f"[HTTP {e.response.status_code}] {e.request.url} {detail}"
20 | ) from e
21 |
22 | return wrapper
23 |
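A usage sketch; failed requests surface as a single exception carrying the status code, URL, and any JSON `detail` field:

```python
import httpx

@log_http_errors
async def get_json(url: str) -> dict:
    async with httpx.AsyncClient() as client:
        response = await client.get(url)
        response.raise_for_status()  # raises httpx.HTTPStatusError on 4xx/5xx
        return response.json()
```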
--------------------------------------------------------------------------------
/dev/tau-bench/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "tau-bench"
3 | version = "0.1.0"
4 | requires-python = ">=3.11"
5 | dependencies = [
6 | "google-generativeai>=0.8.5",
7 | "langfuse>=2.60.8",
8 | "litellm>=1.72.6.post2",
9 | "mistralai>=1.8.2",
10 | "openpipe>=4.50.0",
11 | "openpipe-art",
12 | "skypilot-nightly[runpod,hyperbolic]==1.0.0.dev20250717",
13 | "tenacity>=9.1.2",
14 | "termcolor>=3.1.0",
15 | "openai>=1.74.0",
16 | "anthropic>=0.49.0",
17 | "accelerate==1.7.0",
18 | "vllm==0.9.1; sys_platform == 'linux'"
19 | ]
20 |
21 | [tool.uv]
22 | override-dependencies = ["vllm; sys_platform == 'linux'"]
23 |
24 | [tool.uv.sources]
25 | openpipe-art = { path = "../../", editable = true }
26 |
27 | [dependency-groups]
28 | dev = [
29 | "ipykernel>=6.29.5",
30 | "ipywidgets>=8.1.7",
31 | ]
32 |
--------------------------------------------------------------------------------
/examples/mcp-rl/mcp_rl/utils.py:
--------------------------------------------------------------------------------
1 | from mcp import types
2 |
3 |
4 | def get_content_text(result: types.CallToolResult) -> str:
5 | # Extract text content from MCP result
6 | if hasattr(result, "content") and result.content:
  7 |         if isinstance(result.content, list):
  8 |             # Concatenate text from each content item in order,
  9 |             # falling back to str() for non-text items.
 10 |             content_text = ""
 11 |             for item in result.content:
 12 |                 if isinstance(item, types.TextContent):
 13 |                     content_text += item.text
 14 |                 else:
 15 |                     content_text += str(item)
 16 |         else:
 17 |             # Unexpected non-list content; stringify as a fallback.
 18 |             content_text = str(result.content)
19 | else:
20 | content_text = str(result)
21 |
22 | return content_text
23 |
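A quick check using the MCP types directly:

```python
result = types.CallToolResult(
    content=[types.TextContent(type="text", text="42")]
)
assert get_content_text(result) == "42"
```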
--------------------------------------------------------------------------------
/src/art/utils/deployment/__init__.py:
--------------------------------------------------------------------------------
1 | """Deployment utilities for deploying trained models to inference endpoints."""
2 |
3 | from .common import (
4 | DeploymentConfig,
5 | DeploymentResult,
6 | Provider,
7 | deploy_model,
8 | )
9 |
10 | # Legacy exports for backwards compatibility
11 | from .legacy import (
12 | LoRADeploymentJob,
13 | LoRADeploymentProvider,
14 | )
15 | from .together import (
16 | TogetherDeploymentConfig,
17 | )
18 | from .wandb import (
19 | WandbDeploymentConfig,
20 | deploy_wandb,
21 | )
22 |
23 | __all__ = [
24 | # New API
25 | "DeploymentConfig",
26 | "DeploymentResult",
27 | "Provider",
28 | "TogetherDeploymentConfig",
29 | "WandbDeploymentConfig",
30 | "deploy_model",
31 | "deploy_wandb",
32 | # Legacy API
33 | "LoRADeploymentJob",
34 | "LoRADeploymentProvider",
35 | ]
36 |
--------------------------------------------------------------------------------
/src/art/utils/get_model_step.py:
--------------------------------------------------------------------------------
1 | import os
2 | from typing import TYPE_CHECKING
3 |
4 | from art.utils.output_dirs import get_model_dir
5 |
6 | if TYPE_CHECKING:
7 | from art.model import TrainableModel
8 |
9 |
10 | def get_step_from_dir(output_dir: str) -> int:
11 | os.makedirs(output_dir, exist_ok=True)
12 | checkpoint_dir = os.path.join(output_dir, "checkpoints")
13 | if not os.path.exists(checkpoint_dir):
14 | return 0
15 |
16 | return max(
17 | (
18 | int(subdir)
19 | for subdir in os.listdir(checkpoint_dir)
20 | if os.path.isdir(os.path.join(checkpoint_dir, subdir)) and subdir.isdigit()
21 | ),
22 | default=0,
23 | )
24 |
25 |
26 | def get_model_step(model: "TrainableModel", art_path: str) -> int:
27 | return get_step_from_dir(get_model_dir(model=model, art_path=art_path))
28 |
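A usage sketch (paths are hypothetical): the step is the highest numbered subdirectory under `checkpoints/`, or 0 if none exist.

```python
# Given checkpoints at .art/demo/models/agent/checkpoints/{0001,0003}/,
# the highest numbered subdirectory wins:
print(get_step_from_dir(".art/demo/models/agent"))  # -> 3
```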
--------------------------------------------------------------------------------
/src/art/local/service.py:
--------------------------------------------------------------------------------
1 | from typing import AsyncIterator, Protocol, runtime_checkable
2 |
3 | from .. import dev, types
4 | from ..preprocessing.pack import DiskPackedTensors
5 |
6 |
7 | @runtime_checkable
8 | class ModelService(Protocol):
9 | def __init__(
10 | self,
11 | model_name: str,
12 | base_model: str,
13 | config: dev.InternalModelConfig,
14 | output_dir: str,
15 | ):
16 | pass
17 |
18 | async def start_openai_server(
19 | self, config: dev.OpenAIServerConfig | None
20 | ) -> None: ...
21 |
22 | async def vllm_engine_is_sleeping(self) -> bool: ...
23 |
24 | def train(
25 | self,
26 | disk_packed_tensors: DiskPackedTensors,
27 | config: types.TrainConfig,
28 | _config: dev.TrainConfig,
29 | verbose: bool = False,
30 | ) -> AsyncIterator[dict[str, float]]: ...
31 |
--------------------------------------------------------------------------------
/examples/just-the-facts/test_scraper.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import asyncio
4 |
5 | from just_the_facts.scenarios import train_urls, val_urls
6 | from just_the_facts.utils import scrape_article
7 |
8 |
9 | async def test_scraper():
10 | """Test the scrape_article function with example URLs"""
11 |
 12 |     # Test every train and validation URL from the scenarios module
13 | test_urls = train_urls + val_urls
14 |
15 | for url in test_urls:
16 | try:
17 | print(f"\nTesting URL: {url}")
18 | article_text = await scrape_article(url)
19 | print(f"Successfully scraped {len(article_text)} characters")
20 | print(f"First 200 characters: {article_text[:200]}...")
21 | except Exception as e:
22 | print(f"Failed to scrape {url}: {str(e)}")
23 | raise e
24 |
25 |
26 | if __name__ == "__main__":
27 | asyncio.run(test_scraper())
28 |
--------------------------------------------------------------------------------
/src/art/langgraph/logging.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pickle
3 |
4 |
5 | class FileLogger:
6 | def __init__(self, filepath):
7 | self.text_path = filepath
8 | self.pickle_path = filepath + ".pkl"
9 |
10 | def log(self, name, entry):
11 | # Log as readable text
12 | with open(self.text_path, "a") as f:
13 | f.write(f"{name}: {entry}\n")
14 |
15 | # Append to pickle log
16 | with open(self.pickle_path, "ab") as pf:
17 | pickle.dump((name, entry), pf)
18 |
19 | def load_logs(self):
20 | """Load all logs from the pickle file."""
21 | if not os.path.exists(self.pickle_path):
22 | return []
23 | logs = []
24 | with open(self.pickle_path, "rb") as pf:
25 | try:
26 | while True:
27 | logs.append(pickle.load(pf))
28 | except EOFError:
29 | pass
30 | return logs
31 |
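A usage sketch; each entry is written both as readable text and as a pickled `(name, entry)` tuple:

```python
logger = FileLogger("/tmp/art_demo.log")
logger.log("reward", 0.75)
logger.log("step", 1)
print(logger.load_logs())  # [("reward", 0.75), ("step", 1)]
```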
--------------------------------------------------------------------------------
/src/art/utils/benchmark_rollout.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Callable, Coroutine
2 |
3 | import art
4 |
5 | from ..trajectories import Trajectory, TrajectoryGroup
6 |
7 |
8 | async def benchmark_rollout(
9 | model: str,
10 | num_rollouts: int,
11 | rollout: Callable[[str, int, bool], Coroutine[Any, Any, Trajectory]],
12 | ) -> float:
13 | trajectory_groups = await art.gather_trajectory_groups(
14 | [TrajectoryGroup(rollout(model, i, False) for i in range(num_rollouts))],
15 | pbar_desc="Benchmarking rollout",
16 | )
17 |
18 | trajectory_group_rewards = []
19 |
20 | for group in trajectory_groups:
21 | total_reward = sum(trajectory.reward for trajectory in group)
22 | trajectory_group_rewards.append(total_reward / len(group))
23 |
24 | average_reward = sum(trajectory_group_rewards) / len(trajectory_group_rewards)
25 |
26 | print(f"Average reward for {model}: {average_reward}")
27 |
28 | return average_reward
29 |
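A hedged usage sketch; the rollout body and the `"agent-001"` model name are placeholders:

```python
async def rollout(model: str, index: int, verbose: bool) -> Trajectory:
    ...  # run one episode against `model` and return a reward-scored Trajectory

# average = await benchmark_rollout("agent-001", num_rollouts=8, rollout=rollout)
```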
--------------------------------------------------------------------------------
/src/art/utils/logging.py:
--------------------------------------------------------------------------------
1 | import time
2 |
3 |
4 | # ---------- lightweight "nice print" helpers ----------
5 | class _C:
6 | RESET = "\x1b[0m"
7 | DIM = "\x1b[2m"
8 | BOLD = "\x1b[1m"
9 | ITAL = "\x1b[3m"
10 | GRAY = "\x1b[90m"
11 | BLUE = "\x1b[34m"
12 | CYAN = "\x1b[36m"
13 | GREEN = "\x1b[32m"
14 | YELLOW = "\x1b[33m"
15 | RED = "\x1b[31m"
16 | MAGENTA = "\x1b[35m"
17 |
18 |
19 | def _ts():
20 | return time.strftime("%H:%M:%S")
21 |
22 |
23 | def info(msg):
24 | print(f"[{_ts()}] {_C.BLUE}INFO{_C.RESET} {msg}")
25 |
26 |
27 | def step(msg):
28 | print(f"[{_ts()}] {_C.CYAN}STEP{_C.RESET} {msg}")
29 |
30 |
31 | def ok(msg):
32 | print(f"[{_ts()}] {_C.GREEN}OK{_C.RESET} {msg}")
33 |
34 |
35 | def warn(msg):
36 | print(f"[{_ts()}] {_C.YELLOW}WARN{_C.RESET} {msg}")
37 |
38 |
39 | def err(msg):
40 | print(f"[{_ts()}] {_C.RED}ERR{_C.RESET} {msg}")
41 |
42 |
43 | def dim(msg):
44 | print(f"{_C.DIM}{msg}{_C.RESET}")
45 |
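The helpers are drop-in replacements for bare prints, each prefixed with a timestamp and a colored level tag:

```python
info("starting run")
step("loading checkpoint 0003")
ok("checkpoint restored")
warn("W&B key missing; metrics will not be synced")
err("vLLM server failed to start")
```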
--------------------------------------------------------------------------------
/dev/tau-bench/tau_bench/envs/retail/rules.py:
--------------------------------------------------------------------------------
1 | # Copyright Sierra
2 |
3 | RULES = [
4 | "You are a customer service representative for an online retail company. You are chatting with a customer, and you can call tools or respond to the user.",
5 | "The agent should always first confirm the user id by email or name+zip before proceeding with any task.",
6 | "The agent should not proceed with any task if the user id is not found.",
7 | "For any change to the backend database, e.g., address update, refund, or order cancellation, the agent must confirm the transaction details with the user and ask for permission, and get explicit authorization (yes) to proceed.",
8 | "The agent should solve the user task given the tools, without transferring to a human agent.",
9 | "The agent should not make up any information or knowledge not provided from the user or the tools.",
10 | "The agent should at most make one tool call at a time, and if the agent makes a tool call, it does not respond to the user at the same time.",
11 | ]
12 |
--------------------------------------------------------------------------------
/docs/analytics.js:
--------------------------------------------------------------------------------
1 | !(function () {
2 | var reb2b = (window.reb2b = window.reb2b || []);
3 | if (reb2b.invoked) return;
4 | reb2b.invoked = true;
5 | reb2b.methods = ["identify", "collect"];
6 | reb2b.factory = function (method) {
7 | return function () {
8 | var args = Array.prototype.slice.call(arguments);
9 | args.unshift(method);
10 | reb2b.push(args);
11 | return reb2b;
12 | };
13 | };
14 | for (var i = 0; i < reb2b.methods.length; i++) {
15 | var key = reb2b.methods[i];
16 | reb2b[key] = reb2b.factory(key);
17 | }
18 | reb2b.load = function (key) {
19 | var script = document.createElement("script");
20 | script.type = "text/javascript";
21 | script.async = true;
22 | script.src =
23 | "https://s3-us-west-2.amazonaws.com/b2bjsstore/b/" + key + "/reb2b.js.gz";
24 | var first = document.getElementsByTagName("script")[0];
25 | first.parentNode.insertBefore(script, first);
26 | };
27 | reb2b.SNIPPET_VERSION = "1.0.1";
28 | reb2b.load("4O7Z0HMXYWNX");
29 | })();
30 |
--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
1 | # I recommend setting your API key here if you're going to ssh into a new machine and use the local backend
2 | WANDB_API_KEY=YOUR_WANDB_API_KEY
3 |
4 | # Optional, git-related environment variables
5 | # You may need these if you want to make any git commits on a new machine
6 | GIT_USER_NAME="Your Name"
7 | GIT_USER_EMAIL=your.email@example.com
  8 | # A GitHub token might be required for committing to the private `agent-reinforcement-training` repository
9 | GITHUB_TOKEN=YOUR_GITHUB_TOKEN
10 |
11 | # HuggingFace Token (optional for most models, necessary for training gated models like Llama 3.1)
12 | HF_TOKEN=YOUR_HUGGINGFACE_TOKEN
13 |
14 | # Optional, OpenPipe API key
15 | OPENPIPE_API_KEY=YOUR_OPENPIPE_API_KEY
16 | # Optional, Together API key (used for deploying models to Together)
17 | TOGETHER_API_KEY=YOUR_TOGETHER_API_KEY
18 |
19 | # Optional, S3 configuration for log and model backups
20 | AWS_ACCESS_KEY_ID=YOUR_AWS_ACCESS_KEY_ID
21 | AWS_SECRET_ACCESS_KEY=YOUR_AWS_SECRET_ACCESS_KEY
22 | AWS_REGION=YOUR_AWS_REGION
23 | BACKUP_BUCKET=YOUR_BACKUP_BUCKET
--------------------------------------------------------------------------------
/examples/just-the-facts/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "just-the-facts"
3 | version = "0.1.0"
4 | description = "Add your description here"
5 | readme = "README.md"
6 | requires-python = ">=3.10"
7 | dependencies = [
8 | "aiohttp>=3.12.14",
9 | "asyncio>=3.4.3",
10 | "beautifulsoup4>=4.13.4",
11 | "click>=8.1.8",
12 | "feedparser>=6.0.11",
13 | "lxml>=6.0.0",
14 | "lxml-html-clean>=0.4.2",
15 | "mcp>=1.11.0",
16 | "newspaper3k>=0.2.8",
17 | "openai>=1.74.0",
18 | "openpipe-art[skypilot]",
19 | "python-dotenv>=1.1.1",
20 | "tenacity>=9.1.2",
21 | "weave>=0.51.56",
22 | ]
23 |
24 | [build-system]
25 | requires = ["setuptools>=61.0", "wheel"]
26 | build-backend = "setuptools.build_meta"
27 |
28 | [tool.setuptools.packages.find]
29 | where = ["."]
30 | include = ["just_the_facts*"]
31 |
32 | [tool.uv.sources]
33 | openpipe-art = { path = "../../", editable = true }
34 |
35 | [dependency-groups]
36 | dev = [
37 | "polars>=1.31.0",
38 | "ipywidgets>=8.1.6",
39 | "ipykernel>=6.29.5",
40 | "matplotlib>=3.10.3",
41 | "seaborn>=0.13.2",
42 | ]
43 |
--------------------------------------------------------------------------------
/src/art/utils/old_benchmarking/generate_comparison_table.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 | from .load_benchmarked_models import load_benchmarked_models
4 | from .types import BenchmarkedModelKey
5 |
6 |
7 | def generate_comparison_table(
8 | project: str,
9 | benchmark_keys: list[BenchmarkedModelKey],
10 | metrics: list[str] = ["reward"],
11 | api_path: str = "./.art",
12 | ) -> pd.DataFrame:
13 | benchmarked_models = load_benchmarked_models(
14 | project, benchmark_keys, metrics, api_path
15 | )
16 |
17 | rows: list[dict[str, str]] = []
18 |
19 | for benchmarked_model in benchmarked_models:
20 | for step in benchmarked_model.steps:
21 | row = {
22 | "Model": benchmarked_model.model_key.model,
23 | "Split": benchmarked_model.model_key.split,
24 | "Step": f"{step.index:04d}",
25 | }
26 | for metric in metrics:
27 | row[metric] = str(step.metrics.get(metric, "N/A"))
28 | rows.append(row)
29 |
30 | return pd.DataFrame(rows, columns=pd.Index(["Model", "Split", "Step"] + metrics))
31 |
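A hedged usage sketch; the `BenchmarkedModelKey` constructor arguments are assumptions inferred from how `model_key.model` and `model_key.split` are read above:

```python
table = generate_comparison_table(
    project="my-project",
    benchmark_keys=[BenchmarkedModelKey(model="agent-001", split="val")],
    metrics=["reward"],
)
print(table.to_string(index=False))
```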
--------------------------------------------------------------------------------
/dev/tau-bench/tau_bench/envs/airline/tools/think.py:
--------------------------------------------------------------------------------
1 | # Copyright Sierra
2 |
3 | from typing import Any, Dict
4 |
5 | from tau_bench.envs.tool import Tool
6 |
7 |
8 | class Think(Tool):
9 | @staticmethod
10 | def invoke(data: Dict[str, Any], thought: str) -> str:
11 | return ""
12 |
13 | @staticmethod
14 | def get_info() -> Dict[str, Any]:
15 | return {
16 | "type": "function",
17 | "function": {
18 | "name": "think",
19 | "description": "Use the tool to think about something. It will not obtain new information or change the database, but just append the thought to the log. Use it when complex reasoning is needed.",
20 | "parameters": {
21 | "type": "object",
22 | "properties": {
23 | "thought": {
24 | "type": "string",
25 | "description": "A thought to think about.",
26 | },
27 | },
28 | "required": ["thought"],
29 | },
30 | },
31 | }
32 |
--------------------------------------------------------------------------------
/dev/tau-bench/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 Sierra
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/dev/new_models/prompts.json:
--------------------------------------------------------------------------------
1 | ["respond with 'yes', 'no', 'maybe'", "respond with 'maybe', 'yes', 'no'", "respond with 'no', 'yes', 'maybe'", "respond with 'yes', 'maybe', 'no'", "respond with yes or no", "respond with maybe or no", "respond with no or maybe", "respond with no or yes", "respond with yes or no", "respond with yes, no, maybe", "respond with maybe, yes, no", "respond with no, yes, maybe", "respond with yes, maybe, no", "respond with yes or no", "respond with maybe or no", "respond with no or maybe", "respond with no or yes", "respond with yes or no", "just respond with 'yes', 'no', 'maybe'", "just respond with 'maybe', 'yes', 'no'", "just respond with 'no', 'yes', 'maybe'", "just respond with 'yes', 'maybe', 'no'", "just respond with yes or no", "just respond with maybe or no", "just respond with no or maybe", "just respond with no or yes", "just respond with yes or no", "just respond with yes, no, maybe", "just respond with maybe, yes, no", "just respond with no, yes, maybe", "just respond with yes, maybe, no", "just respond with yes or no", "just respond with maybe or no", "just respond with no or maybe", "just respond with no or yes", "just respond with yes or no"]
--------------------------------------------------------------------------------
/dev/tau-bench/tau_bench/model_utils/model/vllm_utils.py:
--------------------------------------------------------------------------------
1 | from typing import Any
2 |
3 | import requests
4 |
5 | from tau_bench.model_utils.model.general_model import wrap_temperature
6 |
7 |
8 | def generate_request(
9 | url: str,
10 | prompt: str,
11 | temperature: float = 0.0,
12 | force_json: bool = False,
13 | **req_body_kwargs: Any,
14 | ) -> str:
15 | args = {
16 | "prompt": prompt,
17 | "temperature": wrap_temperature(temperature),
18 | "max_tokens": 4096,
19 | **req_body_kwargs,
20 | }
21 | if force_json:
22 | # the prompt will have a suffix of '```json\n' to indicate that the response should be a JSON object
23 | args["stop"] = ["```"]
24 | res = requests.post(
25 | url,
26 | json=args,
27 | )
28 | res.raise_for_status()
29 | json_res = res.json()
30 | if "text" not in json_res:
31 | raise ValueError(f"Unexpected response: {json_res}")
32 | elif len(json_res["text"]) == 0:
33 | raise ValueError(f"Empty response: {json_res}")
34 | text = json_res["text"][0]
35 | assert isinstance(text, str)
36 | return text.removeprefix(prompt)
37 |
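A hedged usage sketch; the localhost URL assumes a vLLM completion server exposing a `/generate` endpoint:

```python
text = generate_request(
    url="http://localhost:8000/generate",
    prompt="Say hi",
    temperature=0.7,
)
print(text)
```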
--------------------------------------------------------------------------------
/src/art/yield_trajectory.py:
--------------------------------------------------------------------------------
1 | import contextvars
2 | from typing import Any, Coroutine
3 |
4 | from .trajectories import Trajectory
5 |
6 |
7 | def yield_trajectory(trajectory: Trajectory) -> None:
8 | yield_trajectory_context_var.get().trajectory = trajectory
9 |
10 |
11 | async def capture_yielded_trajectory(coroutine: Coroutine[Any, Any, Any]) -> Trajectory:
12 | with YieldTrajectoryContext():
13 | await coroutine
14 | trajectory = yield_trajectory_context_var.get().trajectory
15 | if trajectory is None:
16 | raise RuntimeError("No trajectory yielded")
17 | return trajectory
18 |
19 |
20 | class YieldTrajectoryContext:
21 | def __init__(self) -> None:
22 | self.trajectory: Trajectory | None = None
23 |
24 | def __enter__(self) -> None:
25 | self.token = yield_trajectory_context_var.set(self)
26 |
27 | def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
28 | yield_trajectory_context_var.reset(self.token)
29 |
30 |
31 | yield_trajectory_context_var: contextvars.ContextVar[YieldTrajectoryContext] = (
32 | contextvars.ContextVar("yield_trajectory_context", default=YieldTrajectoryContext())
33 | )
34 |
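A hedged usage sketch; the `Trajectory` constructor fields shown are assumptions:

```python
async def my_rollout() -> None:
    trajectory = Trajectory(messages_and_choices=[], reward=1.0)  # fields assumed
    yield_trajectory(trajectory)

# trajectory = await capture_yielded_trajectory(my_rollout())
```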
--------------------------------------------------------------------------------
/src/art/dev/train.py:
--------------------------------------------------------------------------------
1 | from typing import Literal
2 |
3 | from typing_extensions import TypedDict
4 |
5 |
6 | class TrainConfig(TypedDict, total=False):
7 | advantage_balance: float
8 | """Balance between negative and positive advantages in the range [-1.0, 1.0]. \
9 | -1.0 means only training on negative advantages, 1.0 means only training on \
10 | positive advantages. Defaults to 0.0 (perfectly balanced)."""
11 | allow_training_without_logprobs: bool
12 | epsilon: float # clip epsilon, using the same name as TRL
13 | epsilon_high: (
14 | float | None
15 | ) # asymmetric clip upper bound. Defaults to epsilon when None
16 | importance_sampling_level: Literal[
17 | "token", "sequence", "average", "geometric_average"
18 | ]
19 | kimi_k2_tau: float | None
20 | logprob_calculation_chunk_size: int
21 | mask_prob_ratio: bool
22 | max_negative_advantage_importance_sampling_weight: float
23 | num_trajectories_learning_rate_multiplier_power: float
24 | plot_tensors: bool
25 | ppo: bool
26 | precalculate_logprobs: bool
27 | scale_learning_rate_by_reward_std_dev: bool
28 | scale_rewards: bool
29 | truncated_importance_sampling: float | None
30 |
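Because the TypedDict is declared with `total=False`, any subset of keys is a valid config:

```python
config: TrainConfig = {
    "epsilon": 0.2,
    "importance_sampling_level": "token",
    "scale_rewards": True,
}
```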
--------------------------------------------------------------------------------
/dev/tau-bench/tau_bench/envs/retail/tools/get_user_details.py:
--------------------------------------------------------------------------------
1 | # Copyright Sierra
2 |
3 | import json
4 | from typing import Any, Dict
5 |
6 | from tau_bench.envs.tool import Tool
7 |
8 |
9 | class GetUserDetails(Tool):
10 | @staticmethod
11 | def invoke(data: Dict[str, Any], user_id: str) -> str:
12 | users = data["users"]
13 | if user_id in users:
14 | return json.dumps(users[user_id])
15 | return "Error: user not found"
16 |
17 | @staticmethod
18 | def get_info() -> Dict[str, Any]:
19 | return {
20 | "type": "function",
21 | "function": {
22 | "name": "get_user_details",
23 | "description": "Get the details of a user, including their orders.",
24 | "parameters": {
25 | "type": "object",
26 | "properties": {
27 | "user_id": {
28 | "type": "string",
29 | "description": "The user id, such as 'sara_doe_496'.",
30 | },
31 | },
32 | "required": ["user_id"],
33 | },
34 | },
35 | }
36 |
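The same invocation pattern applies to all tau-bench tools; a minimal check with an in-memory data dict:

```python
data = {"users": {"sara_doe_496": {"email": "sara@example.com"}}}
print(GetUserDetails.invoke(data, user_id="sara_doe_496"))
# {"email": "sara@example.com"}
```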
--------------------------------------------------------------------------------
/.github/workflows/ruff.yml:
--------------------------------------------------------------------------------
1 | name: Code Quality Checks
2 |
3 | on:
4 | pull_request:
5 | branches: [ main ]
6 | push:
7 | branches: [ main ]
8 |
9 | jobs:
10 | quality-checks:
11 | runs-on: ubuntu-latest
12 |
13 | steps:
14 | - name: Checkout code
15 | uses: actions/checkout@v4
16 |
17 | - name: Set up Python
18 | uses: actions/setup-python@v5
19 | with:
20 | python-version: '3.10'
21 |
22 | - name: Install uv
23 | run: |
24 | curl -LsSf https://astral.sh/uv/install.sh | sh
25 | echo "$HOME/.cargo/bin" >> $GITHUB_PATH
26 |
27 | - name: Install dependencies
28 | run: |
29 | uv sync --all-extras
30 |
31 | - name: Run code quality checks
32 | run: |
33 | ./scripts/run_checks.sh --verbose-test-failure || {
34 | echo ""
35 | echo "❌ Code quality checks failed!"
36 | echo ""
37 | echo "To fix these issues locally, run:"
38 | echo " ./scripts/run_checks.sh --fix"
39 | echo ""
40 | echo "Then commit and push the changes."
41 | echo ""
42 | echo "For more details, see CONTRIBUTING.md"
43 | exit 1
44 | }
--------------------------------------------------------------------------------
/dev/tau-bench/tau_bench/envs/airline/tools/get_user_details.py:
--------------------------------------------------------------------------------
1 | # Copyright Sierra
2 |
3 | import json
4 | from typing import Any, Dict
5 |
6 | from tau_bench.envs.tool import Tool
7 |
8 |
9 | class GetUserDetails(Tool):
10 | @staticmethod
11 | def invoke(data: Dict[str, Any], user_id: str) -> str:
12 | users = data["users"]
13 | if user_id in users:
14 | return json.dumps(users[user_id])
15 | return "Error: user not found"
16 |
17 | @staticmethod
18 | def get_info() -> Dict[str, Any]:
19 | return {
20 | "type": "function",
21 | "function": {
22 | "name": "get_user_details",
 23 |                 "description": "Get the details of a user, including their reservations.",
24 | "parameters": {
25 | "type": "object",
26 | "properties": {
27 | "user_id": {
28 | "type": "string",
29 | "description": "The user id, such as 'sara_doe_496'.",
30 | },
31 | },
32 | "required": ["user_id"],
33 | },
34 | },
35 | }
36 |
--------------------------------------------------------------------------------
/dev/tau-bench/tau_bench/envs/retail/tools/list_all_product_types.py:
--------------------------------------------------------------------------------
1 | # Copyright Sierra
2 |
3 | import json
4 | from typing import Any, Dict
5 |
6 | from tau_bench.envs.tool import Tool
7 |
8 |
9 | class ListAllProductTypes(Tool):
10 | @staticmethod
11 | def invoke(data: Dict[str, Any]) -> str:
12 | products = data["products"]
13 | product_dict = {
14 | product["name"]: product["product_id"] for product in products.values()
15 | }
16 | product_dict = dict(sorted(product_dict.items()))
17 | return json.dumps(product_dict)
18 |
19 | @staticmethod
20 | def get_info() -> Dict[str, Any]:
21 | return {
22 | "type": "function",
23 | "function": {
24 | "name": "list_all_product_types",
25 | "description": "List the name and product id of all product types. Each product type has a variety of different items with unique item ids and options. There are only 50 product types in the store.",
26 | "parameters": {
27 | "type": "object",
28 | "properties": {},
29 | "required": [],
30 | },
31 | },
32 | }
33 |
--------------------------------------------------------------------------------
/src/art/utils/benchmarking/filter_model_split.py:
--------------------------------------------------------------------------------
1 | try:
2 | import polars as pl
3 | except ImportError:
4 | raise ImportError(
5 | "Plotting dependencies are not installed. Please install them with: "
6 | "pip install openpipe-art[plotting]"
7 | )
8 |
9 | from art.utils.benchmarking.types import BenchmarkModelKey
10 |
11 |
12 | def filter_rename_model_split(
13 | df: pl.DataFrame, models: list[BenchmarkModelKey]
14 | ) -> pl.DataFrame:
15 | # filter by combinations of name + split
16 | z = pl.fold(
17 | acc=pl.lit(False),
18 | function=lambda acc, expr: acc | expr,
19 | exprs=[
20 | (pl.col("model") == model.name) & (pl.col("split") == model.split)
21 | for model in models
22 | ],
23 | )
24 |
25 | df = df.filter(z)
26 |
27 | for model in models:
28 | if model.name != model.display_name:
29 | df = df.with_columns(
30 | pl.when(
31 | (pl.col("model") == model.name) & (pl.col("split") == model.split)
32 | )
33 | .then(pl.lit(model.display_name))
34 | .otherwise(pl.col("model"))
35 | .alias("model")
36 | )
37 |
38 | return df
39 |
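A hedged usage sketch; the `BenchmarkModelKey` keyword arguments are assumptions based on the attributes read above:

```python
df = pl.DataFrame(
    {"model": ["a", "b"], "split": ["val", "val"], "reward": [0.1, 0.2]}
)
keys = [BenchmarkModelKey(name="a", split="val", display_name="Agent A")]
print(filter_rename_model_split(df, keys))  # row "b" dropped, "a" renamed
```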
--------------------------------------------------------------------------------
/dev/tau-bench/tau_bench/envs/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright Sierra
2 |
3 | from typing import Optional, Union
4 |
5 | from tau_bench.envs.base import Env
6 | from tau_bench.envs.user import UserStrategy
7 |
8 |
9 | def get_env(
10 | env_name: str,
11 | user_strategy: Union[str, UserStrategy],
12 | user_model: str,
13 | task_split: str,
14 | user_provider: Optional[str] = None,
15 | task_index: Optional[int] = None,
16 | ) -> Env:
17 | if env_name == "retail":
18 | from tau_bench.envs.retail import MockRetailDomainEnv
19 |
20 | return MockRetailDomainEnv(
21 | user_strategy=user_strategy,
22 | user_model=user_model,
23 | task_split=task_split,
24 | user_provider=user_provider,
25 | task_index=task_index,
26 | )
27 | elif env_name == "airline":
28 | from tau_bench.envs.airline import MockAirlineDomainEnv
29 |
30 | return MockAirlineDomainEnv(
31 | user_strategy=user_strategy,
32 | user_model=user_model,
33 | task_split=task_split,
34 | user_provider=user_provider,
35 | task_index=task_index,
36 | )
37 | else:
38 | raise ValueError(f"Unknown environment: {env_name}")
39 |
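A hedged construction sketch; the `"llm"` strategy string is an assumption about `UserStrategy`'s string form:

```python
env = get_env(
    env_name="retail",
    user_strategy="llm",  # assumed string equivalent of UserStrategy.LLM
    user_model="gpt-4o",
    task_split="test",
)
```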
--------------------------------------------------------------------------------
/dev/tau-bench/tau_bench/envs/airline/tools/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright Sierra
2 |
3 | from .book_reservation import BookReservation
4 | from .calculate import Calculate
5 | from .cancel_reservation import CancelReservation
6 | from .get_reservation_details import GetReservationDetails
7 | from .get_user_details import GetUserDetails
8 | from .list_all_airports import ListAllAirports
9 | from .search_direct_flight import SearchDirectFlight
10 | from .search_onestop_flight import SearchOnestopFlight
11 | from .send_certificate import SendCertificate
12 | from .think import Think
13 | from .transfer_to_human_agents import TransferToHumanAgents
14 | from .update_reservation_baggages import UpdateReservationBaggages
15 | from .update_reservation_flights import UpdateReservationFlights
16 | from .update_reservation_passengers import UpdateReservationPassengers
17 |
18 | ALL_TOOLS = [
19 | BookReservation,
20 | Calculate,
21 | CancelReservation,
22 | GetReservationDetails,
23 | GetUserDetails,
24 | ListAllAirports,
25 | SearchDirectFlight,
26 | SearchOnestopFlight,
27 | SendCertificate,
28 | Think,
29 | TransferToHumanAgents,
30 | UpdateReservationBaggages,
31 | UpdateReservationFlights,
32 | UpdateReservationPassengers,
33 | ]
34 |
--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 | # OpenPipe Documentation
2 |
3 | This repository contains the source for the ART documentation website hosted at [https://art.openpipe.ai](https://art.openpipe.ai).
4 |
5 | ## Prerequisites
6 |
7 | Ensure you have the following packages installed on your machine:
8 |
9 | - [pnpm](https://pnpm.io/installation)
10 | - [node](https://nodejs.org/en/download/)
11 |
12 | ## Contributing
13 |
14 | To edit the documentation follow these steps:
15 |
16 | 1. Clone the repository
17 | 2. Navigate to the `docs` directory
18 | 3. Run `pnpm install` to install the dependencies
19 | 4. Run `pnpm dev` to start the development server
20 | 5. Edit the files in the `docs` directory
21 |
22 | Edits to files should immediately be reflected in the development server.
23 |
24 | ### Adding new pages
25 |
26 | 1. Create a new .mdx file in the `docs` directory
2. Navigate to the `mint.json` file and add the new page to the appropriate section of the `navigation` array, or create a new section. Ensure that the path to the new page is correct.
28 |
29 | ### Deploying changes
30 |
31 | To deploy changes to the hosted docs, commit your changes in a new git branch and create a pull request. Once the pull request is merged, the changes will be deployed to the hosted docs.
32 |
--------------------------------------------------------------------------------
/dev/tau-bench/tau_bench/envs/airline/tools/get_reservation_details.py:
--------------------------------------------------------------------------------
1 | # Copyright Sierra
2 |
3 | import json
4 | from typing import Any, Dict
5 |
6 | from tau_bench.envs.tool import Tool
7 |
8 |
9 | class GetReservationDetails(Tool):
10 | @staticmethod
11 | def invoke(data: Dict[str, Any], reservation_id: str) -> str:
12 | reservations = data["reservations"]
13 | if reservation_id in reservations:
14 | return json.dumps(reservations[reservation_id])
 15 |         return "Error: reservation not found"
16 |
17 | @staticmethod
18 | def get_info() -> Dict[str, Any]:
19 | return {
20 | "type": "function",
21 | "function": {
22 | "name": "get_reservation_details",
23 | "description": "Get the details of a reservation.",
24 | "parameters": {
25 | "type": "object",
26 | "properties": {
27 | "reservation_id": {
28 | "type": "string",
29 | "description": "The reservation id, such as '8JX2WO'.",
30 | },
31 | },
32 | "required": ["reservation_id"],
33 | },
34 | },
35 | }
36 |
--------------------------------------------------------------------------------
/dev/tau-bench/tau_bench/envs/retail/tools/get_order_details.py:
--------------------------------------------------------------------------------
1 | # Copyright Sierra
2 |
3 | import json
4 | from typing import Any, Dict
5 |
6 | from tau_bench.envs.tool import Tool
7 |
8 |
9 | class GetOrderDetails(Tool):
10 | @staticmethod
11 | def invoke(data: Dict[str, Any], order_id: str) -> str:
12 | orders = data["orders"]
13 | if order_id in orders:
14 | return json.dumps(orders[order_id])
15 | return "Error: order not found"
16 |
17 | @staticmethod
18 | def get_info() -> Dict[str, Any]:
19 | return {
20 | "type": "function",
21 | "function": {
22 | "name": "get_order_details",
23 | "description": "Get the status and details of an order.",
24 | "parameters": {
25 | "type": "object",
26 | "properties": {
27 | "order_id": {
28 | "type": "string",
29 | "description": "The order id, such as '#W0000000'. Be careful there is a '#' symbol at the beginning of the order id.",
30 | },
31 | },
32 | "required": ["order_id"],
33 | },
34 | },
35 | }
36 |
--------------------------------------------------------------------------------
/examples/mcp-rl/servers/python/mcp_alphavantage/README.md:
--------------------------------------------------------------------------------
1 | # MCP AlphaVantage Python Server
2 |
3 | A Python implementation of the MCP server for Alpha Vantage financial data API.
4 |
5 | ## Features
6 |
7 | - Real-time stock quotes
8 | - Daily time series data
9 | - Symbol search
10 | - Company overview/fundamentals
11 | - Technical indicators (SMA, RSI)
12 |
13 | ## Setup
14 |
15 | 1. Get an API key from [Alpha Vantage](https://www.alphavantage.co/support/#api-key)
16 | 2. Set the environment variable:
17 | ```bash
18 | export ALPHAVANTAGE_API_KEY=your_api_key_here
19 | ```
20 |
21 | ## Usage
22 |
23 | ### Command Line
24 | ```bash
25 | python server.py --api-key YOUR_API_KEY
26 | ```
27 |
28 | ### With Environment Variable
29 | ```bash
30 | export ALPHAVANTAGE_API_KEY=your_api_key
31 | python server.py
32 | ```
33 |
34 | ### Available Tools
35 |
36 | - `get_stock_quote`: Get real-time stock quote
37 | - `get_time_series_daily`: Get daily stock data
38 | - `search_symbol`: Search for stock symbols
39 | - `get_company_overview`: Get company fundamentals
40 | - `get_sma`: Simple Moving Average indicator
41 | - `get_rsi`: Relative Strength Index indicator
42 |
43 | ## Transport Options
44 |
45 | - `stdio` (default): Standard input/output transport
46 | - `sse`: Server-sent events over HTTP
--------------------------------------------------------------------------------
/dev/tau-bench/tau_bench/envs/retail/tools/get_product_details.py:
--------------------------------------------------------------------------------
1 | # Copyright Sierra
2 |
3 | import json
4 | from typing import Any, Dict
5 |
6 | from tau_bench.envs.tool import Tool
7 |
8 |
9 | class GetProductDetails(Tool):
10 | @staticmethod
11 | def invoke(data: Dict[str, Any], product_id: str) -> str:
12 | products = data["products"]
13 | if product_id in products:
14 | return json.dumps(products[product_id])
15 | return "Error: product not found"
16 |
17 | @staticmethod
18 | def get_info() -> Dict[str, Any]:
19 | return {
20 | "type": "function",
21 | "function": {
22 | "name": "get_product_details",
23 | "description": "Get the inventory details of a product.",
24 | "parameters": {
25 | "type": "object",
26 | "properties": {
27 | "product_id": {
28 | "type": "string",
29 | "description": "The product id, such as '6086499569'. Be careful the product id is different from the item id.",
30 | },
31 | },
32 | "required": ["product_id"],
33 | },
34 | },
35 | }
36 |
--------------------------------------------------------------------------------
/dev/tau-bench/tau_bench/envs/retail/tools/find_user_id_by_email.py:
--------------------------------------------------------------------------------
1 | # Copyright Sierra
2 |
3 | from typing import Any, Dict
4 |
5 | from tau_bench.envs.tool import Tool
6 |
7 |
8 | class FindUserIdByEmail(Tool):
9 | @staticmethod
10 | def invoke(data: Dict[str, Any], email: str) -> str:
11 | users = data["users"]
12 | for user_id, profile in users.items():
13 | if profile["email"].lower() == email.lower():
14 | return user_id
15 | return "Error: user not found"
16 |
17 | @staticmethod
18 | def get_info() -> Dict[str, Any]:
19 | return {
20 | "type": "function",
21 | "function": {
22 | "name": "find_user_id_by_email",
23 | "description": "Find user id by email. If the user is not found, the function will return an error message.",
24 | "parameters": {
25 | "type": "object",
26 | "properties": {
27 | "email": {
28 | "type": "string",
29 | "description": "The email of the user, such as 'something@example.com'.",
30 | },
31 | },
32 | "required": ["email"],
33 | },
34 | },
35 | }
36 |
--------------------------------------------------------------------------------
/src/art/transformers/patches.py:
--------------------------------------------------------------------------------
1 | from typing import TYPE_CHECKING, Optional, Union
2 |
3 | import torch
4 | from transformers import masking_utils
5 | from transformers.cache_utils import Cache
6 | from transformers.configuration_utils import PretrainedConfig
7 |
8 | if TYPE_CHECKING:
9 | from torch.nn.attention.flex_attention import BlockMask
10 |
11 | _preprocess_mask_arguments = masking_utils._preprocess_mask_arguments
12 |
13 |
14 | def _patched_preprocess_mask_arguments(
15 | config: PretrainedConfig,
16 | input_embeds: torch.Tensor,
17 | attention_mask: Optional[Union[torch.Tensor, "BlockMask"]],
18 | cache_position: torch.Tensor,
19 | past_key_values: Optional[Cache],
20 | position_ids: Optional[torch.Tensor],
21 | layer_idx: Optional[int],
22 | ) -> tuple[bool, Optional[Union[torch.Tensor, "BlockMask"]], int, int]:
23 | if position_ids is not None and len(position_ids.shape) == 3:
24 | position_ids = position_ids[0]
25 | return _preprocess_mask_arguments(
26 | config,
27 | input_embeds,
28 | attention_mask,
29 | cache_position,
30 | past_key_values,
31 | position_ids,
32 | layer_idx,
33 | )
34 |
35 |
36 | def patch_preprocess_mask_arguments() -> None:
37 | masking_utils._preprocess_mask_arguments = _patched_preprocess_mask_arguments
38 |
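In isolation, the patch's core behavior is the squeeze below: a packed, 3-D `position_ids` tensor is reduced to the 2-D form the stock mask-preprocessing helper expects (shapes here are illustrative):

```python
import torch

position_ids = torch.arange(8).reshape(1, 2, 4)  # (1, batch, seq_len)
if position_ids is not None and len(position_ids.shape) == 3:
    position_ids = position_ids[0]
assert position_ids.shape == (2, 4)  # (batch, seq_len)
```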
--------------------------------------------------------------------------------
/dev/tau-bench/tau_bench/envs/airline/tools/transfer_to_human_agents.py:
--------------------------------------------------------------------------------
1 | # Copyright Sierra
2 |
3 | from typing import Any, Dict
4 |
5 | from tau_bench.envs.tool import Tool
6 |
7 |
8 | class TransferToHumanAgents(Tool):
9 | @staticmethod
10 | def invoke(
11 | data: Dict[str, Any],
12 | summary: str,
13 | ) -> str:
14 | return "Transfer successful"
15 |
16 | @staticmethod
17 | def get_info() -> Dict[str, Any]:
18 | return {
19 | "type": "function",
20 | "function": {
21 | "name": "transfer_to_human_agents",
22 | "description": "Transfer the user to a human agent, with a summary of the user's issue. Only transfer if the user explicitly asks for a human agent, or if the user's issue cannot be resolved by the agent with the available tools.",
23 | "parameters": {
24 | "type": "object",
25 | "properties": {
26 | "summary": {
27 | "type": "string",
28 | "description": "A summary of the user's issue.",
29 | },
30 | },
31 | "required": [
32 | "summary",
33 | ],
34 | },
35 | },
36 | }
37 |
--------------------------------------------------------------------------------
/scripts/setup.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Load environment variables from .env file if it exists
4 | if [ -f .env ]; then
5 | # Read .env file line by line, ignoring comments and empty lines
6 | while IFS= read -r line || [ -n "$line" ]; do
7 | # Skip comments and empty lines
8 | [[ $line =~ ^#.*$ ]] && continue
9 | [[ -z $line ]] && continue
10 |
11 | # Export the variable
12 | export "$line"
13 | done < .env
14 | fi
15 |
16 | # Configure git user name and email
17 | git config --global user.name "${GIT_USER_NAME}"
18 | git config --global user.email "${GIT_USER_EMAIL}"
19 | git config --global --add safe.directory /root/sky_workdir
20 |
21 | if [ "${GIT_RESET_CLEAN:-true}" = "true" ]; then
22 | # Reset any uncommitted changes to the last commit
23 | git reset --hard HEAD
24 |
25 | # Remove all untracked files and directories
26 | git clean -fd
27 | else
28 | echo "Skipping git reset/clean (GIT_RESET_CLEAN is not true). Preserving synced working tree."
29 | fi
30 |
31 | # Install astral-uv
32 | sudo snap install --classic astral-uv
33 |
34 | # Update uv
35 | uv self update
36 |
37 | # Install tmux
38 | apt install tmux -y
39 |
40 | # Sync the dependencies
41 | if [ "${INSTALL_EXTRAS:-false}" = "true" ]; then
42 | uv sync --all-extras
43 | else
44 | uv sync --extra backend
45 | fi
--------------------------------------------------------------------------------
/dev/tau-bench/tau_bench/envs/retail/tools/think.py:
--------------------------------------------------------------------------------
1 | # Copyright Sierra
2 |
3 | from typing import Any, Dict
4 |
5 | from tau_bench.envs.tool import Tool
6 |
7 |
8 | class Think(Tool):
9 | @staticmethod
10 | def invoke(data: Dict[str, Any], thought: str) -> str:
 11 |         # This method does not change the state of the data; it simply acknowledges the thought.
12 | return "Thought Completed"
13 |
14 | @staticmethod
15 | def get_info() -> Dict[str, Any]:
16 | return {
17 | "type": "function",
18 | "function": {
19 | "name": "think",
20 | "description": (
21 | "Use the tool to think about something. It will not obtain new information or change the database, "
22 | "but just append the thought to the log. Use it when complex reasoning or some cache memory is needed."
23 | ),
24 | "parameters": {
25 | "type": "object",
26 | "properties": {
27 | "thought": {
28 | "type": "string",
29 | "description": "A thought to think about.",
30 | },
31 | },
32 | "required": ["thought"],
33 | },
34 | },
35 | }
36 |
--------------------------------------------------------------------------------
/dev/test_skypilot/register_model.py:
--------------------------------------------------------------------------------
  1 | """Register a trainable model with a SkyPilot backend cluster."""
2 |
3 | import asyncio
4 |
5 | from dotenv import load_dotenv
6 | from pydantic import BaseModel
7 |
8 | import art
9 | from art.skypilot.backend import SkyPilotBackend
10 |
11 | load_dotenv()
12 |
13 |
14 | class ComplexModelConfig(BaseModel):
15 | max_turns: int = 5
16 | max_tokens: int = 2048
17 |
18 | base_model: str = "Qwen/Qwen2.5-14B-Instruct"
19 | # Random seed to control which subset of the training data is sampled
20 | training_dataset_seed: int | None = None
21 |
 22 |     # Training configuration
 23 |     scale_rewards: bool = True
 24 |     num_epochs: int = 160
25 |
26 | async def register_model():
27 | backend = await SkyPilotBackend().initialize_cluster(
28 | cluster_name="test-skypilot",
29 | gpu="H100-SXM",
30 | env_path=".env",
31 | # force_restart=True,
32 | )
33 |
34 | model = art.TrainableModel(
35 | name="complex-model",
36 | project="test-skypilot",
37 | base_model="Qwen/Qwen2.5-14B-Instruct",
38 | config=ComplexModelConfig(
39 | num_epochs=160,
40 | ),
41 | )
42 |
43 | await backend.register(model)
44 |
45 | print("model registered")
46 |
47 |
48 | if __name__ == "__main__":
49 | asyncio.run(register_model())
50 |
--------------------------------------------------------------------------------
/src/art/vllm/__init__.py:
--------------------------------------------------------------------------------
1 | """vLLM integration module for art."""
2 |
3 | # Server functionality
4 | # Engine and worker management
5 | from .engine import (
6 | WorkerExtension,
7 | create_engine_pause_and_resume_functions,
8 | get_llm,
9 | get_worker,
10 | run_on_workers,
11 | )
12 |
13 | # Patches - these are typically imported for their side effects
14 | from .patches import (
15 | patch_allocator,
16 | patch_get_lora_tokenizer_async,
17 | patch_listen_for_disconnect,
18 | patch_lora_request,
19 | patch_multi_step_model_runner,
20 | patch_tool_parser_manager,
21 | subclass_chat_completion_request,
22 | )
23 | from .server import (
24 | get_uvicorn_logging_config,
25 | openai_server_task,
26 | set_vllm_log_file,
27 | )
28 |
29 | __all__ = [
30 | # Server
31 | "openai_server_task",
32 | "get_uvicorn_logging_config",
33 | "set_vllm_log_file",
34 | # Engine
35 | "get_llm",
36 | "create_engine_pause_and_resume_functions",
37 | "run_on_workers",
38 | "get_worker",
39 | "WorkerExtension",
40 | # Patches
41 | "patch_allocator",
42 | "subclass_chat_completion_request",
43 | "patch_lora_request",
44 | "patch_get_lora_tokenizer_async",
45 | "patch_listen_for_disconnect",
46 | "patch_tool_parser_manager",
47 | "patch_multi_step_model_runner",
48 | ]
49 |
--------------------------------------------------------------------------------
/examples/hn_title_generator/skypilot-reference-grpo-trainer.yaml:
--------------------------------------------------------------------------------
1 | # To launch, run the following command from the root directory of the art repository:
2 | # `uv run sky launch examples/hn_title_generator/skypilot-reference-grpo-trainer.yaml --cluster=kyle-hn-title-generator-002 --env-file=.env --yes --retry-until-up --down --idle-minutes-to-autostop 60`
3 |
4 | resources:
5 | image_id: pytorch/pytorch:2.5.1-cuda12.4-cudnn9-devel
6 | cloud: runpod
7 | region: US
8 | accelerators:
9 | - "H100-SXM"
10 |
11 | workdir: .
12 |
13 | envs:
14 | HF_HUB_ENABLE_HF_TRANSFER: 1
15 | VLLM_CONFIGURE_LOGGING: 0
16 |
17 | setup: |
18 | apt-get update && apt-get install -y git
19 |
20 | curl -LsSf https://astral.sh/uv/install.sh | sh
21 |
22 | # Source the environment to make uv available
23 | source $HOME/.local/bin/env
24 |
25 | uv pip install --system \
26 | unsloth==2025.3.19 \
27 | vllm==0.8.2 \
28 | bitsandbytes==0.45.4 \
29 | datasets==3.3.2 \
30 | s3fs==2024.12.0 \
31 | hf-transfer==0.1.9 \
32 | typer==0.15.2 \
33 | fastapi==0.115.11 \
34 | python-dotenv==1.0.1 \
35 | polars==1.24.0 \
36 | wandb==0.19.8 \
37 | git+https://github.com/corbt/panza.git \
38 |
39 | echo "Setup complete"
40 |
41 | run: |
42 | echo "Running train_grpo.py"
43 | uv run python examples/hn_title_generator/reference_grpo_trainer.py
44 |
--------------------------------------------------------------------------------
/dev/tau-bench/tau_bench/model_utils/model/outlines_completion.py:
--------------------------------------------------------------------------------
1 | from typing import Any
2 |
3 | from pydantic import BaseModel
4 |
5 | from tau_bench.model_utils.api.datapoint import Datapoint
6 | from tau_bench.model_utils.model.vllm_completion import VLLMCompletionModel
7 | from tau_bench.model_utils.model.vllm_utils import generate_request
8 |
9 |
10 | class OutlinesCompletionModel(VLLMCompletionModel):
11 | def parse_force_from_prompt(
 12 |         self, prompt: str, typ: type[BaseModel], temperature: float | None = None
13 | ) -> dict[str, Any]:
14 | if temperature is None:
15 | temperature = self.temperature
16 | schema = typ.model_json_schema()
17 | res = generate_request(
18 | url=self.url,
19 | prompt=prompt,
20 | force_json=True,
21 | schema=schema,
22 | temperature=temperature,
23 | )
24 | return self.handle_parse_force_response(prompt=prompt, content=res)
25 |
26 | def get_approx_cost(self, dp: Datapoint) -> float:
27 | return super().get_approx_cost(dp)
28 |
29 | def get_latency(self, dp: Datapoint) -> float:
30 | return super().get_latency(dp)
31 |
32 | def get_capability(self) -> float:
33 | return super().get_capability()
34 |
35 | def supports_dp(self, dp: Datapoint) -> bool:
36 | return super().supports_dp(dp)
37 |
--------------------------------------------------------------------------------
/scripts/launch-cluster.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | CLUSTER_NAME="art"
4 |
5 | # Parse arguments
6 | ARGS=()
7 | PULL_LATEST=true
8 | while [[ $# -gt 0 ]]; do
9 | case "$1" in
10 | -c)
11 | CLUSTER_NAME="$2"
12 | shift 2
13 | ;;
14 | --no-pull)
15 | PULL_LATEST=false
16 | shift 1
17 | ;;
18 | *)
19 | ARGS+=("$1")
20 | shift
21 | ;;
22 | esac
23 | done
24 |
25 | # Check for unstaged changes
26 | if ! git diff --quiet; then
27 | echo "Warning: You have unstaged changes. Unstaged changes will be discarded from the cluster working directory."
28 | fi
29 |
30 | # Check for uncommitted changes
31 | if ! git diff --cached --quiet; then
32 | echo "Warning: You have uncommitted changes. Uncommitted changes will be discarded from the cluster working directory."
33 | fi
34 |
35 | if [[ "$PULL_LATEST" == true ]]; then
36 | echo "Pulling latest changes..."
37 | if ! git pull; then
38 | echo "Error: Failed to pull latest changes."
39 | exit 1
40 | fi
41 | else
42 | echo "Skipping git pull (deploying current working tree). To pull latest, omit --no-pull."
43 | # Preserve synced working tree on remote by disabling reset/clean.
44 | ARGS+=(--env "GIT_RESET_CLEAN=false")
45 | fi
46 |
47 | # Launch the cluster
48 | uv run sky launch skypilot-config.yaml -c "$CLUSTER_NAME" --env-file .env -y "${ARGS[@]}"
--------------------------------------------------------------------------------
/src/art/utils/output_dirs.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from art.model import Model
4 | from art.utils.get_repo_root_path import get_repo_root_path
5 |
6 |
7 | def get_default_art_path() -> str:
8 | root_path = get_repo_root_path()
9 | return os.path.join(root_path, ".art")
10 |
11 |
12 | def get_models_dir(project_name: str, art_path: str | None = None) -> str:
13 |     if art_path is None:
14 |         art_path = get_default_art_path()
15 |     return os.path.join(art_path, project_name, "models")
16 | 
17 | 
18 | def get_model_dir(model: Model, art_path: str | None = None) -> str:
19 |     if art_path is None:
20 |         art_path = get_default_art_path()
21 |     return os.path.join(art_path, model.project, "models", model.name)
22 | 
23 | 
24 | def get_output_dir_from_model_properties(
25 |     project: str, name: str, art_path: str | None = None
26 | ) -> str:
27 |     if art_path is None:
28 |         art_path = get_default_art_path()
29 |     return os.path.join(art_path, project, "models", name)
30 | 
31 | 
32 | def get_step_checkpoint_dir(model_output_dir: str, step: int) -> str:
33 |     return os.path.join(model_output_dir, "checkpoints", f"{step:04d}")
34 | 
35 | 
36 | def get_trajectories_dir(model_output_dir: str) -> str:
37 |     return os.path.join(model_output_dir, "trajectories")
38 | 
39 | 
40 | def get_trajectories_split_dir(model_output_dir: str, split: str) -> str:
41 |     return os.path.join(model_output_dir, "trajectories", split)
42 |
--------------------------------------------------------------------------------
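A minimal usage sketch for the path helpers above (project and model names are hypothetical; `Model` is assumed to expose the `project` and `name` attributes used by `get_model_dir`):

    from art.utils.output_dirs import (
        get_output_dir_from_model_properties,
        get_step_checkpoint_dir,
        get_trajectories_split_dir,
    )

    # Resolve the output directory from plain strings, without a Model instance.
    model_dir = get_output_dir_from_model_properties(project="demo", name="agent-001")
    # -> <repo-root>/.art/demo/models/agent-001

    # Checkpoint directories are zero-padded to four digits so they sort lexicographically.
    ckpt_dir = get_step_checkpoint_dir(model_dir, step=12)
    # -> .../checkpoints/0012

    val_dir = get_trajectories_split_dir(model_dir, split="val")
    # -> .../trajectories/val
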
/dev/tau-bench/tau_bench/envs/retail/tools/transfer_to_human_agents.py:
--------------------------------------------------------------------------------
1 | # Copyright Sierra
2 |
3 | from typing import Any, Dict
4 |
5 | from tau_bench.envs.tool import Tool
6 |
7 |
8 | class TransferToHumanAgents(Tool):
9 | @staticmethod
10 | def invoke(data: Dict[str, Any], summary: str) -> str:
11 | # This method simulates the transfer to a human agent.
12 | return "Transfer successful"
13 |
14 | @staticmethod
15 | def get_info() -> Dict[str, Any]:
16 | return {
17 | "type": "function",
18 | "function": {
19 | "name": "transfer_to_human_agents",
20 | "description": (
21 | "Transfer the user to a human agent, with a summary of the user's issue. "
22 | "Only transfer if the user explicitly asks for a human agent, or if the user's issue cannot be resolved by the agent with the available tools."
23 | ),
24 | "parameters": {
25 | "type": "object",
26 | "properties": {
27 | "summary": {
28 | "type": "string",
29 | "description": "A summary of the user's issue.",
30 | },
31 | },
32 | "required": ["summary"],
33 | },
34 | },
35 | }
36 |
--------------------------------------------------------------------------------
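A short sketch of how this static Tool interface is exercised (values hypothetical; `data` is the environment state dict, which this mock tool ignores):

    from tau_bench.envs.retail.tools.transfer_to_human_agents import TransferToHumanAgents

    # The OpenAI-style function schema exposed to the agent.
    schema = TransferToHumanAgents.get_info()
    assert schema["function"]["name"] == "transfer_to_human_agents"

    # The mock transfer always succeeds, regardless of state.
    result = TransferToHumanAgents.invoke(data={}, summary="User insists on a human agent.")
    assert result == "Transfer successful"
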
/dev/tau-bench/tau_bench/envs/airline/env.py:
--------------------------------------------------------------------------------
1 | # Copyright Sierra
2 |
3 | from typing import Optional, Union
4 |
5 | from tau_bench.envs.airline.data import load_data
6 | from tau_bench.envs.airline.rules import RULES
7 | from tau_bench.envs.airline.tools import ALL_TOOLS
8 | from tau_bench.envs.airline.wiki import WIKI
9 | from tau_bench.envs.base import Env
10 | from tau_bench.envs.user import UserStrategy
11 |
12 |
13 | class MockAirlineDomainEnv(Env):
14 | def __init__(
15 | self,
16 | user_strategy: Union[str, UserStrategy] = UserStrategy.LLM,
17 | user_model: str = "gpt-4o",
18 | user_provider: Optional[str] = None,
19 | task_split: str = "test",
20 | task_index: Optional[int] = None,
21 | ):
22 | match task_split:
23 | case "test":
24 | from tau_bench.envs.airline.tasks_test import TASKS as tasks
25 | case _:
26 | raise ValueError(f"Unknown task split: {task_split}")
27 | super().__init__(
28 | data_load_func=load_data,
29 | tools=ALL_TOOLS,
30 | tasks=tasks,
31 | wiki=WIKI,
32 | rules=RULES,
33 | user_strategy=user_strategy,
34 | user_model=user_model,
35 | user_provider=user_provider,
36 | task_index=task_index,
37 | )
38 | self.terminate_tools = ["transfer_to_human_agents"]
39 |
--------------------------------------------------------------------------------
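A construction sketch for the env above (the `task_index` value is hypothetical; other arguments use the defaults from the signature). Only the "test" split is wired up, and because `transfer_to_human_agents` is listed in `terminate_tools`, calling it ends an episode:

    from tau_bench.envs.airline.env import MockAirlineDomainEnv

    env = MockAirlineDomainEnv(task_split="test", task_index=0)
    # Any other split raises ValueError("Unknown task split: ...").
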
/dev/tau-bench/tau_bench/envs/airline/tools/calculate.py:
--------------------------------------------------------------------------------
1 | # Copyright Sierra
2 |
3 | from typing import Any, Dict
4 |
5 | from tau_bench.envs.tool import Tool
6 |
7 |
8 | class Calculate(Tool):
9 | @staticmethod
10 | def invoke(data: Dict[str, Any], expression: str) -> str:
11 | if not all(char in "0123456789+-*/(). " for char in expression):
12 | return "Error: invalid characters in expression"
13 | try:
14 | return str(round(float(eval(expression, {"__builtins__": None}, {})), 2))
15 | except Exception as e:
16 | return f"Error: {e}"
17 |
18 | @staticmethod
19 | def get_info() -> Dict[str, Any]:
20 | return {
21 | "type": "function",
22 | "function": {
23 | "name": "calculate",
24 | "description": "Calculate the result of a mathematical expression.",
25 | "parameters": {
26 | "type": "object",
27 | "properties": {
28 | "expression": {
29 | "type": "string",
30 | "description": "The mathematical expression to calculate, such as '2 + 2'. The expression can contain numbers, operators (+, -, *, /), parentheses, and spaces.",
31 | },
32 | },
33 | "required": ["expression"],
34 | },
35 | },
36 | }
37 |
--------------------------------------------------------------------------------
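A quick worked example for the tool above. The character whitelist rejects anything other than digits, arithmetic operators, parentheses, dots, and spaces before the expression ever reaches `eval`, and results are rounded to two decimal places:

    from tau_bench.envs.airline.tools.calculate import Calculate

    print(Calculate.invoke({}, "2 + 2 * 3"))       # "8.0"  (usual operator precedence)
    print(Calculate.invoke({}, "(100 - 55) / 7"))  # "6.43" (rounded to two places)
    print(Calculate.invoke({}, "__import__('os')"))  # "Error: invalid characters in expression"
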
/src/art/skypilot/stop_server.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import asyncio
3 |
4 | import sky
5 |
6 | from art.skypilot.backend import SkyPilotBackend
7 | from art.skypilot.utils import is_task_created, to_thread_typed
8 |
9 | parser = argparse.ArgumentParser(
10 | description="Close the art server hosted on a skypilot cluster"
11 | )
12 | parser.add_argument(
13 | "--cluster",
14 | type=str,
15 | required=True,
16 | help="The name of the skypilot cluster to close the art server on",
17 | )
18 | args = parser.parse_args()
19 |
20 |
21 | async def stop_server() -> None:
22 | cluster_status = await to_thread_typed(
23 | lambda: sky.stream_and_get(sky.status(cluster_names=[args.cluster]))
24 | )
25 | if len(cluster_status) == 0 or cluster_status[0]["status"] != sky.ClusterStatus.UP:
26 | raise ValueError(f"Cluster {args.cluster} is not running")
27 |
28 | if not await is_task_created(cluster_name=args.cluster, task_name="art_server"):
29 | raise ValueError(f"Art server task for cluster {args.cluster} is not running")
30 |
31 | backend = await SkyPilotBackend.initialize_cluster(
32 | cluster_name=args.cluster, art_version=".", env_path=".env", gpu="H100"
33 | )
34 | await backend.close()
35 |
36 | # cancel the art server task
37 | await to_thread_typed(lambda: sky.cancel(cluster_name=args.cluster, all=True))
38 |
39 |
40 | def main() -> None:
41 |     asyncio.run(stop_server())
42 | 
43 | 
44 | # Entry-point guard so the script runs when executed directly (e.g. via `python -m`).
45 | if __name__ == "__main__":
46 |     main()
--------------------------------------------------------------------------------
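Invocation sketch (cluster name hypothetical), following the `uv run` convention used in this repo: from the repo root, `uv run python -m art.skypilot.stop_server --cluster my-cluster` stops the `art_server` task on the named cluster.
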
/AGENT.md:
--------------------------------------------------------------------------------
1 | ## uv package manager by default
2 |
3 | This project uses the `uv` package manager.
4 |
5 | - To add a dependency, run `uv add <package>`.
6 | - To run a script, run `uv run