├── .github └── workflows │ └── pre-commit.yml ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── Dockerfile ├── LICENSE ├── README.md ├── docs ├── README.md ├── api │ ├── agents │ │ ├── agent.md │ │ └── utils.md │ ├── engine │ │ ├── agent_execution_engine.md │ │ └── agent_workflow_engine.md │ ├── environments │ │ ├── base.md │ │ └── env_utils.md │ ├── index.md │ ├── parser │ │ ├── chat_parser.md │ │ └── tool_parser.md │ ├── tools │ │ ├── code_tools.md │ │ ├── registry.md │ │ ├── tool_base.md │ │ └── web_tools.md │ ├── trainer │ │ ├── agent_trainer.md │ │ └── ray_runtime_env.md │ └── workflows │ │ └── workflow.md ├── assets │ ├── agentica-logo-black.png │ ├── agentica-logo.png │ ├── rllm_architecture.txt │ ├── rllm_components.png │ ├── rllm_logo_black.png │ ├── rllm_logo_blue.png │ └── rllm_logo_white.png ├── contributing.md ├── core-concepts │ ├── agent_env.md │ ├── eval-protocol.md │ ├── execution-engine.md │ ├── overview.md │ ├── rl-algos.md │ ├── training.md │ └── workflow-engine.md ├── examples │ ├── deepcoder.md │ ├── deepscaler.md │ ├── eval_protocol_frozen_lake.md │ ├── frozenlake.md │ ├── index.md │ ├── search.md │ ├── sft.md │ ├── solver_judge.md │ └── swe.md ├── getting-started │ ├── installation.md │ └── quick-start.md ├── index.md ├── requirements.txt └── stylesheets │ └── extra.css ├── examples ├── appworld │ └── run_appworld_agent.py ├── countdown │ ├── prepare_countdown_data.py │ ├── train_countdown.py │ └── train_countdown.sh ├── deepcoder │ ├── README.md │ ├── prepare_deepcoder_data.py │ ├── run_deepcoder.py │ ├── train_deepcoder.py │ ├── train_deepcoder_16k.sh │ └── train_deepcoder_32k.sh ├── deepresearch │ ├── .env.example │ ├── README.md │ ├── deepresearch_agent.py │ ├── deepresearch_tools.py │ ├── deepresearch_workflow.py │ └── evaluate_hle.py ├── deepscaler │ ├── README.md │ ├── prepare_math_data.py │ ├── run_deepscaler.py │ ├── train_deepscaler.py │ ├── train_deepscaler_16k.sh │ ├── train_deepscaler_24k.sh │ ├── train_deepscaler_8k.sh │ ├── train_deepscaler_megatron.py │ └── train_deepscaler_megatron.sh ├── eval_protocol │ ├── README.md │ ├── frozen_lake_flow.py │ ├── prepare_frozen_lake_data.py │ ├── run_frozen_lake_flow.py │ ├── train_frozen_lake_flow.py │ └── train_frozen_lake_flow.sh ├── fireworks_math │ ├── README.md │ ├── prepare_hendrycks_math_dataset.py │ ├── train_fireworks_math.py │ └── train_fireworks_math.sh ├── frozenlake │ ├── README.md │ ├── prepare_frozenlake_data.py │ ├── run_frozenlake_agent.py │ ├── train_frozenlake_agent.py │ ├── train_frozenlake_agent.sh │ └── workflow │ │ ├── train_frozenlake_agent.py │ │ └── train_frozenlake_agent.sh ├── math_tool │ ├── README.md │ ├── prepare_math_data.py │ ├── run_math_with_tool.py │ ├── train_math_with_tool.py │ └── train_math_with_tool.sh ├── mcp │ ├── README.md │ ├── prepare_hotpotqa_data.py │ └── run_tool_mcp.py ├── miniwob │ ├── README.md │ ├── prepare_miniwob_data.py │ ├── run_miniwob_agent.py │ ├── train_miniwob_agent.py │ ├── train_miniwob_agent.sh │ └── train_miniwob_agent_broadcast.sh ├── search │ ├── README.md │ ├── download_search_data.py │ ├── local_retrieval_tool.py │ ├── prepare_hotpotqa_data.py │ ├── retrieval │ │ ├── launch_server.sh │ │ └── server.py │ ├── run_search_agent.py │ ├── train_search_agent.py │ └── train_search_agent.sh ├── sft │ ├── README.md │ ├── generate_sft_data.py │ ├── prepare_math_data.py │ ├── run_sft_model.py │ ├── train_math_sft.py │ └── train_math_sft.sh ├── simple_math │ ├── prepare_math_dataset.py │ ├── train_hendrycks_math.py │ ├── train_hendrycks_math.sh │ └── workflow │ │ ├── simple_workflow │ │ ├── train_hendrycks_math.py │ │ └── train_hendrycks_math.sh │ │ └── single_turn_workflow │ │ ├── train_hendrycks_math.py │ │ └── train_hendrycks_math.sh ├── smolagents │ └── run_smolagents.py ├── solver_judge │ ├── prepare_countdown_data.py │ ├── run_solver_judge_flow.py │ ├── solver_judge_flow.py │ ├── solver_judge_flow_colab.ipynb │ ├── train_solver_judge_flow.py │ └── train_solver_judge_flow.sh ├── strands │ ├── .env.example │ ├── README.md │ ├── eval │ │ └── gaia │ │ │ ├── README.md │ │ │ ├── gaia_config.yaml │ │ │ ├── gaia_evaluator.py │ │ │ └── run_gaia_eval.py │ ├── gsearch_tool_wrapped.py │ ├── run_strands.py │ └── strands_workflow.py ├── swe │ ├── README.md │ ├── prepare_swe_data.py │ ├── run_deepswe.py │ ├── train_deepswe_32b.sh │ └── train_deepswe_agent.py ├── terminal │ ├── README.md │ ├── prepare_terminal_data.py │ ├── run_terminus.py │ └── terminus_workflow.py └── vimgolf │ ├── README.md │ ├── lib.py │ ├── prepare_vimgolf_data.py │ ├── requirements.txt │ ├── run_vimgolf.py │ ├── train_vimgolf_agent.py │ ├── train_vimgolf_agent.sh │ └── vimgolf_public_challenges.jsonl ├── mkdocs.yml ├── pyproject.toml ├── rllm ├── __init__.py ├── agents │ ├── __init__.py │ ├── agent.py │ ├── appworld_react_agents.py │ ├── code_agent.py │ ├── frozenlake_agent.py │ ├── math_agent.py │ ├── miniwob_agent.py │ ├── swe_agent.py │ ├── system_prompts.py │ ├── tool_agent.py │ ├── utils.py │ └── webarena_agent.py ├── data │ ├── __init__.py │ ├── dataset.py │ ├── dataset_types.py │ ├── preprocess │ │ ├── code │ │ │ ├── apps.ipynb │ │ │ ├── code_contests.ipynb │ │ │ ├── codeforces.ipynb │ │ │ ├── humanevalplus.ipynb │ │ │ ├── kodcode.ipynb │ │ │ ├── leetcode.ipynb │ │ │ ├── livecodebench.ipynb │ │ │ ├── primeintellect.ipynb │ │ │ └── taco.ipynb │ │ ├── difficulty_judge.py │ │ ├── gaia │ │ │ └── gaia.ipynb │ │ └── math │ │ │ ├── amc.ipynb │ │ │ ├── deepscaler.ipynb │ │ │ ├── deepscaler_7b.ipynb │ │ │ ├── gsm8k.ipynb │ │ │ ├── math.ipynb │ │ │ ├── minerva.ipynb │ │ │ ├── numina_olympiad.ipynb │ │ │ ├── olympiad_bench.ipynb │ │ │ ├── omni_math.ipynb │ │ │ └── still.ipynb │ └── utils.py ├── docs │ └── config.rst ├── engine │ ├── __init__.py │ ├── agent_execution_engine.py │ ├── agent_workflow_engine.py │ └── rollout │ │ ├── __init__.py │ │ ├── fireworks_engine.py │ │ ├── openai_engine.py │ │ ├── rollout_engine.py │ │ └── verl_engine.py ├── environments │ ├── __init__.py │ ├── appworld │ │ ├── appworld_env.py │ │ └── signal_patch.py │ ├── base │ │ ├── __init__.py │ │ ├── base_env.py │ │ ├── multi_turn_env.py │ │ └── single_turn_env.py │ ├── browsergym │ │ └── browsergym.py │ ├── code │ │ └── competition_coding.py │ ├── env_utils.py │ ├── frozenlake │ │ └── frozenlake.py │ ├── swe │ │ ├── cache_images_k8.py │ │ └── swe.py │ └── tools │ │ ├── mcp_env.py │ │ └── tool_env.py ├── globals.py ├── integrations │ ├── smolagents.py │ ├── strands.py │ ├── strands_adapter.py │ └── terminal_terminus_1.py ├── misc.py ├── parser │ ├── __init__.py │ ├── chat_template_parser.py │ ├── tool_parser.py │ └── utils.py ├── patches │ ├── __init__.py │ └── verl_patch_hook.py ├── rewards │ ├── __init__.py │ ├── code_reward.py │ ├── code_utils │ │ ├── codeforces.py │ │ ├── firejail_exec.py │ │ ├── humanevalplus.py │ │ ├── kodcode.py │ │ ├── livecodebench.py │ │ ├── pyext2.py │ │ ├── swebench.py │ │ ├── taco.py │ │ └── utils.py │ ├── countdown_reward.py │ ├── math_reward.py │ ├── math_utils │ │ ├── __init__.py │ │ └── utils.py │ ├── reward_fn.py │ ├── reward_types.py │ └── search_reward.py ├── system_prompts.py ├── tools │ ├── __init__.py │ ├── code_tools │ │ ├── __init__.py │ │ ├── code_tool.py │ │ ├── e2b_tool.py │ │ ├── lcb_tool.py │ │ ├── python_interpreter.py │ │ └── together_tool.py │ ├── mcp_tool.py │ ├── multi_tool.py │ ├── registry.py │ ├── tool_base.py │ ├── utils.py │ └── web_tools │ │ ├── __init__.py │ │ ├── firecrawl_tool.py │ │ ├── gsearch_tool.py │ │ └── tavily_tool.py ├── trainer │ ├── __init__.py │ ├── agent_sft_trainer.py │ ├── agent_trainer.py │ ├── config │ │ ├── __init__.py │ │ ├── _generated_agent_ppo_trainer.yaml │ │ ├── agent_ppo_trainer.yaml │ │ ├── agent_ppo_trainer_megatron.yaml │ │ └── agent_sft_trainer.yaml │ ├── env_agent_mappings.py │ └── verl │ │ ├── __init__.py │ │ ├── agent_ppo_trainer.py │ │ ├── agent_ppo_trainer_pipeline.py │ │ ├── agent_workflow_trainer.py │ │ ├── agent_workflow_trainer_fireworks.py │ │ ├── ray_runtime_env.py │ │ ├── train_agent_ppo.py │ │ ├── train_agent_ppo_pipeline.py │ │ └── train_workflow_pipeline.py ├── trajectory_visualizer.py ├── utils │ ├── __init__.py │ ├── compute_pass_at_k.py │ ├── episode_logger.py │ └── visualization.py └── workflows │ ├── __init__.py │ ├── cumulative_workflow.py │ ├── eval_protocol_workflow.py │ ├── multi_turn_workflow.py │ ├── simple_workflow.py │ ├── single_turn_workflow.py │ ├── timing_mixin.py │ └── workflow.py ├── scripts ├── agent │ ├── frozenlake │ │ ├── run_frozenlake.sh │ │ ├── run_frozenlake_broadcast.sh │ │ └── run_frozenlake_pipeline.sh │ ├── math │ │ ├── run_agent_math.sh │ │ └── run_agent_math_pipeline.sh │ ├── miniwob │ │ ├── run_miniwob.sh │ │ └── run_miniwob_broadcast.sh │ ├── swe │ │ └── deepswe_32b.sh │ └── webarena │ │ └── run_webarena_stepwise.sh ├── benchmark │ ├── cf_elo_calc.py │ ├── codeforces │ │ ├── __init__.py │ │ ├── metadata_cf.json │ │ └── ratings_2024.json │ ├── open_ai.py │ └── together_ai.py ├── data │ ├── code_dataset.py │ ├── dedupe_dataset.py │ ├── deepscaler_dataset.py │ ├── download_datasets.py │ ├── download_gaia.py │ ├── frozenlake_dataset.py │ ├── gaia_dataset.py │ ├── miniwob_dataset.py │ ├── swe_dataset.py │ └── webarena_dataset.py ├── dump_cfg.py ├── install_verl.sh └── train │ ├── async_math.sh │ ├── debug.sh │ ├── debug_agent.sh │ ├── debug_async.sh │ ├── deepcoder │ ├── deepcoder_14b_coding_16k.sh │ └── deepcoder_14b_coding_32k.sh │ ├── deepscaler_1.5b │ ├── run_deepscaler_1.5b_16k.sh │ ├── run_deepscaler_1.5b_24k.sh │ └── run_deepscaler_1.5b_8k.sh │ └── simple_math.sh ├── tests ├── agents │ ├── test_appworld_agent.py │ ├── test_frozenlake_agent.py │ ├── test_math_agent.py │ └── test_tool_agent.py ├── envs │ ├── test_frozenlake_env.py │ ├── test_mcp_env.py │ └── test_tool_env.py ├── parser │ ├── test_chat_parser.py │ └── test_tool_parser.py ├── rewards │ ├── test_code_reward.py │ └── test_math_reward.py ├── tools │ └── test_tools.py └── trainer │ └── verl │ └── test_ray_runtime_env.py └── uv.lock /.github/workflows/pre-commit.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/.github/workflows/pre-commit.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/.gitmodules -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/.readthedocs.yaml -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/Dockerfile -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/README.md -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/README.md -------------------------------------------------------------------------------- /docs/api/agents/agent.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/api/agents/agent.md -------------------------------------------------------------------------------- /docs/api/agents/utils.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/api/agents/utils.md -------------------------------------------------------------------------------- /docs/api/engine/agent_execution_engine.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/api/engine/agent_execution_engine.md -------------------------------------------------------------------------------- /docs/api/engine/agent_workflow_engine.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/api/engine/agent_workflow_engine.md -------------------------------------------------------------------------------- /docs/api/environments/base.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/api/environments/base.md -------------------------------------------------------------------------------- /docs/api/environments/env_utils.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/api/environments/env_utils.md -------------------------------------------------------------------------------- /docs/api/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/api/index.md -------------------------------------------------------------------------------- /docs/api/parser/chat_parser.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/api/parser/chat_parser.md -------------------------------------------------------------------------------- /docs/api/parser/tool_parser.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/api/parser/tool_parser.md -------------------------------------------------------------------------------- /docs/api/tools/code_tools.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/api/tools/code_tools.md -------------------------------------------------------------------------------- /docs/api/tools/registry.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/api/tools/registry.md -------------------------------------------------------------------------------- /docs/api/tools/tool_base.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/api/tools/tool_base.md -------------------------------------------------------------------------------- /docs/api/tools/web_tools.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/api/tools/web_tools.md -------------------------------------------------------------------------------- /docs/api/trainer/agent_trainer.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/api/trainer/agent_trainer.md -------------------------------------------------------------------------------- /docs/api/trainer/ray_runtime_env.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/api/trainer/ray_runtime_env.md -------------------------------------------------------------------------------- /docs/api/workflows/workflow.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/api/workflows/workflow.md -------------------------------------------------------------------------------- /docs/assets/agentica-logo-black.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/assets/agentica-logo-black.png -------------------------------------------------------------------------------- /docs/assets/agentica-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/assets/agentica-logo.png -------------------------------------------------------------------------------- /docs/assets/rllm_architecture.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/assets/rllm_architecture.txt -------------------------------------------------------------------------------- /docs/assets/rllm_components.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/assets/rllm_components.png -------------------------------------------------------------------------------- /docs/assets/rllm_logo_black.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/assets/rllm_logo_black.png -------------------------------------------------------------------------------- /docs/assets/rllm_logo_blue.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/assets/rllm_logo_blue.png -------------------------------------------------------------------------------- /docs/assets/rllm_logo_white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/assets/rllm_logo_white.png -------------------------------------------------------------------------------- /docs/contributing.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/contributing.md -------------------------------------------------------------------------------- /docs/core-concepts/agent_env.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/core-concepts/agent_env.md -------------------------------------------------------------------------------- /docs/core-concepts/eval-protocol.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/core-concepts/eval-protocol.md -------------------------------------------------------------------------------- /docs/core-concepts/execution-engine.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/core-concepts/execution-engine.md -------------------------------------------------------------------------------- /docs/core-concepts/overview.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/core-concepts/overview.md -------------------------------------------------------------------------------- /docs/core-concepts/rl-algos.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/core-concepts/rl-algos.md -------------------------------------------------------------------------------- /docs/core-concepts/training.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/core-concepts/training.md -------------------------------------------------------------------------------- /docs/core-concepts/workflow-engine.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/core-concepts/workflow-engine.md -------------------------------------------------------------------------------- /docs/examples/deepcoder.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/examples/deepcoder.md -------------------------------------------------------------------------------- /docs/examples/deepscaler.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/examples/deepscaler.md -------------------------------------------------------------------------------- /docs/examples/eval_protocol_frozen_lake.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/examples/eval_protocol_frozen_lake.md -------------------------------------------------------------------------------- /docs/examples/frozenlake.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/examples/frozenlake.md -------------------------------------------------------------------------------- /docs/examples/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/examples/index.md -------------------------------------------------------------------------------- /docs/examples/search.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/examples/search.md -------------------------------------------------------------------------------- /docs/examples/sft.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/examples/sft.md -------------------------------------------------------------------------------- /docs/examples/solver_judge.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/examples/solver_judge.md -------------------------------------------------------------------------------- /docs/examples/swe.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/examples/swe.md -------------------------------------------------------------------------------- /docs/getting-started/installation.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/getting-started/installation.md -------------------------------------------------------------------------------- /docs/getting-started/quick-start.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/getting-started/quick-start.md -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/index.md -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/requirements.txt -------------------------------------------------------------------------------- /docs/stylesheets/extra.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/docs/stylesheets/extra.css -------------------------------------------------------------------------------- /examples/appworld/run_appworld_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/appworld/run_appworld_agent.py -------------------------------------------------------------------------------- /examples/countdown/prepare_countdown_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/countdown/prepare_countdown_data.py -------------------------------------------------------------------------------- /examples/countdown/train_countdown.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/countdown/train_countdown.py -------------------------------------------------------------------------------- /examples/countdown/train_countdown.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/countdown/train_countdown.sh -------------------------------------------------------------------------------- /examples/deepcoder/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/deepcoder/README.md -------------------------------------------------------------------------------- /examples/deepcoder/prepare_deepcoder_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/deepcoder/prepare_deepcoder_data.py -------------------------------------------------------------------------------- /examples/deepcoder/run_deepcoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/deepcoder/run_deepcoder.py -------------------------------------------------------------------------------- /examples/deepcoder/train_deepcoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/deepcoder/train_deepcoder.py -------------------------------------------------------------------------------- /examples/deepcoder/train_deepcoder_16k.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/deepcoder/train_deepcoder_16k.sh -------------------------------------------------------------------------------- /examples/deepcoder/train_deepcoder_32k.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/deepcoder/train_deepcoder_32k.sh -------------------------------------------------------------------------------- /examples/deepresearch/.env.example: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/deepresearch/.env.example -------------------------------------------------------------------------------- /examples/deepresearch/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/deepresearch/README.md -------------------------------------------------------------------------------- /examples/deepresearch/deepresearch_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/deepresearch/deepresearch_agent.py -------------------------------------------------------------------------------- /examples/deepresearch/deepresearch_tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/deepresearch/deepresearch_tools.py -------------------------------------------------------------------------------- /examples/deepresearch/deepresearch_workflow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/deepresearch/deepresearch_workflow.py -------------------------------------------------------------------------------- /examples/deepresearch/evaluate_hle.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/deepresearch/evaluate_hle.py -------------------------------------------------------------------------------- /examples/deepscaler/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/deepscaler/README.md -------------------------------------------------------------------------------- /examples/deepscaler/prepare_math_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/deepscaler/prepare_math_data.py -------------------------------------------------------------------------------- /examples/deepscaler/run_deepscaler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/deepscaler/run_deepscaler.py -------------------------------------------------------------------------------- /examples/deepscaler/train_deepscaler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/deepscaler/train_deepscaler.py -------------------------------------------------------------------------------- /examples/deepscaler/train_deepscaler_16k.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/deepscaler/train_deepscaler_16k.sh -------------------------------------------------------------------------------- /examples/deepscaler/train_deepscaler_24k.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/deepscaler/train_deepscaler_24k.sh -------------------------------------------------------------------------------- /examples/deepscaler/train_deepscaler_8k.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/deepscaler/train_deepscaler_8k.sh -------------------------------------------------------------------------------- /examples/deepscaler/train_deepscaler_megatron.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/deepscaler/train_deepscaler_megatron.py -------------------------------------------------------------------------------- /examples/deepscaler/train_deepscaler_megatron.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/deepscaler/train_deepscaler_megatron.sh -------------------------------------------------------------------------------- /examples/eval_protocol/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/eval_protocol/README.md -------------------------------------------------------------------------------- /examples/eval_protocol/frozen_lake_flow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/eval_protocol/frozen_lake_flow.py -------------------------------------------------------------------------------- /examples/eval_protocol/prepare_frozen_lake_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/eval_protocol/prepare_frozen_lake_data.py -------------------------------------------------------------------------------- /examples/eval_protocol/run_frozen_lake_flow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/eval_protocol/run_frozen_lake_flow.py -------------------------------------------------------------------------------- /examples/eval_protocol/train_frozen_lake_flow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/eval_protocol/train_frozen_lake_flow.py -------------------------------------------------------------------------------- /examples/eval_protocol/train_frozen_lake_flow.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/eval_protocol/train_frozen_lake_flow.sh -------------------------------------------------------------------------------- /examples/fireworks_math/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/fireworks_math/README.md -------------------------------------------------------------------------------- /examples/fireworks_math/prepare_hendrycks_math_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/fireworks_math/prepare_hendrycks_math_dataset.py -------------------------------------------------------------------------------- /examples/fireworks_math/train_fireworks_math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/fireworks_math/train_fireworks_math.py -------------------------------------------------------------------------------- /examples/fireworks_math/train_fireworks_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/fireworks_math/train_fireworks_math.sh -------------------------------------------------------------------------------- /examples/frozenlake/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/frozenlake/README.md -------------------------------------------------------------------------------- /examples/frozenlake/prepare_frozenlake_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/frozenlake/prepare_frozenlake_data.py -------------------------------------------------------------------------------- /examples/frozenlake/run_frozenlake_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/frozenlake/run_frozenlake_agent.py -------------------------------------------------------------------------------- /examples/frozenlake/train_frozenlake_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/frozenlake/train_frozenlake_agent.py -------------------------------------------------------------------------------- /examples/frozenlake/train_frozenlake_agent.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/frozenlake/train_frozenlake_agent.sh -------------------------------------------------------------------------------- /examples/frozenlake/workflow/train_frozenlake_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/frozenlake/workflow/train_frozenlake_agent.py -------------------------------------------------------------------------------- /examples/frozenlake/workflow/train_frozenlake_agent.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/frozenlake/workflow/train_frozenlake_agent.sh -------------------------------------------------------------------------------- /examples/math_tool/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/math_tool/README.md -------------------------------------------------------------------------------- /examples/math_tool/prepare_math_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/math_tool/prepare_math_data.py -------------------------------------------------------------------------------- /examples/math_tool/run_math_with_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/math_tool/run_math_with_tool.py -------------------------------------------------------------------------------- /examples/math_tool/train_math_with_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/math_tool/train_math_with_tool.py -------------------------------------------------------------------------------- /examples/math_tool/train_math_with_tool.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/math_tool/train_math_with_tool.sh -------------------------------------------------------------------------------- /examples/mcp/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/mcp/README.md -------------------------------------------------------------------------------- /examples/mcp/prepare_hotpotqa_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/mcp/prepare_hotpotqa_data.py -------------------------------------------------------------------------------- /examples/mcp/run_tool_mcp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/mcp/run_tool_mcp.py -------------------------------------------------------------------------------- /examples/miniwob/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/miniwob/README.md -------------------------------------------------------------------------------- /examples/miniwob/prepare_miniwob_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/miniwob/prepare_miniwob_data.py -------------------------------------------------------------------------------- /examples/miniwob/run_miniwob_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/miniwob/run_miniwob_agent.py -------------------------------------------------------------------------------- /examples/miniwob/train_miniwob_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/miniwob/train_miniwob_agent.py -------------------------------------------------------------------------------- /examples/miniwob/train_miniwob_agent.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/miniwob/train_miniwob_agent.sh -------------------------------------------------------------------------------- /examples/miniwob/train_miniwob_agent_broadcast.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/miniwob/train_miniwob_agent_broadcast.sh -------------------------------------------------------------------------------- /examples/search/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/search/README.md -------------------------------------------------------------------------------- /examples/search/download_search_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/search/download_search_data.py -------------------------------------------------------------------------------- /examples/search/local_retrieval_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/search/local_retrieval_tool.py -------------------------------------------------------------------------------- /examples/search/prepare_hotpotqa_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/search/prepare_hotpotqa_data.py -------------------------------------------------------------------------------- /examples/search/retrieval/launch_server.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/search/retrieval/launch_server.sh -------------------------------------------------------------------------------- /examples/search/retrieval/server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/search/retrieval/server.py -------------------------------------------------------------------------------- /examples/search/run_search_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/search/run_search_agent.py -------------------------------------------------------------------------------- /examples/search/train_search_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/search/train_search_agent.py -------------------------------------------------------------------------------- /examples/search/train_search_agent.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/search/train_search_agent.sh -------------------------------------------------------------------------------- /examples/sft/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/sft/README.md -------------------------------------------------------------------------------- /examples/sft/generate_sft_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/sft/generate_sft_data.py -------------------------------------------------------------------------------- /examples/sft/prepare_math_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/sft/prepare_math_data.py -------------------------------------------------------------------------------- /examples/sft/run_sft_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/sft/run_sft_model.py -------------------------------------------------------------------------------- /examples/sft/train_math_sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/sft/train_math_sft.py -------------------------------------------------------------------------------- /examples/sft/train_math_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/sft/train_math_sft.sh -------------------------------------------------------------------------------- /examples/simple_math/prepare_math_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/simple_math/prepare_math_dataset.py -------------------------------------------------------------------------------- /examples/simple_math/train_hendrycks_math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/simple_math/train_hendrycks_math.py -------------------------------------------------------------------------------- /examples/simple_math/train_hendrycks_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/simple_math/train_hendrycks_math.sh -------------------------------------------------------------------------------- /examples/simple_math/workflow/simple_workflow/train_hendrycks_math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/simple_math/workflow/simple_workflow/train_hendrycks_math.py -------------------------------------------------------------------------------- /examples/simple_math/workflow/simple_workflow/train_hendrycks_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/simple_math/workflow/simple_workflow/train_hendrycks_math.sh -------------------------------------------------------------------------------- /examples/simple_math/workflow/single_turn_workflow/train_hendrycks_math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/simple_math/workflow/single_turn_workflow/train_hendrycks_math.py -------------------------------------------------------------------------------- /examples/simple_math/workflow/single_turn_workflow/train_hendrycks_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/simple_math/workflow/single_turn_workflow/train_hendrycks_math.sh -------------------------------------------------------------------------------- /examples/smolagents/run_smolagents.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/smolagents/run_smolagents.py -------------------------------------------------------------------------------- /examples/solver_judge/prepare_countdown_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/solver_judge/prepare_countdown_data.py -------------------------------------------------------------------------------- /examples/solver_judge/run_solver_judge_flow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/solver_judge/run_solver_judge_flow.py -------------------------------------------------------------------------------- /examples/solver_judge/solver_judge_flow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/solver_judge/solver_judge_flow.py -------------------------------------------------------------------------------- /examples/solver_judge/solver_judge_flow_colab.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/solver_judge/solver_judge_flow_colab.ipynb -------------------------------------------------------------------------------- /examples/solver_judge/train_solver_judge_flow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/solver_judge/train_solver_judge_flow.py -------------------------------------------------------------------------------- /examples/solver_judge/train_solver_judge_flow.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/solver_judge/train_solver_judge_flow.sh -------------------------------------------------------------------------------- /examples/strands/.env.example: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/strands/.env.example -------------------------------------------------------------------------------- /examples/strands/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/strands/README.md -------------------------------------------------------------------------------- /examples/strands/eval/gaia/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/strands/eval/gaia/README.md -------------------------------------------------------------------------------- /examples/strands/eval/gaia/gaia_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/strands/eval/gaia/gaia_config.yaml -------------------------------------------------------------------------------- /examples/strands/eval/gaia/gaia_evaluator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/strands/eval/gaia/gaia_evaluator.py -------------------------------------------------------------------------------- /examples/strands/eval/gaia/run_gaia_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/strands/eval/gaia/run_gaia_eval.py -------------------------------------------------------------------------------- /examples/strands/gsearch_tool_wrapped.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/strands/gsearch_tool_wrapped.py -------------------------------------------------------------------------------- /examples/strands/run_strands.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/strands/run_strands.py -------------------------------------------------------------------------------- /examples/strands/strands_workflow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/strands/strands_workflow.py -------------------------------------------------------------------------------- /examples/swe/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/swe/README.md -------------------------------------------------------------------------------- /examples/swe/prepare_swe_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/swe/prepare_swe_data.py -------------------------------------------------------------------------------- /examples/swe/run_deepswe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/swe/run_deepswe.py -------------------------------------------------------------------------------- /examples/swe/train_deepswe_32b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/swe/train_deepswe_32b.sh -------------------------------------------------------------------------------- /examples/swe/train_deepswe_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/swe/train_deepswe_agent.py -------------------------------------------------------------------------------- /examples/terminal/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/terminal/README.md -------------------------------------------------------------------------------- /examples/terminal/prepare_terminal_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/terminal/prepare_terminal_data.py -------------------------------------------------------------------------------- /examples/terminal/run_terminus.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/terminal/run_terminus.py -------------------------------------------------------------------------------- /examples/terminal/terminus_workflow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/terminal/terminus_workflow.py -------------------------------------------------------------------------------- /examples/vimgolf/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/vimgolf/README.md -------------------------------------------------------------------------------- /examples/vimgolf/lib.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/vimgolf/lib.py -------------------------------------------------------------------------------- /examples/vimgolf/prepare_vimgolf_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/vimgolf/prepare_vimgolf_data.py -------------------------------------------------------------------------------- /examples/vimgolf/requirements.txt: -------------------------------------------------------------------------------- 1 | vimgolf-gym==0.1.1 2 | hydra-core 3 | omegaconf -------------------------------------------------------------------------------- /examples/vimgolf/run_vimgolf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/vimgolf/run_vimgolf.py -------------------------------------------------------------------------------- /examples/vimgolf/train_vimgolf_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/vimgolf/train_vimgolf_agent.py -------------------------------------------------------------------------------- /examples/vimgolf/train_vimgolf_agent.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/vimgolf/train_vimgolf_agent.sh -------------------------------------------------------------------------------- /examples/vimgolf/vimgolf_public_challenges.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/examples/vimgolf/vimgolf_public_challenges.jsonl -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/mkdocs.yml -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/pyproject.toml -------------------------------------------------------------------------------- /rllm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/__init__.py -------------------------------------------------------------------------------- /rllm/agents/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/agents/__init__.py -------------------------------------------------------------------------------- /rllm/agents/agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/agents/agent.py -------------------------------------------------------------------------------- /rllm/agents/appworld_react_agents.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/agents/appworld_react_agents.py -------------------------------------------------------------------------------- /rllm/agents/code_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/agents/code_agent.py -------------------------------------------------------------------------------- /rllm/agents/frozenlake_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/agents/frozenlake_agent.py -------------------------------------------------------------------------------- /rllm/agents/math_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/agents/math_agent.py -------------------------------------------------------------------------------- /rllm/agents/miniwob_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/agents/miniwob_agent.py -------------------------------------------------------------------------------- /rllm/agents/swe_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/agents/swe_agent.py -------------------------------------------------------------------------------- /rllm/agents/system_prompts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/agents/system_prompts.py -------------------------------------------------------------------------------- /rllm/agents/tool_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/agents/tool_agent.py -------------------------------------------------------------------------------- /rllm/agents/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/agents/utils.py -------------------------------------------------------------------------------- /rllm/agents/webarena_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/agents/webarena_agent.py -------------------------------------------------------------------------------- /rllm/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/data/__init__.py -------------------------------------------------------------------------------- /rllm/data/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/data/dataset.py -------------------------------------------------------------------------------- /rllm/data/dataset_types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/data/dataset_types.py -------------------------------------------------------------------------------- /rllm/data/preprocess/code/apps.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/data/preprocess/code/apps.ipynb -------------------------------------------------------------------------------- /rllm/data/preprocess/code/code_contests.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/data/preprocess/code/code_contests.ipynb -------------------------------------------------------------------------------- /rllm/data/preprocess/code/codeforces.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/data/preprocess/code/codeforces.ipynb -------------------------------------------------------------------------------- /rllm/data/preprocess/code/humanevalplus.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/data/preprocess/code/humanevalplus.ipynb -------------------------------------------------------------------------------- /rllm/data/preprocess/code/kodcode.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/data/preprocess/code/kodcode.ipynb -------------------------------------------------------------------------------- /rllm/data/preprocess/code/leetcode.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/data/preprocess/code/leetcode.ipynb -------------------------------------------------------------------------------- /rllm/data/preprocess/code/livecodebench.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/data/preprocess/code/livecodebench.ipynb -------------------------------------------------------------------------------- /rllm/data/preprocess/code/primeintellect.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/data/preprocess/code/primeintellect.ipynb -------------------------------------------------------------------------------- /rllm/data/preprocess/code/taco.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/data/preprocess/code/taco.ipynb -------------------------------------------------------------------------------- /rllm/data/preprocess/difficulty_judge.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/data/preprocess/difficulty_judge.py -------------------------------------------------------------------------------- /rllm/data/preprocess/gaia/gaia.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/data/preprocess/gaia/gaia.ipynb -------------------------------------------------------------------------------- /rllm/data/preprocess/math/amc.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/data/preprocess/math/amc.ipynb -------------------------------------------------------------------------------- /rllm/data/preprocess/math/deepscaler.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/data/preprocess/math/deepscaler.ipynb -------------------------------------------------------------------------------- /rllm/data/preprocess/math/deepscaler_7b.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/data/preprocess/math/deepscaler_7b.ipynb -------------------------------------------------------------------------------- /rllm/data/preprocess/math/gsm8k.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/data/preprocess/math/gsm8k.ipynb -------------------------------------------------------------------------------- /rllm/data/preprocess/math/math.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/data/preprocess/math/math.ipynb -------------------------------------------------------------------------------- /rllm/data/preprocess/math/minerva.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/data/preprocess/math/minerva.ipynb -------------------------------------------------------------------------------- /rllm/data/preprocess/math/numina_olympiad.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/data/preprocess/math/numina_olympiad.ipynb -------------------------------------------------------------------------------- /rllm/data/preprocess/math/olympiad_bench.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/data/preprocess/math/olympiad_bench.ipynb -------------------------------------------------------------------------------- /rllm/data/preprocess/math/omni_math.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/data/preprocess/math/omni_math.ipynb -------------------------------------------------------------------------------- /rllm/data/preprocess/math/still.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/data/preprocess/math/still.ipynb -------------------------------------------------------------------------------- /rllm/data/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/data/utils.py -------------------------------------------------------------------------------- /rllm/docs/config.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/docs/config.rst -------------------------------------------------------------------------------- /rllm/engine/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/engine/__init__.py -------------------------------------------------------------------------------- /rllm/engine/agent_execution_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/engine/agent_execution_engine.py -------------------------------------------------------------------------------- /rllm/engine/agent_workflow_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/engine/agent_workflow_engine.py -------------------------------------------------------------------------------- /rllm/engine/rollout/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/engine/rollout/__init__.py -------------------------------------------------------------------------------- /rllm/engine/rollout/fireworks_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/engine/rollout/fireworks_engine.py -------------------------------------------------------------------------------- /rllm/engine/rollout/openai_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/engine/rollout/openai_engine.py -------------------------------------------------------------------------------- /rllm/engine/rollout/rollout_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/engine/rollout/rollout_engine.py -------------------------------------------------------------------------------- /rllm/engine/rollout/verl_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/engine/rollout/verl_engine.py -------------------------------------------------------------------------------- /rllm/environments/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/environments/__init__.py -------------------------------------------------------------------------------- /rllm/environments/appworld/appworld_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/environments/appworld/appworld_env.py -------------------------------------------------------------------------------- /rllm/environments/appworld/signal_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/environments/appworld/signal_patch.py -------------------------------------------------------------------------------- /rllm/environments/base/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/environments/base/__init__.py -------------------------------------------------------------------------------- /rllm/environments/base/base_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/environments/base/base_env.py -------------------------------------------------------------------------------- /rllm/environments/base/multi_turn_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/environments/base/multi_turn_env.py -------------------------------------------------------------------------------- /rllm/environments/base/single_turn_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/environments/base/single_turn_env.py -------------------------------------------------------------------------------- /rllm/environments/browsergym/browsergym.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/environments/browsergym/browsergym.py -------------------------------------------------------------------------------- /rllm/environments/code/competition_coding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/environments/code/competition_coding.py -------------------------------------------------------------------------------- /rllm/environments/env_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/environments/env_utils.py -------------------------------------------------------------------------------- /rllm/environments/frozenlake/frozenlake.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/environments/frozenlake/frozenlake.py -------------------------------------------------------------------------------- /rllm/environments/swe/cache_images_k8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/environments/swe/cache_images_k8.py -------------------------------------------------------------------------------- /rllm/environments/swe/swe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/environments/swe/swe.py -------------------------------------------------------------------------------- /rllm/environments/tools/mcp_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/environments/tools/mcp_env.py -------------------------------------------------------------------------------- /rllm/environments/tools/tool_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/environments/tools/tool_env.py -------------------------------------------------------------------------------- /rllm/globals.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/globals.py -------------------------------------------------------------------------------- /rllm/integrations/smolagents.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/integrations/smolagents.py -------------------------------------------------------------------------------- /rllm/integrations/strands.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/integrations/strands.py -------------------------------------------------------------------------------- /rllm/integrations/strands_adapter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/integrations/strands_adapter.py -------------------------------------------------------------------------------- /rllm/integrations/terminal_terminus_1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/integrations/terminal_terminus_1.py -------------------------------------------------------------------------------- /rllm/misc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/misc.py -------------------------------------------------------------------------------- /rllm/parser/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/parser/__init__.py -------------------------------------------------------------------------------- /rllm/parser/chat_template_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/parser/chat_template_parser.py -------------------------------------------------------------------------------- /rllm/parser/tool_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/parser/tool_parser.py -------------------------------------------------------------------------------- /rllm/parser/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/parser/utils.py -------------------------------------------------------------------------------- /rllm/patches/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rllm/patches/verl_patch_hook.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/patches/verl_patch_hook.py -------------------------------------------------------------------------------- /rllm/rewards/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/rewards/__init__.py -------------------------------------------------------------------------------- /rllm/rewards/code_reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/rewards/code_reward.py -------------------------------------------------------------------------------- /rllm/rewards/code_utils/codeforces.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/rewards/code_utils/codeforces.py -------------------------------------------------------------------------------- /rllm/rewards/code_utils/firejail_exec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/rewards/code_utils/firejail_exec.py -------------------------------------------------------------------------------- /rllm/rewards/code_utils/humanevalplus.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/rewards/code_utils/humanevalplus.py -------------------------------------------------------------------------------- /rllm/rewards/code_utils/kodcode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/rewards/code_utils/kodcode.py -------------------------------------------------------------------------------- /rllm/rewards/code_utils/livecodebench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/rewards/code_utils/livecodebench.py -------------------------------------------------------------------------------- /rllm/rewards/code_utils/pyext2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/rewards/code_utils/pyext2.py -------------------------------------------------------------------------------- /rllm/rewards/code_utils/swebench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/rewards/code_utils/swebench.py -------------------------------------------------------------------------------- /rllm/rewards/code_utils/taco.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/rewards/code_utils/taco.py -------------------------------------------------------------------------------- /rllm/rewards/code_utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/rewards/code_utils/utils.py -------------------------------------------------------------------------------- /rllm/rewards/countdown_reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/rewards/countdown_reward.py -------------------------------------------------------------------------------- /rllm/rewards/math_reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/rewards/math_reward.py -------------------------------------------------------------------------------- /rllm/rewards/math_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/rewards/math_utils/__init__.py -------------------------------------------------------------------------------- /rllm/rewards/math_utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/rewards/math_utils/utils.py -------------------------------------------------------------------------------- /rllm/rewards/reward_fn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/rewards/reward_fn.py -------------------------------------------------------------------------------- /rllm/rewards/reward_types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/rewards/reward_types.py -------------------------------------------------------------------------------- /rllm/rewards/search_reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/rewards/search_reward.py -------------------------------------------------------------------------------- /rllm/system_prompts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/system_prompts.py -------------------------------------------------------------------------------- /rllm/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/tools/__init__.py -------------------------------------------------------------------------------- /rllm/tools/code_tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/tools/code_tools/__init__.py -------------------------------------------------------------------------------- /rllm/tools/code_tools/code_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/tools/code_tools/code_tool.py -------------------------------------------------------------------------------- /rllm/tools/code_tools/e2b_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/tools/code_tools/e2b_tool.py -------------------------------------------------------------------------------- /rllm/tools/code_tools/lcb_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/tools/code_tools/lcb_tool.py -------------------------------------------------------------------------------- /rllm/tools/code_tools/python_interpreter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/tools/code_tools/python_interpreter.py -------------------------------------------------------------------------------- /rllm/tools/code_tools/together_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/tools/code_tools/together_tool.py -------------------------------------------------------------------------------- /rllm/tools/mcp_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/tools/mcp_tool.py -------------------------------------------------------------------------------- /rllm/tools/multi_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/tools/multi_tool.py -------------------------------------------------------------------------------- /rllm/tools/registry.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/tools/registry.py -------------------------------------------------------------------------------- /rllm/tools/tool_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/tools/tool_base.py -------------------------------------------------------------------------------- /rllm/tools/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/tools/utils.py -------------------------------------------------------------------------------- /rllm/tools/web_tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/tools/web_tools/__init__.py -------------------------------------------------------------------------------- /rllm/tools/web_tools/firecrawl_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/tools/web_tools/firecrawl_tool.py -------------------------------------------------------------------------------- /rllm/tools/web_tools/gsearch_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/tools/web_tools/gsearch_tool.py -------------------------------------------------------------------------------- /rllm/tools/web_tools/tavily_tool.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/tools/web_tools/tavily_tool.py -------------------------------------------------------------------------------- /rllm/trainer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/trainer/__init__.py -------------------------------------------------------------------------------- /rllm/trainer/agent_sft_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/trainer/agent_sft_trainer.py -------------------------------------------------------------------------------- /rllm/trainer/agent_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/trainer/agent_trainer.py -------------------------------------------------------------------------------- /rllm/trainer/config/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rllm/trainer/config/_generated_agent_ppo_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/trainer/config/_generated_agent_ppo_trainer.yaml -------------------------------------------------------------------------------- /rllm/trainer/config/agent_ppo_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/trainer/config/agent_ppo_trainer.yaml -------------------------------------------------------------------------------- /rllm/trainer/config/agent_ppo_trainer_megatron.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/trainer/config/agent_ppo_trainer_megatron.yaml -------------------------------------------------------------------------------- /rllm/trainer/config/agent_sft_trainer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/trainer/config/agent_sft_trainer.yaml -------------------------------------------------------------------------------- /rllm/trainer/env_agent_mappings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/trainer/env_agent_mappings.py -------------------------------------------------------------------------------- /rllm/trainer/verl/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /rllm/trainer/verl/agent_ppo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/trainer/verl/agent_ppo_trainer.py -------------------------------------------------------------------------------- /rllm/trainer/verl/agent_ppo_trainer_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/trainer/verl/agent_ppo_trainer_pipeline.py -------------------------------------------------------------------------------- /rllm/trainer/verl/agent_workflow_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/trainer/verl/agent_workflow_trainer.py -------------------------------------------------------------------------------- /rllm/trainer/verl/agent_workflow_trainer_fireworks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/trainer/verl/agent_workflow_trainer_fireworks.py -------------------------------------------------------------------------------- /rllm/trainer/verl/ray_runtime_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/trainer/verl/ray_runtime_env.py -------------------------------------------------------------------------------- /rllm/trainer/verl/train_agent_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/trainer/verl/train_agent_ppo.py -------------------------------------------------------------------------------- /rllm/trainer/verl/train_agent_ppo_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/trainer/verl/train_agent_ppo_pipeline.py -------------------------------------------------------------------------------- /rllm/trainer/verl/train_workflow_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/trainer/verl/train_workflow_pipeline.py -------------------------------------------------------------------------------- /rllm/trajectory_visualizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/trajectory_visualizer.py -------------------------------------------------------------------------------- /rllm/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/utils/__init__.py -------------------------------------------------------------------------------- /rllm/utils/compute_pass_at_k.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/utils/compute_pass_at_k.py -------------------------------------------------------------------------------- /rllm/utils/episode_logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/utils/episode_logger.py -------------------------------------------------------------------------------- /rllm/utils/visualization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/utils/visualization.py -------------------------------------------------------------------------------- /rllm/workflows/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/workflows/__init__.py -------------------------------------------------------------------------------- /rllm/workflows/cumulative_workflow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/workflows/cumulative_workflow.py -------------------------------------------------------------------------------- /rllm/workflows/eval_protocol_workflow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/workflows/eval_protocol_workflow.py -------------------------------------------------------------------------------- /rllm/workflows/multi_turn_workflow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/workflows/multi_turn_workflow.py -------------------------------------------------------------------------------- /rllm/workflows/simple_workflow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/workflows/simple_workflow.py -------------------------------------------------------------------------------- /rllm/workflows/single_turn_workflow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/workflows/single_turn_workflow.py -------------------------------------------------------------------------------- /rllm/workflows/timing_mixin.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/workflows/timing_mixin.py -------------------------------------------------------------------------------- /rllm/workflows/workflow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/rllm/workflows/workflow.py -------------------------------------------------------------------------------- /scripts/agent/frozenlake/run_frozenlake.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/scripts/agent/frozenlake/run_frozenlake.sh -------------------------------------------------------------------------------- /scripts/agent/frozenlake/run_frozenlake_broadcast.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/scripts/agent/frozenlake/run_frozenlake_broadcast.sh -------------------------------------------------------------------------------- /scripts/agent/frozenlake/run_frozenlake_pipeline.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/scripts/agent/frozenlake/run_frozenlake_pipeline.sh -------------------------------------------------------------------------------- /scripts/agent/math/run_agent_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/scripts/agent/math/run_agent_math.sh -------------------------------------------------------------------------------- /scripts/agent/math/run_agent_math_pipeline.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/scripts/agent/math/run_agent_math_pipeline.sh -------------------------------------------------------------------------------- /scripts/agent/miniwob/run_miniwob.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/scripts/agent/miniwob/run_miniwob.sh -------------------------------------------------------------------------------- /scripts/agent/miniwob/run_miniwob_broadcast.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/scripts/agent/miniwob/run_miniwob_broadcast.sh -------------------------------------------------------------------------------- /scripts/agent/swe/deepswe_32b.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/scripts/agent/swe/deepswe_32b.sh -------------------------------------------------------------------------------- /scripts/agent/webarena/run_webarena_stepwise.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/scripts/agent/webarena/run_webarena_stepwise.sh -------------------------------------------------------------------------------- /scripts/benchmark/cf_elo_calc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/scripts/benchmark/cf_elo_calc.py -------------------------------------------------------------------------------- /scripts/benchmark/codeforces/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scripts/benchmark/codeforces/metadata_cf.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/scripts/benchmark/codeforces/metadata_cf.json -------------------------------------------------------------------------------- /scripts/benchmark/codeforces/ratings_2024.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/scripts/benchmark/codeforces/ratings_2024.json -------------------------------------------------------------------------------- /scripts/benchmark/open_ai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/scripts/benchmark/open_ai.py -------------------------------------------------------------------------------- /scripts/benchmark/together_ai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/scripts/benchmark/together_ai.py -------------------------------------------------------------------------------- /scripts/data/code_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/scripts/data/code_dataset.py -------------------------------------------------------------------------------- /scripts/data/dedupe_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/scripts/data/dedupe_dataset.py -------------------------------------------------------------------------------- /scripts/data/deepscaler_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/scripts/data/deepscaler_dataset.py -------------------------------------------------------------------------------- /scripts/data/download_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/scripts/data/download_datasets.py -------------------------------------------------------------------------------- /scripts/data/download_gaia.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/scripts/data/download_gaia.py -------------------------------------------------------------------------------- /scripts/data/frozenlake_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/scripts/data/frozenlake_dataset.py -------------------------------------------------------------------------------- /scripts/data/gaia_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/scripts/data/gaia_dataset.py -------------------------------------------------------------------------------- /scripts/data/miniwob_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/scripts/data/miniwob_dataset.py -------------------------------------------------------------------------------- /scripts/data/swe_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/scripts/data/swe_dataset.py -------------------------------------------------------------------------------- /scripts/data/webarena_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/scripts/data/webarena_dataset.py -------------------------------------------------------------------------------- /scripts/dump_cfg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/scripts/dump_cfg.py -------------------------------------------------------------------------------- /scripts/install_verl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/scripts/install_verl.sh -------------------------------------------------------------------------------- /scripts/train/async_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/scripts/train/async_math.sh -------------------------------------------------------------------------------- /scripts/train/debug.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/scripts/train/debug.sh -------------------------------------------------------------------------------- /scripts/train/debug_agent.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/scripts/train/debug_agent.sh -------------------------------------------------------------------------------- /scripts/train/debug_async.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/scripts/train/debug_async.sh -------------------------------------------------------------------------------- /scripts/train/deepcoder/deepcoder_14b_coding_16k.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/scripts/train/deepcoder/deepcoder_14b_coding_16k.sh -------------------------------------------------------------------------------- /scripts/train/deepcoder/deepcoder_14b_coding_32k.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/scripts/train/deepcoder/deepcoder_14b_coding_32k.sh -------------------------------------------------------------------------------- /scripts/train/deepscaler_1.5b/run_deepscaler_1.5b_16k.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/scripts/train/deepscaler_1.5b/run_deepscaler_1.5b_16k.sh -------------------------------------------------------------------------------- /scripts/train/deepscaler_1.5b/run_deepscaler_1.5b_24k.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/scripts/train/deepscaler_1.5b/run_deepscaler_1.5b_24k.sh -------------------------------------------------------------------------------- /scripts/train/deepscaler_1.5b/run_deepscaler_1.5b_8k.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/scripts/train/deepscaler_1.5b/run_deepscaler_1.5b_8k.sh -------------------------------------------------------------------------------- /scripts/train/simple_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/scripts/train/simple_math.sh -------------------------------------------------------------------------------- /tests/agents/test_appworld_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/tests/agents/test_appworld_agent.py -------------------------------------------------------------------------------- /tests/agents/test_frozenlake_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/tests/agents/test_frozenlake_agent.py -------------------------------------------------------------------------------- /tests/agents/test_math_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/tests/agents/test_math_agent.py -------------------------------------------------------------------------------- /tests/agents/test_tool_agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/tests/agents/test_tool_agent.py -------------------------------------------------------------------------------- /tests/envs/test_frozenlake_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/tests/envs/test_frozenlake_env.py -------------------------------------------------------------------------------- /tests/envs/test_mcp_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/tests/envs/test_mcp_env.py -------------------------------------------------------------------------------- /tests/envs/test_tool_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/tests/envs/test_tool_env.py -------------------------------------------------------------------------------- /tests/parser/test_chat_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/tests/parser/test_chat_parser.py -------------------------------------------------------------------------------- /tests/parser/test_tool_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/tests/parser/test_tool_parser.py -------------------------------------------------------------------------------- /tests/rewards/test_code_reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/tests/rewards/test_code_reward.py -------------------------------------------------------------------------------- /tests/rewards/test_math_reward.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/tests/rewards/test_math_reward.py -------------------------------------------------------------------------------- /tests/tools/test_tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/tests/tools/test_tools.py -------------------------------------------------------------------------------- /tests/trainer/verl/test_ray_runtime_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/tests/trainer/verl/test_ray_runtime_env.py -------------------------------------------------------------------------------- /uv.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rllm-org/rllm/HEAD/uv.lock --------------------------------------------------------------------------------