├── .gitignore ├── LICENSE ├── NOTICE ├── README.md ├── assets ├── actor.png ├── figure1.jpg ├── guessing_success.png ├── logs.png ├── losses.png ├── rl_loss.png └── structure.jpg ├── conf ├── accelerate │ ├── base_mp.yaml │ ├── base_nomp.yaml │ ├── deepspeed.yaml │ ├── fsdp_mp.yaml │ ├── fsdp_mp_grad_op.yaml │ └── fsdp_nomp.yaml ├── actor │ └── web.yaml ├── base.yaml ├── base_4gpu.yaml ├── chartqa.yaml ├── counting.yaml ├── counting_tapeagent.yaml ├── debug.yaml ├── deepscaler15b.yaml ├── deepspeed │ ├── deepspeed_stage1.json │ ├── deepspeed_stage1_bf16.json │ ├── deepspeed_stage2_bf16.json │ ├── deepspeed_stage3.json │ ├── deepspeed_stage3_bf16.json │ └── deepspeed_stage3_bf16_group4.json ├── finetune │ ├── actor_critic.yaml │ ├── base.yaml │ ├── grpo.yaml │ └── ppo.yaml ├── guessing.yaml ├── math.yaml ├── mcp │ └── web.json ├── miniwob.yaml ├── miniwob_grpo.yaml ├── miniwob_uic_grpo.yaml ├── miniwob_uic_ppo.yaml ├── rewards │ ├── alex.yaml │ ├── base.yaml │ ├── dima.yaml │ ├── format.yaml │ ├── pure_success.yaml │ ├── success.yaml │ ├── success_and_format.yaml │ └── xiaoyin.yaml ├── streams │ ├── files.yaml │ └── redis.yaml └── test.yaml ├── pipelinerl ├── actor.py ├── architecture.png ├── async_llm.py ├── browse.py ├── countdown_utils.py ├── domains │ ├── __init__.py │ ├── chartqa │ │ ├── README.md │ │ ├── __init__.py │ │ ├── chartqa.py │ │ ├── evaluation.py │ │ ├── learning_curve.png │ │ └── load_datasets.py │ ├── counting │ │ ├── __init__.py │ │ ├── counting.py │ │ ├── tapeagent.py │ │ ├── test_counting_problems.json │ │ └── train_counting_problems.json │ ├── deep_research │ │ ├── __init__.py │ │ └── tapeagents_rollouts.py │ ├── guessing │ │ ├── __init__.py │ │ └── guessing.py │ ├── math │ │ ├── __init__.py │ │ ├── load_datasets.py │ │ ├── rollouts.py │ │ └── verifier_api.py │ └── miniwob │ │ ├── README.md │ │ ├── agent.py │ │ ├── environment.py │ │ ├── environment_server.py │ │ ├── load_tasks.py │ │ ├── prompts.py │ │ ├── rollouts.py │ │ ├── run_finetune.py │ │ ├── steps.py │ │ └── utils.py ├── entrypoints │ ├── run_actor.py │ ├── run_environment.py │ ├── run_finetune.py │ ├── run_preprocess.py │ ├── run_vllm0.py │ └── run_vllm1.py ├── finetune │ ├── __init__.py │ ├── checkpoints.py │ ├── context.py │ ├── data.py │ ├── eval.py │ ├── logging_.py │ ├── lora.py │ ├── optim.py │ ├── rl │ │ ├── __init__.py │ │ └── utils.py │ ├── types.py │ ├── utils.py │ └── value_model.py ├── finetune_loop.py ├── gather_jsons.py ├── launch.py ├── llm.py ├── preprocess.py ├── processor_factory.py ├── requirements.now-reasoner-reward.txt ├── rollouts.py ├── shared_memory_array.py ├── state.py ├── streams.py ├── torch_utils.py ├── utils.py ├── vllm0.py ├── vllm1.py └── world.py └── pyproject.toml /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/LICENSE -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/NOTICE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/README.md -------------------------------------------------------------------------------- /assets/actor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/assets/actor.png -------------------------------------------------------------------------------- /assets/figure1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/assets/figure1.jpg -------------------------------------------------------------------------------- /assets/guessing_success.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/assets/guessing_success.png -------------------------------------------------------------------------------- /assets/logs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/assets/logs.png -------------------------------------------------------------------------------- /assets/losses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/assets/losses.png -------------------------------------------------------------------------------- /assets/rl_loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/assets/rl_loss.png -------------------------------------------------------------------------------- /assets/structure.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/assets/structure.jpg -------------------------------------------------------------------------------- /conf/accelerate/base_mp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/accelerate/base_mp.yaml -------------------------------------------------------------------------------- /conf/accelerate/base_nomp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/accelerate/base_nomp.yaml -------------------------------------------------------------------------------- /conf/accelerate/deepspeed.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/accelerate/deepspeed.yaml -------------------------------------------------------------------------------- /conf/accelerate/fsdp_mp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/accelerate/fsdp_mp.yaml -------------------------------------------------------------------------------- /conf/accelerate/fsdp_mp_grad_op.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/accelerate/fsdp_mp_grad_op.yaml -------------------------------------------------------------------------------- /conf/accelerate/fsdp_nomp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/accelerate/fsdp_nomp.yaml -------------------------------------------------------------------------------- /conf/actor/web.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/actor/web.yaml -------------------------------------------------------------------------------- /conf/base.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/base.yaml -------------------------------------------------------------------------------- /conf/base_4gpu.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/base_4gpu.yaml -------------------------------------------------------------------------------- /conf/chartqa.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/chartqa.yaml -------------------------------------------------------------------------------- /conf/counting.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/counting.yaml -------------------------------------------------------------------------------- /conf/counting_tapeagent.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/counting_tapeagent.yaml -------------------------------------------------------------------------------- /conf/debug.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/debug.yaml -------------------------------------------------------------------------------- /conf/deepscaler15b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/deepscaler15b.yaml -------------------------------------------------------------------------------- /conf/deepspeed/deepspeed_stage1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/deepspeed/deepspeed_stage1.json -------------------------------------------------------------------------------- /conf/deepspeed/deepspeed_stage1_bf16.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/deepspeed/deepspeed_stage1_bf16.json -------------------------------------------------------------------------------- /conf/deepspeed/deepspeed_stage2_bf16.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/deepspeed/deepspeed_stage2_bf16.json -------------------------------------------------------------------------------- /conf/deepspeed/deepspeed_stage3.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/deepspeed/deepspeed_stage3.json -------------------------------------------------------------------------------- /conf/deepspeed/deepspeed_stage3_bf16.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/deepspeed/deepspeed_stage3_bf16.json -------------------------------------------------------------------------------- /conf/deepspeed/deepspeed_stage3_bf16_group4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/deepspeed/deepspeed_stage3_bf16_group4.json -------------------------------------------------------------------------------- /conf/finetune/actor_critic.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/finetune/actor_critic.yaml -------------------------------------------------------------------------------- /conf/finetune/base.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/finetune/base.yaml -------------------------------------------------------------------------------- /conf/finetune/grpo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/finetune/grpo.yaml -------------------------------------------------------------------------------- /conf/finetune/ppo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/finetune/ppo.yaml -------------------------------------------------------------------------------- /conf/guessing.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/guessing.yaml -------------------------------------------------------------------------------- /conf/math.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/math.yaml -------------------------------------------------------------------------------- /conf/mcp/web.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/mcp/web.json -------------------------------------------------------------------------------- /conf/miniwob.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/miniwob.yaml -------------------------------------------------------------------------------- /conf/miniwob_grpo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/miniwob_grpo.yaml -------------------------------------------------------------------------------- /conf/miniwob_uic_grpo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/miniwob_uic_grpo.yaml -------------------------------------------------------------------------------- /conf/miniwob_uic_ppo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/miniwob_uic_ppo.yaml -------------------------------------------------------------------------------- /conf/rewards/alex.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/rewards/alex.yaml -------------------------------------------------------------------------------- /conf/rewards/base.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/rewards/base.yaml -------------------------------------------------------------------------------- /conf/rewards/dima.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/rewards/dima.yaml -------------------------------------------------------------------------------- /conf/rewards/format.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/rewards/format.yaml -------------------------------------------------------------------------------- /conf/rewards/pure_success.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/rewards/pure_success.yaml -------------------------------------------------------------------------------- /conf/rewards/success.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/rewards/success.yaml -------------------------------------------------------------------------------- /conf/rewards/success_and_format.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/rewards/success_and_format.yaml -------------------------------------------------------------------------------- /conf/rewards/xiaoyin.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/rewards/xiaoyin.yaml -------------------------------------------------------------------------------- /conf/streams/files.yaml: -------------------------------------------------------------------------------- 1 | backend: files -------------------------------------------------------------------------------- /conf/streams/redis.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/streams/redis.yaml -------------------------------------------------------------------------------- /conf/test.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/conf/test.yaml -------------------------------------------------------------------------------- /pipelinerl/actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/actor.py -------------------------------------------------------------------------------- /pipelinerl/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/architecture.png -------------------------------------------------------------------------------- /pipelinerl/async_llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/async_llm.py -------------------------------------------------------------------------------- /pipelinerl/browse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/browse.py -------------------------------------------------------------------------------- /pipelinerl/countdown_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/countdown_utils.py -------------------------------------------------------------------------------- /pipelinerl/domains/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pipelinerl/domains/chartqa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/domains/chartqa/README.md -------------------------------------------------------------------------------- /pipelinerl/domains/chartqa/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/domains/chartqa/__init__.py -------------------------------------------------------------------------------- /pipelinerl/domains/chartqa/chartqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/domains/chartqa/chartqa.py -------------------------------------------------------------------------------- /pipelinerl/domains/chartqa/evaluation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/domains/chartqa/evaluation.py -------------------------------------------------------------------------------- /pipelinerl/domains/chartqa/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/domains/chartqa/learning_curve.png -------------------------------------------------------------------------------- /pipelinerl/domains/chartqa/load_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/domains/chartqa/load_datasets.py -------------------------------------------------------------------------------- /pipelinerl/domains/counting/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/domains/counting/__init__.py -------------------------------------------------------------------------------- /pipelinerl/domains/counting/counting.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/domains/counting/counting.py -------------------------------------------------------------------------------- /pipelinerl/domains/counting/tapeagent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/domains/counting/tapeagent.py -------------------------------------------------------------------------------- /pipelinerl/domains/counting/test_counting_problems.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/domains/counting/test_counting_problems.json -------------------------------------------------------------------------------- /pipelinerl/domains/counting/train_counting_problems.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/domains/counting/train_counting_problems.json -------------------------------------------------------------------------------- /pipelinerl/domains/deep_research/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pipelinerl/domains/deep_research/tapeagents_rollouts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/domains/deep_research/tapeagents_rollouts.py -------------------------------------------------------------------------------- /pipelinerl/domains/guessing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/domains/guessing/__init__.py -------------------------------------------------------------------------------- /pipelinerl/domains/guessing/guessing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/domains/guessing/guessing.py -------------------------------------------------------------------------------- /pipelinerl/domains/math/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/domains/math/__init__.py -------------------------------------------------------------------------------- /pipelinerl/domains/math/load_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/domains/math/load_datasets.py -------------------------------------------------------------------------------- /pipelinerl/domains/math/rollouts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/domains/math/rollouts.py -------------------------------------------------------------------------------- /pipelinerl/domains/math/verifier_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/domains/math/verifier_api.py -------------------------------------------------------------------------------- /pipelinerl/domains/miniwob/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/domains/miniwob/README.md -------------------------------------------------------------------------------- /pipelinerl/domains/miniwob/agent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/domains/miniwob/agent.py -------------------------------------------------------------------------------- /pipelinerl/domains/miniwob/environment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/domains/miniwob/environment.py -------------------------------------------------------------------------------- /pipelinerl/domains/miniwob/environment_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/domains/miniwob/environment_server.py -------------------------------------------------------------------------------- /pipelinerl/domains/miniwob/load_tasks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/domains/miniwob/load_tasks.py -------------------------------------------------------------------------------- /pipelinerl/domains/miniwob/prompts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/domains/miniwob/prompts.py -------------------------------------------------------------------------------- /pipelinerl/domains/miniwob/rollouts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/domains/miniwob/rollouts.py -------------------------------------------------------------------------------- /pipelinerl/domains/miniwob/run_finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/domains/miniwob/run_finetune.py -------------------------------------------------------------------------------- /pipelinerl/domains/miniwob/steps.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/domains/miniwob/steps.py -------------------------------------------------------------------------------- /pipelinerl/domains/miniwob/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/domains/miniwob/utils.py -------------------------------------------------------------------------------- /pipelinerl/entrypoints/run_actor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/entrypoints/run_actor.py -------------------------------------------------------------------------------- /pipelinerl/entrypoints/run_environment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/entrypoints/run_environment.py -------------------------------------------------------------------------------- /pipelinerl/entrypoints/run_finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/entrypoints/run_finetune.py -------------------------------------------------------------------------------- /pipelinerl/entrypoints/run_preprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/entrypoints/run_preprocess.py -------------------------------------------------------------------------------- /pipelinerl/entrypoints/run_vllm0.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/entrypoints/run_vllm0.py -------------------------------------------------------------------------------- /pipelinerl/entrypoints/run_vllm1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/entrypoints/run_vllm1.py -------------------------------------------------------------------------------- /pipelinerl/finetune/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Finetuning LLMs with tape data. 3 | """ 4 | -------------------------------------------------------------------------------- /pipelinerl/finetune/checkpoints.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/finetune/checkpoints.py -------------------------------------------------------------------------------- /pipelinerl/finetune/context.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/finetune/context.py -------------------------------------------------------------------------------- /pipelinerl/finetune/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/finetune/data.py -------------------------------------------------------------------------------- /pipelinerl/finetune/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/finetune/eval.py -------------------------------------------------------------------------------- /pipelinerl/finetune/logging_.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/finetune/logging_.py -------------------------------------------------------------------------------- /pipelinerl/finetune/lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/finetune/lora.py -------------------------------------------------------------------------------- /pipelinerl/finetune/optim.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/finetune/optim.py -------------------------------------------------------------------------------- /pipelinerl/finetune/rl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/finetune/rl/__init__.py -------------------------------------------------------------------------------- /pipelinerl/finetune/rl/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/finetune/rl/utils.py -------------------------------------------------------------------------------- /pipelinerl/finetune/types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/finetune/types.py -------------------------------------------------------------------------------- /pipelinerl/finetune/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/finetune/utils.py -------------------------------------------------------------------------------- /pipelinerl/finetune/value_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/finetune/value_model.py -------------------------------------------------------------------------------- /pipelinerl/finetune_loop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/finetune_loop.py -------------------------------------------------------------------------------- /pipelinerl/gather_jsons.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/gather_jsons.py -------------------------------------------------------------------------------- /pipelinerl/launch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/launch.py -------------------------------------------------------------------------------- /pipelinerl/llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/llm.py -------------------------------------------------------------------------------- /pipelinerl/preprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/preprocess.py -------------------------------------------------------------------------------- /pipelinerl/processor_factory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/processor_factory.py -------------------------------------------------------------------------------- /pipelinerl/requirements.now-reasoner-reward.txt: -------------------------------------------------------------------------------- 1 | math_verify==0.4.1 -------------------------------------------------------------------------------- /pipelinerl/rollouts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/rollouts.py -------------------------------------------------------------------------------- /pipelinerl/shared_memory_array.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/shared_memory_array.py -------------------------------------------------------------------------------- /pipelinerl/state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/state.py -------------------------------------------------------------------------------- /pipelinerl/streams.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/streams.py -------------------------------------------------------------------------------- /pipelinerl/torch_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/torch_utils.py -------------------------------------------------------------------------------- /pipelinerl/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/utils.py -------------------------------------------------------------------------------- /pipelinerl/vllm0.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/vllm0.py -------------------------------------------------------------------------------- /pipelinerl/vllm1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/vllm1.py -------------------------------------------------------------------------------- /pipelinerl/world.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pipelinerl/world.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ServiceNow/PipelineRL/HEAD/pyproject.toml --------------------------------------------------------------------------------