├── .gitignore ├── LICENSE ├── README.md ├── __init__.py ├── accelerate_config ├── fsdp_2gpu.yaml ├── fsdp_2x2gpu.yaml ├── fsdp_2x4gpu.yaml ├── fsdp_2x8gpu.yaml ├── fsdp_4gpu.yaml └── fsdp_8gpu.yaml ├── alpaca_eval.txt ├── alpaca_eval_gpt-4.1-mini.yaml ├── alpaca_eval_gpt-4.1.yaml ├── config ├── config.yaml ├── loss │ ├── bradley-terry.yaml │ ├── cdpo.yaml │ ├── csft.yaml │ ├── dpo.yaml │ ├── grpo.yaml │ ├── ipo.yaml │ ├── kto.yaml │ ├── ppo.yaml │ ├── sft.yaml │ ├── simpo.yaml │ └── slic.yaml ├── model │ ├── base_model.yaml │ ├── gemma.yaml │ ├── llama.yaml │ ├── mistral.yaml │ ├── pythia.yaml │ └── qwen.yaml └── template.jinja ├── evals └── scripts │ └── summarize_metrics.py ├── examples ├── binary_feedback.json └── pairwise_feedback.json ├── install.sh ├── launch.py ├── scripts ├── interactive_ppo.sh ├── interactive_ppo_online.sh ├── launch_llama_dpo.sh ├── launch_llama_dpo_online.sh ├── launch_llama_instruct_dpo.sh ├── launch_llama_instruct_dpo_humanline.sh ├── launch_llama_instruct_dpo_online.sh ├── launch_llama_instruct_dpo_online_humanline.sh ├── launch_llama_instruct_grpo.sh ├── launch_llama_instruct_grpo_humanline.sh ├── launch_llama_instruct_grpo_online.sh ├── launch_llama_instruct_grpo_online_humanline.sh ├── launch_llama_instruct_kto.sh ├── launch_llama_instruct_kto_humanline.sh ├── launch_llama_instruct_kto_humanline_gemma.sh ├── launch_llama_instruct_kto_online.sh ├── launch_llama_instruct_kto_online_humanline.sh ├── launch_llama_kto.sh ├── launch_llama_ppo.sh ├── launch_llama_ppo_online.sh ├── launch_llama_sft.sh ├── launch_llama_sft_kto.sh ├── launch_multinode_batch.sh ├── launch_multinode_interactive.sh └── launch_singlenode_batch.sh └── train ├── __init__.py ├── data.py ├── dataloader.py ├── label.py ├── models.py ├── sample.py ├── trainers.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/README.md -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | from . import train -------------------------------------------------------------------------------- /accelerate_config/fsdp_2gpu.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/accelerate_config/fsdp_2gpu.yaml -------------------------------------------------------------------------------- /accelerate_config/fsdp_2x2gpu.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/accelerate_config/fsdp_2x2gpu.yaml -------------------------------------------------------------------------------- /accelerate_config/fsdp_2x4gpu.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/accelerate_config/fsdp_2x4gpu.yaml -------------------------------------------------------------------------------- /accelerate_config/fsdp_2x8gpu.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/accelerate_config/fsdp_2x8gpu.yaml -------------------------------------------------------------------------------- /accelerate_config/fsdp_4gpu.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/accelerate_config/fsdp_4gpu.yaml -------------------------------------------------------------------------------- /accelerate_config/fsdp_8gpu.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/accelerate_config/fsdp_8gpu.yaml -------------------------------------------------------------------------------- /alpaca_eval.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/alpaca_eval.txt -------------------------------------------------------------------------------- /alpaca_eval_gpt-4.1-mini.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/alpaca_eval_gpt-4.1-mini.yaml -------------------------------------------------------------------------------- /alpaca_eval_gpt-4.1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/alpaca_eval_gpt-4.1.yaml -------------------------------------------------------------------------------- /config/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/config/config.yaml -------------------------------------------------------------------------------- /config/loss/bradley-terry.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/config/loss/bradley-terry.yaml -------------------------------------------------------------------------------- /config/loss/cdpo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/config/loss/cdpo.yaml -------------------------------------------------------------------------------- /config/loss/csft.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/config/loss/csft.yaml -------------------------------------------------------------------------------- /config/loss/dpo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/config/loss/dpo.yaml -------------------------------------------------------------------------------- /config/loss/grpo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/config/loss/grpo.yaml -------------------------------------------------------------------------------- /config/loss/ipo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/config/loss/ipo.yaml -------------------------------------------------------------------------------- /config/loss/kto.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/config/loss/kto.yaml -------------------------------------------------------------------------------- /config/loss/ppo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/config/loss/ppo.yaml -------------------------------------------------------------------------------- /config/loss/sft.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/config/loss/sft.yaml -------------------------------------------------------------------------------- /config/loss/simpo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/config/loss/simpo.yaml -------------------------------------------------------------------------------- /config/loss/slic.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/config/loss/slic.yaml -------------------------------------------------------------------------------- /config/model/base_model.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/config/model/base_model.yaml -------------------------------------------------------------------------------- /config/model/gemma.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/config/model/gemma.yaml -------------------------------------------------------------------------------- /config/model/llama.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/config/model/llama.yaml -------------------------------------------------------------------------------- /config/model/mistral.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/config/model/mistral.yaml -------------------------------------------------------------------------------- /config/model/pythia.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/config/model/pythia.yaml -------------------------------------------------------------------------------- /config/model/qwen.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/config/model/qwen.yaml -------------------------------------------------------------------------------- /config/template.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/config/template.jinja -------------------------------------------------------------------------------- /evals/scripts/summarize_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/evals/scripts/summarize_metrics.py -------------------------------------------------------------------------------- /examples/binary_feedback.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/examples/binary_feedback.json -------------------------------------------------------------------------------- /examples/pairwise_feedback.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/examples/pairwise_feedback.json -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/install.sh -------------------------------------------------------------------------------- /launch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/launch.py -------------------------------------------------------------------------------- /scripts/interactive_ppo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/scripts/interactive_ppo.sh -------------------------------------------------------------------------------- /scripts/interactive_ppo_online.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/scripts/interactive_ppo_online.sh -------------------------------------------------------------------------------- /scripts/launch_llama_dpo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/scripts/launch_llama_dpo.sh -------------------------------------------------------------------------------- /scripts/launch_llama_dpo_online.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/scripts/launch_llama_dpo_online.sh -------------------------------------------------------------------------------- /scripts/launch_llama_instruct_dpo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/scripts/launch_llama_instruct_dpo.sh -------------------------------------------------------------------------------- /scripts/launch_llama_instruct_dpo_humanline.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/scripts/launch_llama_instruct_dpo_humanline.sh -------------------------------------------------------------------------------- /scripts/launch_llama_instruct_dpo_online.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/scripts/launch_llama_instruct_dpo_online.sh -------------------------------------------------------------------------------- /scripts/launch_llama_instruct_dpo_online_humanline.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/scripts/launch_llama_instruct_dpo_online_humanline.sh -------------------------------------------------------------------------------- /scripts/launch_llama_instruct_grpo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/scripts/launch_llama_instruct_grpo.sh -------------------------------------------------------------------------------- /scripts/launch_llama_instruct_grpo_humanline.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/scripts/launch_llama_instruct_grpo_humanline.sh -------------------------------------------------------------------------------- /scripts/launch_llama_instruct_grpo_online.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/scripts/launch_llama_instruct_grpo_online.sh -------------------------------------------------------------------------------- /scripts/launch_llama_instruct_grpo_online_humanline.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/scripts/launch_llama_instruct_grpo_online_humanline.sh -------------------------------------------------------------------------------- /scripts/launch_llama_instruct_kto.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/scripts/launch_llama_instruct_kto.sh -------------------------------------------------------------------------------- /scripts/launch_llama_instruct_kto_humanline.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/scripts/launch_llama_instruct_kto_humanline.sh -------------------------------------------------------------------------------- /scripts/launch_llama_instruct_kto_humanline_gemma.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/scripts/launch_llama_instruct_kto_humanline_gemma.sh -------------------------------------------------------------------------------- /scripts/launch_llama_instruct_kto_online.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/scripts/launch_llama_instruct_kto_online.sh -------------------------------------------------------------------------------- /scripts/launch_llama_instruct_kto_online_humanline.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/scripts/launch_llama_instruct_kto_online_humanline.sh -------------------------------------------------------------------------------- /scripts/launch_llama_kto.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/scripts/launch_llama_kto.sh -------------------------------------------------------------------------------- /scripts/launch_llama_ppo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/scripts/launch_llama_ppo.sh -------------------------------------------------------------------------------- /scripts/launch_llama_ppo_online.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/scripts/launch_llama_ppo_online.sh -------------------------------------------------------------------------------- /scripts/launch_llama_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/scripts/launch_llama_sft.sh -------------------------------------------------------------------------------- /scripts/launch_llama_sft_kto.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/scripts/launch_llama_sft_kto.sh -------------------------------------------------------------------------------- /scripts/launch_multinode_batch.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/scripts/launch_multinode_batch.sh -------------------------------------------------------------------------------- /scripts/launch_multinode_interactive.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/scripts/launch_multinode_interactive.sh -------------------------------------------------------------------------------- /scripts/launch_singlenode_batch.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/scripts/launch_singlenode_batch.sh -------------------------------------------------------------------------------- /train/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /train/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/train/data.py -------------------------------------------------------------------------------- /train/dataloader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/train/dataloader.py -------------------------------------------------------------------------------- /train/label.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/train/label.py -------------------------------------------------------------------------------- /train/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/train/models.py -------------------------------------------------------------------------------- /train/sample.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/train/sample.py -------------------------------------------------------------------------------- /train/trainers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/train/trainers.py -------------------------------------------------------------------------------- /train/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContextualAI/HALOs/HEAD/train/utils.py --------------------------------------------------------------------------------