├── .gitignore ├── LICENSE ├── README.md ├── curriculum-lens ├── curriculum-lens-qwen-2.5.sh ├── dpo_diagnostician.py ├── make_curricula.py ├── recipes │ ├── accelerate_configs │ │ ├── deepspeed_zero2.yaml │ │ ├── deepspeed_zero3.yaml │ │ ├── fsdp.yaml │ │ ├── fsdp_qlora.yaml │ │ └── multi_gpu.yaml │ ├── gemma-2-9b-argilla.yaml │ ├── gemma-2-9b-uf.yaml │ ├── llama-3-8b-argilla.yaml │ ├── llama-3-8b-uf.yaml │ ├── mistral-7b-argilla.yaml │ ├── mistral-7b-uf.yaml │ ├── qwen-2.5-7b-argilla.yaml │ └── qwen-2.5-7b-uf.yaml └── run_half.py ├── selective-dpo-illustration.jpg ├── selective-dpo-illustration.png └── selective-dpo ├── curricula ├── gemma_ultrafeedback_binarized_learning_order.csv ├── llama_ultrafeedback_binarized_learning_order.csv ├── mistral_ultrafeedback_binarized_learning_order.csv └── qwen-2.5-7b_ultrafeedback_binarized_learning_order.csv ├── recipes ├── accelerate_configs │ ├── deepspeed_zero2.yaml │ └── deepspeed_zero3.yaml ├── gemma-2-9b-uf-ob1.0-seed42.yaml ├── llama-3-8b-uf-ob1.0-seed42.yaml ├── mistral-7b-uf-ob1.0-seed42.yaml └── qwen-2.5-7b-uf-ob1.0-seed42.yaml ├── run-selectivedpo-uf.sh └── scripts ├── __pycache__ ├── curricula_dpo_trainer.cpython-310.pyc ├── simpo_config.cpython-310.pyc └── simpo_trainer.cpython-310.pyc ├── run_selective_dpo.py └── selective_dpo_trainer.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glorgao/SelectiveDPO/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glorgao/SelectiveDPO/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glorgao/SelectiveDPO/HEAD/README.md -------------------------------------------------------------------------------- /curriculum-lens/curriculum-lens-qwen-2.5.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glorgao/SelectiveDPO/HEAD/curriculum-lens/curriculum-lens-qwen-2.5.sh -------------------------------------------------------------------------------- /curriculum-lens/dpo_diagnostician.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glorgao/SelectiveDPO/HEAD/curriculum-lens/dpo_diagnostician.py -------------------------------------------------------------------------------- /curriculum-lens/make_curricula.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glorgao/SelectiveDPO/HEAD/curriculum-lens/make_curricula.py -------------------------------------------------------------------------------- /curriculum-lens/recipes/accelerate_configs/deepspeed_zero2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glorgao/SelectiveDPO/HEAD/curriculum-lens/recipes/accelerate_configs/deepspeed_zero2.yaml -------------------------------------------------------------------------------- /curriculum-lens/recipes/accelerate_configs/deepspeed_zero3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glorgao/SelectiveDPO/HEAD/curriculum-lens/recipes/accelerate_configs/deepspeed_zero3.yaml -------------------------------------------------------------------------------- /curriculum-lens/recipes/accelerate_configs/fsdp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glorgao/SelectiveDPO/HEAD/curriculum-lens/recipes/accelerate_configs/fsdp.yaml -------------------------------------------------------------------------------- /curriculum-lens/recipes/accelerate_configs/fsdp_qlora.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glorgao/SelectiveDPO/HEAD/curriculum-lens/recipes/accelerate_configs/fsdp_qlora.yaml -------------------------------------------------------------------------------- /curriculum-lens/recipes/accelerate_configs/multi_gpu.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glorgao/SelectiveDPO/HEAD/curriculum-lens/recipes/accelerate_configs/multi_gpu.yaml -------------------------------------------------------------------------------- /curriculum-lens/recipes/gemma-2-9b-argilla.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glorgao/SelectiveDPO/HEAD/curriculum-lens/recipes/gemma-2-9b-argilla.yaml -------------------------------------------------------------------------------- /curriculum-lens/recipes/gemma-2-9b-uf.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glorgao/SelectiveDPO/HEAD/curriculum-lens/recipes/gemma-2-9b-uf.yaml -------------------------------------------------------------------------------- /curriculum-lens/recipes/llama-3-8b-argilla.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glorgao/SelectiveDPO/HEAD/curriculum-lens/recipes/llama-3-8b-argilla.yaml -------------------------------------------------------------------------------- /curriculum-lens/recipes/llama-3-8b-uf.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glorgao/SelectiveDPO/HEAD/curriculum-lens/recipes/llama-3-8b-uf.yaml -------------------------------------------------------------------------------- /curriculum-lens/recipes/mistral-7b-argilla.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glorgao/SelectiveDPO/HEAD/curriculum-lens/recipes/mistral-7b-argilla.yaml -------------------------------------------------------------------------------- /curriculum-lens/recipes/mistral-7b-uf.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glorgao/SelectiveDPO/HEAD/curriculum-lens/recipes/mistral-7b-uf.yaml -------------------------------------------------------------------------------- /curriculum-lens/recipes/qwen-2.5-7b-argilla.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glorgao/SelectiveDPO/HEAD/curriculum-lens/recipes/qwen-2.5-7b-argilla.yaml -------------------------------------------------------------------------------- /curriculum-lens/recipes/qwen-2.5-7b-uf.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glorgao/SelectiveDPO/HEAD/curriculum-lens/recipes/qwen-2.5-7b-uf.yaml -------------------------------------------------------------------------------- /curriculum-lens/run_half.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glorgao/SelectiveDPO/HEAD/curriculum-lens/run_half.py -------------------------------------------------------------------------------- /selective-dpo-illustration.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glorgao/SelectiveDPO/HEAD/selective-dpo-illustration.jpg -------------------------------------------------------------------------------- /selective-dpo-illustration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glorgao/SelectiveDPO/HEAD/selective-dpo-illustration.png -------------------------------------------------------------------------------- /selective-dpo/curricula/gemma_ultrafeedback_binarized_learning_order.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glorgao/SelectiveDPO/HEAD/selective-dpo/curricula/gemma_ultrafeedback_binarized_learning_order.csv -------------------------------------------------------------------------------- /selective-dpo/curricula/llama_ultrafeedback_binarized_learning_order.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glorgao/SelectiveDPO/HEAD/selective-dpo/curricula/llama_ultrafeedback_binarized_learning_order.csv -------------------------------------------------------------------------------- /selective-dpo/curricula/mistral_ultrafeedback_binarized_learning_order.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glorgao/SelectiveDPO/HEAD/selective-dpo/curricula/mistral_ultrafeedback_binarized_learning_order.csv -------------------------------------------------------------------------------- /selective-dpo/curricula/qwen-2.5-7b_ultrafeedback_binarized_learning_order.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glorgao/SelectiveDPO/HEAD/selective-dpo/curricula/qwen-2.5-7b_ultrafeedback_binarized_learning_order.csv -------------------------------------------------------------------------------- /selective-dpo/recipes/accelerate_configs/deepspeed_zero2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glorgao/SelectiveDPO/HEAD/selective-dpo/recipes/accelerate_configs/deepspeed_zero2.yaml -------------------------------------------------------------------------------- /selective-dpo/recipes/accelerate_configs/deepspeed_zero3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glorgao/SelectiveDPO/HEAD/selective-dpo/recipes/accelerate_configs/deepspeed_zero3.yaml -------------------------------------------------------------------------------- /selective-dpo/recipes/gemma-2-9b-uf-ob1.0-seed42.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glorgao/SelectiveDPO/HEAD/selective-dpo/recipes/gemma-2-9b-uf-ob1.0-seed42.yaml -------------------------------------------------------------------------------- /selective-dpo/recipes/llama-3-8b-uf-ob1.0-seed42.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glorgao/SelectiveDPO/HEAD/selective-dpo/recipes/llama-3-8b-uf-ob1.0-seed42.yaml -------------------------------------------------------------------------------- /selective-dpo/recipes/mistral-7b-uf-ob1.0-seed42.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glorgao/SelectiveDPO/HEAD/selective-dpo/recipes/mistral-7b-uf-ob1.0-seed42.yaml -------------------------------------------------------------------------------- /selective-dpo/recipes/qwen-2.5-7b-uf-ob1.0-seed42.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glorgao/SelectiveDPO/HEAD/selective-dpo/recipes/qwen-2.5-7b-uf-ob1.0-seed42.yaml -------------------------------------------------------------------------------- /selective-dpo/run-selectivedpo-uf.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glorgao/SelectiveDPO/HEAD/selective-dpo/run-selectivedpo-uf.sh -------------------------------------------------------------------------------- /selective-dpo/scripts/__pycache__/curricula_dpo_trainer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glorgao/SelectiveDPO/HEAD/selective-dpo/scripts/__pycache__/curricula_dpo_trainer.cpython-310.pyc -------------------------------------------------------------------------------- /selective-dpo/scripts/__pycache__/simpo_config.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glorgao/SelectiveDPO/HEAD/selective-dpo/scripts/__pycache__/simpo_config.cpython-310.pyc -------------------------------------------------------------------------------- /selective-dpo/scripts/__pycache__/simpo_trainer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glorgao/SelectiveDPO/HEAD/selective-dpo/scripts/__pycache__/simpo_trainer.cpython-310.pyc -------------------------------------------------------------------------------- /selective-dpo/scripts/run_selective_dpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glorgao/SelectiveDPO/HEAD/selective-dpo/scripts/run_selective_dpo.py -------------------------------------------------------------------------------- /selective-dpo/scripts/selective_dpo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glorgao/SelectiveDPO/HEAD/selective-dpo/scripts/selective_dpo_trainer.py --------------------------------------------------------------------------------