├── .github └── workflows │ ├── build_documentation.yml │ ├── build_pr_documentation.yml │ ├── quality.yml │ ├── tests.yml │ └── upload_pr_documentation.yml ├── .gitignore ├── CITATION.cff ├── LICENSE ├── Makefile ├── README.md ├── assets └── handbook.png ├── chapters └── en │ ├── _toctree.yml │ └── chapter0 │ └── introduction.mdx ├── recipes ├── accelerate_configs │ ├── deepspeed_zero3.yaml │ ├── fsdp.yaml │ ├── fsdp_qlora.yaml │ └── multi_gpu.yaml ├── constitutional-ai │ ├── README.md │ ├── dpo │ │ └── config_anthropic.yaml │ └── sft │ │ ├── config_anthropic.yaml │ │ └── config_grok.yaml ├── gpt2-nl │ ├── README.md │ ├── cpt │ │ └── config_full.yaml │ ├── dpo │ │ └── config_full.yaml │ └── sft │ │ └── config_full.yaml ├── launch.slurm ├── pref_align_scan │ ├── README.md │ ├── dpo │ │ ├── config_openhermes.yaml │ │ └── config_zephyr.yaml │ └── launch_scan.sh ├── smollm │ ├── README.md │ └── sft │ │ └── config.yaml ├── starchat2-15b │ ├── README.md │ ├── dpo │ │ └── config_v0.1.yaml │ └── sft │ │ └── config_v0.1.yaml ├── zephyr-141b-A35b │ ├── README.md │ └── orpo │ │ └── config_full.yaml ├── zephyr-7b-beta │ ├── README.md │ ├── dpo │ │ ├── config_full.yaml │ │ └── config_qlora.yaml │ └── sft │ │ ├── config_full.yaml │ │ └── config_qlora.yaml └── zephyr-7b-gemma │ ├── README.md │ ├── dpo │ └── config_full.yaml │ └── sft │ └── config_full.yaml ├── scripts ├── README.md ├── run_cpt.py ├── run_dpo.py ├── run_orpo.py └── run_sft.py ├── setup.cfg ├── setup.py ├── src └── alignment │ ├── __init__.py │ ├── configs.py │ ├── data.py │ ├── decontaminate.py │ ├── model_utils.py │ └── release.py └── tests ├── __init__.py ├── fixtures ├── config_dpo_full.yaml └── config_sft_full.yaml ├── test_configs.py ├── test_data.py ├── test_decontaminate.py └── test_model_utils.py /.github/workflows/build_documentation.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/.github/workflows/build_documentation.yml -------------------------------------------------------------------------------- /.github/workflows/build_pr_documentation.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/.github/workflows/build_pr_documentation.yml -------------------------------------------------------------------------------- /.github/workflows/quality.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/.github/workflows/quality.yml -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/.github/workflows/tests.yml -------------------------------------------------------------------------------- /.github/workflows/upload_pr_documentation.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/.github/workflows/upload_pr_documentation.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/.gitignore -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/CITATION.cff -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/LICENSE -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/Makefile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/README.md -------------------------------------------------------------------------------- /assets/handbook.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/assets/handbook.png -------------------------------------------------------------------------------- /chapters/en/_toctree.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/chapters/en/_toctree.yml -------------------------------------------------------------------------------- /chapters/en/chapter0/introduction.mdx: -------------------------------------------------------------------------------- 1 | # Welcome to the RLHF Handbook! 2 | 3 | Stay tuned for more details 🤗 -------------------------------------------------------------------------------- /recipes/accelerate_configs/deepspeed_zero3.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/accelerate_configs/deepspeed_zero3.yaml -------------------------------------------------------------------------------- /recipes/accelerate_configs/fsdp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/accelerate_configs/fsdp.yaml -------------------------------------------------------------------------------- /recipes/accelerate_configs/fsdp_qlora.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/accelerate_configs/fsdp_qlora.yaml -------------------------------------------------------------------------------- /recipes/accelerate_configs/multi_gpu.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/accelerate_configs/multi_gpu.yaml -------------------------------------------------------------------------------- /recipes/constitutional-ai/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/constitutional-ai/README.md -------------------------------------------------------------------------------- /recipes/constitutional-ai/dpo/config_anthropic.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/constitutional-ai/dpo/config_anthropic.yaml -------------------------------------------------------------------------------- /recipes/constitutional-ai/sft/config_anthropic.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/constitutional-ai/sft/config_anthropic.yaml -------------------------------------------------------------------------------- /recipes/constitutional-ai/sft/config_grok.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/constitutional-ai/sft/config_grok.yaml -------------------------------------------------------------------------------- /recipes/gpt2-nl/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/gpt2-nl/README.md -------------------------------------------------------------------------------- /recipes/gpt2-nl/cpt/config_full.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/gpt2-nl/cpt/config_full.yaml -------------------------------------------------------------------------------- /recipes/gpt2-nl/dpo/config_full.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/gpt2-nl/dpo/config_full.yaml -------------------------------------------------------------------------------- /recipes/gpt2-nl/sft/config_full.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/gpt2-nl/sft/config_full.yaml -------------------------------------------------------------------------------- /recipes/launch.slurm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/launch.slurm -------------------------------------------------------------------------------- /recipes/pref_align_scan/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/pref_align_scan/README.md -------------------------------------------------------------------------------- /recipes/pref_align_scan/dpo/config_openhermes.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/pref_align_scan/dpo/config_openhermes.yaml -------------------------------------------------------------------------------- /recipes/pref_align_scan/dpo/config_zephyr.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/pref_align_scan/dpo/config_zephyr.yaml -------------------------------------------------------------------------------- /recipes/pref_align_scan/launch_scan.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/pref_align_scan/launch_scan.sh -------------------------------------------------------------------------------- /recipes/smollm/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/smollm/README.md -------------------------------------------------------------------------------- /recipes/smollm/sft/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/smollm/sft/config.yaml -------------------------------------------------------------------------------- /recipes/starchat2-15b/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/starchat2-15b/README.md -------------------------------------------------------------------------------- /recipes/starchat2-15b/dpo/config_v0.1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/starchat2-15b/dpo/config_v0.1.yaml -------------------------------------------------------------------------------- /recipes/starchat2-15b/sft/config_v0.1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/starchat2-15b/sft/config_v0.1.yaml -------------------------------------------------------------------------------- /recipes/zephyr-141b-A35b/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/zephyr-141b-A35b/README.md -------------------------------------------------------------------------------- /recipes/zephyr-141b-A35b/orpo/config_full.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/zephyr-141b-A35b/orpo/config_full.yaml -------------------------------------------------------------------------------- /recipes/zephyr-7b-beta/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/zephyr-7b-beta/README.md -------------------------------------------------------------------------------- /recipes/zephyr-7b-beta/dpo/config_full.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/zephyr-7b-beta/dpo/config_full.yaml -------------------------------------------------------------------------------- /recipes/zephyr-7b-beta/dpo/config_qlora.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/zephyr-7b-beta/dpo/config_qlora.yaml -------------------------------------------------------------------------------- /recipes/zephyr-7b-beta/sft/config_full.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/zephyr-7b-beta/sft/config_full.yaml -------------------------------------------------------------------------------- /recipes/zephyr-7b-beta/sft/config_qlora.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/zephyr-7b-beta/sft/config_qlora.yaml -------------------------------------------------------------------------------- /recipes/zephyr-7b-gemma/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/zephyr-7b-gemma/README.md -------------------------------------------------------------------------------- /recipes/zephyr-7b-gemma/dpo/config_full.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/zephyr-7b-gemma/dpo/config_full.yaml -------------------------------------------------------------------------------- /recipes/zephyr-7b-gemma/sft/config_full.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/zephyr-7b-gemma/sft/config_full.yaml -------------------------------------------------------------------------------- /scripts/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/scripts/README.md -------------------------------------------------------------------------------- /scripts/run_cpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/scripts/run_cpt.py -------------------------------------------------------------------------------- /scripts/run_dpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/scripts/run_dpo.py -------------------------------------------------------------------------------- /scripts/run_orpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/scripts/run_orpo.py -------------------------------------------------------------------------------- /scripts/run_sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/scripts/run_sft.py -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/setup.cfg -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/setup.py -------------------------------------------------------------------------------- /src/alignment/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/src/alignment/__init__.py -------------------------------------------------------------------------------- /src/alignment/configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/src/alignment/configs.py -------------------------------------------------------------------------------- /src/alignment/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/src/alignment/data.py -------------------------------------------------------------------------------- /src/alignment/decontaminate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/src/alignment/decontaminate.py -------------------------------------------------------------------------------- /src/alignment/model_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/src/alignment/model_utils.py -------------------------------------------------------------------------------- /src/alignment/release.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/src/alignment/release.py -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/fixtures/config_dpo_full.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/tests/fixtures/config_dpo_full.yaml -------------------------------------------------------------------------------- /tests/fixtures/config_sft_full.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/tests/fixtures/config_sft_full.yaml -------------------------------------------------------------------------------- /tests/test_configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/tests/test_configs.py -------------------------------------------------------------------------------- /tests/test_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/tests/test_data.py -------------------------------------------------------------------------------- /tests/test_decontaminate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/tests/test_decontaminate.py -------------------------------------------------------------------------------- /tests/test_model_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/tests/test_model_utils.py --------------------------------------------------------------------------------