├── .github
    └── workflows
    │   ├── build_documentation.yml
    │   ├── build_pr_documentation.yml
    │   ├── quality.yml
    │   ├── tests.yml
    │   └── upload_pr_documentation.yml
├── .gitignore
├── CITATION.cff
├── LICENSE
├── Makefile
├── README.md
├── assets
    └── handbook.png
├── chapters
    └── en
    │   ├── _toctree.yml
    │   └── chapter0
    │       └── introduction.mdx
├── recipes
    ├── accelerate_configs
    │   ├── deepspeed_zero3.yaml
    │   ├── fsdp.yaml
    │   ├── fsdp_qlora.yaml
    │   └── multi_gpu.yaml
    ├── constitutional-ai
    │   ├── README.md
    │   ├── dpo
    │   │   └── config_anthropic.yaml
    │   └── sft
    │   │   ├── config_anthropic.yaml
    │   │   └── config_grok.yaml
    ├── gpt2-nl
    │   ├── README.md
    │   ├── cpt
    │   │   └── config_full.yaml
    │   ├── dpo
    │   │   └── config_full.yaml
    │   └── sft
    │   │   └── config_full.yaml
    ├── launch.slurm
    ├── pref_align_scan
    │   ├── README.md
    │   ├── dpo
    │   │   ├── config_openhermes.yaml
    │   │   └── config_zephyr.yaml
    │   └── launch_scan.sh
    ├── smollm
    │   ├── README.md
    │   └── sft
    │   │   └── config.yaml
    ├── starchat2-15b
    │   ├── README.md
    │   ├── dpo
    │   │   └── config_v0.1.yaml
    │   └── sft
    │   │   └── config_v0.1.yaml
    ├── zephyr-141b-A35b
    │   ├── README.md
    │   └── orpo
    │   │   └── config_full.yaml
    ├── zephyr-7b-beta
    │   ├── README.md
    │   ├── dpo
    │   │   ├── config_full.yaml
    │   │   └── config_qlora.yaml
    │   └── sft
    │   │   ├── config_full.yaml
    │   │   └── config_qlora.yaml
    └── zephyr-7b-gemma
    │   ├── README.md
    │   ├── dpo
    │       └── config_full.yaml
    │   └── sft
    │       └── config_full.yaml
├── scripts
    ├── README.md
    ├── run_cpt.py
    ├── run_dpo.py
    ├── run_orpo.py
    └── run_sft.py
├── setup.cfg
├── setup.py
├── src
    └── alignment
    │   ├── __init__.py
    │   ├── configs.py
    │   ├── data.py
    │   ├── decontaminate.py
    │   ├── model_utils.py
    │   └── release.py
└── tests
    ├── __init__.py
    ├── fixtures
        ├── config_dpo_full.yaml
        └── config_sft_full.yaml
    ├── test_configs.py
    ├── test_data.py
    ├── test_decontaminate.py
    └── test_model_utils.py


/.github/workflows/build_documentation.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/.github/workflows/build_documentation.yml


--------------------------------------------------------------------------------
/.github/workflows/build_pr_documentation.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/.github/workflows/build_pr_documentation.yml


--------------------------------------------------------------------------------
/.github/workflows/quality.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/.github/workflows/quality.yml


--------------------------------------------------------------------------------
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/.github/workflows/tests.yml


--------------------------------------------------------------------------------
/.github/workflows/upload_pr_documentation.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/.github/workflows/upload_pr_documentation.yml


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/.gitignore


--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/CITATION.cff


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/LICENSE


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/Makefile


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/README.md


--------------------------------------------------------------------------------
/assets/handbook.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/assets/handbook.png


--------------------------------------------------------------------------------
/chapters/en/_toctree.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/chapters/en/_toctree.yml


--------------------------------------------------------------------------------
/chapters/en/chapter0/introduction.mdx:
--------------------------------------------------------------------------------
1 | # Welcome to the RLHF Handbook!
2 | 
3 | Stay tuned for more details 🤗


--------------------------------------------------------------------------------
/recipes/accelerate_configs/deepspeed_zero3.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/accelerate_configs/deepspeed_zero3.yaml


--------------------------------------------------------------------------------
/recipes/accelerate_configs/fsdp.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/accelerate_configs/fsdp.yaml


--------------------------------------------------------------------------------
/recipes/accelerate_configs/fsdp_qlora.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/accelerate_configs/fsdp_qlora.yaml


--------------------------------------------------------------------------------
/recipes/accelerate_configs/multi_gpu.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/accelerate_configs/multi_gpu.yaml


--------------------------------------------------------------------------------
/recipes/constitutional-ai/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/constitutional-ai/README.md


--------------------------------------------------------------------------------
/recipes/constitutional-ai/dpo/config_anthropic.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/constitutional-ai/dpo/config_anthropic.yaml


--------------------------------------------------------------------------------
/recipes/constitutional-ai/sft/config_anthropic.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/constitutional-ai/sft/config_anthropic.yaml


--------------------------------------------------------------------------------
/recipes/constitutional-ai/sft/config_grok.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/constitutional-ai/sft/config_grok.yaml


--------------------------------------------------------------------------------
/recipes/gpt2-nl/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/gpt2-nl/README.md


--------------------------------------------------------------------------------
/recipes/gpt2-nl/cpt/config_full.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/gpt2-nl/cpt/config_full.yaml


--------------------------------------------------------------------------------
/recipes/gpt2-nl/dpo/config_full.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/gpt2-nl/dpo/config_full.yaml


--------------------------------------------------------------------------------
/recipes/gpt2-nl/sft/config_full.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/gpt2-nl/sft/config_full.yaml


--------------------------------------------------------------------------------
/recipes/launch.slurm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/launch.slurm


--------------------------------------------------------------------------------
/recipes/pref_align_scan/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/pref_align_scan/README.md


--------------------------------------------------------------------------------
/recipes/pref_align_scan/dpo/config_openhermes.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/pref_align_scan/dpo/config_openhermes.yaml


--------------------------------------------------------------------------------
/recipes/pref_align_scan/dpo/config_zephyr.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/pref_align_scan/dpo/config_zephyr.yaml


--------------------------------------------------------------------------------
/recipes/pref_align_scan/launch_scan.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/pref_align_scan/launch_scan.sh


--------------------------------------------------------------------------------
/recipes/smollm/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/smollm/README.md


--------------------------------------------------------------------------------
/recipes/smollm/sft/config.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/smollm/sft/config.yaml


--------------------------------------------------------------------------------
/recipes/starchat2-15b/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/starchat2-15b/README.md


--------------------------------------------------------------------------------
/recipes/starchat2-15b/dpo/config_v0.1.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/starchat2-15b/dpo/config_v0.1.yaml


--------------------------------------------------------------------------------
/recipes/starchat2-15b/sft/config_v0.1.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/starchat2-15b/sft/config_v0.1.yaml


--------------------------------------------------------------------------------
/recipes/zephyr-141b-A35b/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/zephyr-141b-A35b/README.md


--------------------------------------------------------------------------------
/recipes/zephyr-141b-A35b/orpo/config_full.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/zephyr-141b-A35b/orpo/config_full.yaml


--------------------------------------------------------------------------------
/recipes/zephyr-7b-beta/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/zephyr-7b-beta/README.md


--------------------------------------------------------------------------------
/recipes/zephyr-7b-beta/dpo/config_full.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/zephyr-7b-beta/dpo/config_full.yaml


--------------------------------------------------------------------------------
/recipes/zephyr-7b-beta/dpo/config_qlora.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/zephyr-7b-beta/dpo/config_qlora.yaml


--------------------------------------------------------------------------------
/recipes/zephyr-7b-beta/sft/config_full.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/zephyr-7b-beta/sft/config_full.yaml


--------------------------------------------------------------------------------
/recipes/zephyr-7b-beta/sft/config_qlora.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/zephyr-7b-beta/sft/config_qlora.yaml


--------------------------------------------------------------------------------
/recipes/zephyr-7b-gemma/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/zephyr-7b-gemma/README.md


--------------------------------------------------------------------------------
/recipes/zephyr-7b-gemma/dpo/config_full.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/zephyr-7b-gemma/dpo/config_full.yaml


--------------------------------------------------------------------------------
/recipes/zephyr-7b-gemma/sft/config_full.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/recipes/zephyr-7b-gemma/sft/config_full.yaml


--------------------------------------------------------------------------------
/scripts/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/scripts/README.md


--------------------------------------------------------------------------------
/scripts/run_cpt.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/scripts/run_cpt.py


--------------------------------------------------------------------------------
/scripts/run_dpo.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/scripts/run_dpo.py


--------------------------------------------------------------------------------
/scripts/run_orpo.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/scripts/run_orpo.py


--------------------------------------------------------------------------------
/scripts/run_sft.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/scripts/run_sft.py


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/setup.cfg


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/setup.py


--------------------------------------------------------------------------------
/src/alignment/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/src/alignment/__init__.py


--------------------------------------------------------------------------------
/src/alignment/configs.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/src/alignment/configs.py


--------------------------------------------------------------------------------
/src/alignment/data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/src/alignment/data.py


--------------------------------------------------------------------------------
/src/alignment/decontaminate.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/src/alignment/decontaminate.py


--------------------------------------------------------------------------------
/src/alignment/model_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/src/alignment/model_utils.py


--------------------------------------------------------------------------------
/src/alignment/release.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/src/alignment/release.py


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/fixtures/config_dpo_full.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/tests/fixtures/config_dpo_full.yaml


--------------------------------------------------------------------------------
/tests/fixtures/config_sft_full.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/tests/fixtures/config_sft_full.yaml


--------------------------------------------------------------------------------
/tests/test_configs.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/tests/test_configs.py


--------------------------------------------------------------------------------
/tests/test_data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/tests/test_data.py


--------------------------------------------------------------------------------
/tests/test_decontaminate.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/tests/test_decontaminate.py


--------------------------------------------------------------------------------
/tests/test_model_utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shareAI-lab/alignment-handbook-cn/HEAD/tests/test_model_utils.py


--------------------------------------------------------------------------------