├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── README_zh-CN.md ├── assets ├── intro.jpeg ├── logo.png └── result.png ├── examples ├── data_preprocess │ ├── am_general.py │ ├── full_hh_rlhf.py │ └── math.py ├── ppo │ ├── llama3-8b_general.sh │ ├── llama3-8b_hh-rlhf.sh │ ├── llama3-8b_math.sh │ ├── qwen2_5-7b_general.sh │ ├── qwen2_5-7b_hh-rlhf.sh │ ├── qwen2_5-7b_math.sh │ ├── qwen3-8b_general.sh │ ├── qwen3-8b_hh-rlhf.sh │ └── qwen3-8b_math.sh └── xtuner_configs │ ├── POLAR_1_8B_full_varlenattn_custom_dataset.py │ └── POLAR_7B_full_varlenattn_custom_dataset.py └── src └── polar ├── __init__.py └── reward_func.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/POLAR/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/POLAR/HEAD/.gitmodules -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/POLAR/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/POLAR/HEAD/README.md -------------------------------------------------------------------------------- /README_zh-CN.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/POLAR/HEAD/README_zh-CN.md -------------------------------------------------------------------------------- /assets/intro.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/POLAR/HEAD/assets/intro.jpeg -------------------------------------------------------------------------------- /assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/POLAR/HEAD/assets/logo.png -------------------------------------------------------------------------------- /assets/result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/POLAR/HEAD/assets/result.png -------------------------------------------------------------------------------- /examples/data_preprocess/am_general.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/POLAR/HEAD/examples/data_preprocess/am_general.py -------------------------------------------------------------------------------- /examples/data_preprocess/full_hh_rlhf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/POLAR/HEAD/examples/data_preprocess/full_hh_rlhf.py -------------------------------------------------------------------------------- /examples/data_preprocess/math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/POLAR/HEAD/examples/data_preprocess/math.py -------------------------------------------------------------------------------- /examples/ppo/llama3-8b_general.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/POLAR/HEAD/examples/ppo/llama3-8b_general.sh -------------------------------------------------------------------------------- /examples/ppo/llama3-8b_hh-rlhf.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/POLAR/HEAD/examples/ppo/llama3-8b_hh-rlhf.sh -------------------------------------------------------------------------------- /examples/ppo/llama3-8b_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/POLAR/HEAD/examples/ppo/llama3-8b_math.sh -------------------------------------------------------------------------------- /examples/ppo/qwen2_5-7b_general.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/POLAR/HEAD/examples/ppo/qwen2_5-7b_general.sh -------------------------------------------------------------------------------- /examples/ppo/qwen2_5-7b_hh-rlhf.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/POLAR/HEAD/examples/ppo/qwen2_5-7b_hh-rlhf.sh -------------------------------------------------------------------------------- /examples/ppo/qwen2_5-7b_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/POLAR/HEAD/examples/ppo/qwen2_5-7b_math.sh -------------------------------------------------------------------------------- /examples/ppo/qwen3-8b_general.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/POLAR/HEAD/examples/ppo/qwen3-8b_general.sh -------------------------------------------------------------------------------- /examples/ppo/qwen3-8b_hh-rlhf.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/POLAR/HEAD/examples/ppo/qwen3-8b_hh-rlhf.sh -------------------------------------------------------------------------------- /examples/ppo/qwen3-8b_math.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/POLAR/HEAD/examples/ppo/qwen3-8b_math.sh -------------------------------------------------------------------------------- /examples/xtuner_configs/POLAR_1_8B_full_varlenattn_custom_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/POLAR/HEAD/examples/xtuner_configs/POLAR_1_8B_full_varlenattn_custom_dataset.py -------------------------------------------------------------------------------- /examples/xtuner_configs/POLAR_7B_full_varlenattn_custom_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/POLAR/HEAD/examples/xtuner_configs/POLAR_7B_full_varlenattn_custom_dataset.py -------------------------------------------------------------------------------- /src/polar/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/POLAR/HEAD/src/polar/__init__.py -------------------------------------------------------------------------------- /src/polar/reward_func.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/InternLM/POLAR/HEAD/src/polar/reward_func.py --------------------------------------------------------------------------------