├── .gitignore
├── .gitmodules
├── LICENSE
├── README.md
├── README_zh-CN.md
├── assets
│   ├── intro.jpeg
│   ├── logo.png
│   └── result.png
├── examples
│   ├── data_preprocess
│   │   ├── am_general.py
│   │   ├── full_hh_rlhf.py
│   │   └── math.py
│   ├── ppo
│   │   ├── llama3-8b_general.sh
│   │   ├── llama3-8b_hh-rlhf.sh
│   │   ├── llama3-8b_math.sh
│   │   ├── qwen2_5-7b_general.sh
│   │   ├── qwen2_5-7b_hh-rlhf.sh
│   │   ├── qwen2_5-7b_math.sh
│   │   ├── qwen3-8b_general.sh
│   │   ├── qwen3-8b_hh-rlhf.sh
│   │   └── qwen3-8b_math.sh
│   └── xtuner_configs
│       ├── POLAR_1_8B_full_varlenattn_custom_dataset.py
│       └── POLAR_7B_full_varlenattn_custom_dataset.py
└── src
    └── polar
        ├── __init__.py
        └── reward_func.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[codz]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py.cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # UV
98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | #uv.lock
102 |
103 | # poetry
104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105 | # This is especially recommended for binary packages to ensure reproducibility, and is more
106 | # commonly ignored for libraries.
107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108 | #poetry.lock
109 | #poetry.toml
110 |
111 | # pdm
112 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113 | # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114 | # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115 | #pdm.lock
116 | #pdm.toml
117 | .pdm-python
118 | .pdm-build/
119 |
120 | # pixi
121 | # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122 | #pixi.lock
123 | # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124 | # in the .venv directory. It is recommended not to include this directory in version control.
125 | .pixi
126 |
127 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128 | __pypackages__/
129 |
130 | # Celery stuff
131 | celerybeat-schedule
132 | celerybeat.pid
133 |
134 | # SageMath parsed files
135 | *.sage.py
136 |
137 | # Environments
138 | .env
139 | .envrc
140 | .venv
141 | env/
142 | venv/
143 | ENV/
144 | env.bak/
145 | venv.bak/
146 |
147 | # Spyder project settings
148 | .spyderproject
149 | .spyproject
150 |
151 | # Rope project settings
152 | .ropeproject
153 |
154 | # mkdocs documentation
155 | /site
156 |
157 | # mypy
158 | .mypy_cache/
159 | .dmypy.json
160 | dmypy.json
161 |
162 | # Pyre type checker
163 | .pyre/
164 |
165 | # pytype static type analyzer
166 | .pytype/
167 |
168 | # Cython debug symbols
169 | cython_debug/
170 |
171 | # PyCharm
172 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
173 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
174 | # and can be added to the global gitignore or merged into this file. For a more nuclear
175 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
176 | #.idea/
177 |
178 | # Abstra
179 | # Abstra is an AI-powered process automation framework.
180 | # Ignore directories containing user credentials, local state, and settings.
181 | # Learn more at https://abstra.io/docs
182 | .abstra/
183 |
184 | # Visual Studio Code
185 | # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
186 | # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
187 | # and can be added to the global gitignore or merged into this file. However, if you prefer,
188 | # you could uncomment the following to ignore the entire vscode folder
189 | # .vscode/
190 |
191 | # Ruff stuff:
192 | .ruff_cache/
193 |
194 | # PyPI configuration file
195 | .pypirc
196 |
197 | # Cursor
198 | # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
199 | # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
200 | # refer to https://docs.cursor.com/context/ignore-files
201 | .cursorignore
202 | .cursorindexingignore
203 |
204 | # Marimo
205 | marimo/_static/
206 | marimo/_lsp/
207 | __marimo__/
208 |
209 | # outputs
210 | outputs/
211 |
212 | # vscode
213 | .vscode/
214 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "verl"]
2 | path = verl
3 | url = https://github.com/volcengine/verl
4 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright 2023-2025 Shanghai AI Laboratory
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
203 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
204 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
205 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
206 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
207 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
208 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
209 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
210 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
211 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
212 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
213 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | ![logo](./assets/logo.png)
4 |
5 |
6 | [license](./LICENSE)
7 | [xtuner](https://github.com/InternLM/xtuner/)
8 | [lmdeploy](https://github.com/InternLM/lmdeploy/)
9 | [sglang](https://github.com/sgl-project/sglang/)
10 | [vllm](https://github.com/vllm-project/vllm/)
11 | [verl](https://github.com/volcengine/verl)
12 |
13 |
14 | [🤗 HuggingFace](https://huggingface.co/collections/internlm/polar-68693f829d2e83ac5e6e124a) |
15 | [🤖 ModelScope](https://www.modelscope.cn/organization/Shanghai_AI_Laboratory) |
16 | [📜 Paper](https://arxiv.org/abs/2507.05197)
17 |
18 |
19 | [English](./README.md) |
20 | [简体中文](./README_zh-CN.md)
21 |
22 |
23 |
24 | # Latest News 🎉
25 |
26 | - **[2025/09]** Our POLAR paper has been accepted by NeurIPS 2025.
27 | - **[2025/09]** POLAR now supports RFT (Reinforcement Fine-tuning) training using VERL.
28 |
29 |
30 | # Introduction
31 |
32 | POLAR represents a significant breakthrough in scalar-based reward models achieved through large-scale pre-training. It leverages the innovative **POL**icy Discrimin**A**tive Lea**R**ning (**POLAR**) paradigm, a scalable, high-level optimization objective, to effectively discriminate between policies using large-scale synthetic corpora. Following pre-training, POLAR RMs are fine-tuned with minimal preference data, rapidly aligning with human preferences. Key features of POLAR include:
33 |
34 | * **Innovative Pre-training Paradigm:** POLAR trains a reward model to discern identical policies and discriminate different ones. Unlike traditional reward modeling methods relying on absolute preferences, POLAR captures the relative difference between two policies, which is a scalable, high-level optimization objective suitable for modeling generic ranking relationships.
35 |
36 | * **Tailored for Reinforcement Fine-tuning:** POLAR assigns rewards to LLM trajectories based on given references, perfectly aligning with the Reinforcement Fine-tuning (RFT) framework. POLAR provides a promising solution for applying RFT in generic scenarios.
37 |
38 | * **Superior Performance and Generalization:** POLAR achieves state-of-the-art results on downstream reinforcement learning tasks, consistently delivering accurate and reliable reward signals that generalize effectively to unseen scenarios and significantly reducing reward hacking.
39 |
40 | * **Easy to Customize:** Pre-trained checkpoints of POLAR are available, enabling researchers to conveniently fine-tune the RM for various customized scenarios, thus facilitating straightforward adaptation and expansion tailored to specific applications and experimental requirements.
41 |
42 |
43 | ![intro](./assets/intro.jpeg)
44 |
45 |
46 | # Model Zoo
47 |
48 | We release POLAR reward models in sizes of 1.8B and 7B parameters. The "base" models (POLAR-1.8B-Base and POLAR-7B-Base) are pre-trained-only checkpoints, ideal for customized fine-tuning according to specific preferences. The "ready-to-use" checkpoints (POLAR-1.8B and POLAR-7B) have already been fine-tuned on general preference data, making them suitable for immediate use in most scenarios.
49 |
50 | | Model | Transformers (HF) | ModelScope |
51 | | -------------------------- | ------------------------------------------ | ---------------------------------------- |
52 | | **POLAR-1.8B-Base** | [🤗 POLAR-1_8B-Base](https://huggingface.co/internlm/POLAR-1_8B-Base) | [🤖 POLAR-1_8B-Base](https://modelscope.cn/models/Shanghai_AI_Laboratory/POLAR-1_8B-Base/summary) |
53 | | **POLAR-1.8B** | [🤗 POLAR-1_8B](https://huggingface.co/internlm/POLAR-1_8B) | [🤖 POLAR-1_8B](https://modelscope.cn/models/Shanghai_AI_Laboratory/POLAR-1_8B/summary) |
54 | | **POLAR-7B-Base** | [🤗 POLAR-7B-Base](https://huggingface.co/internlm/POLAR-7B-Base) | [🤖 POLAR-7B-Base](https://modelscope.cn/models/Shanghai_AI_Laboratory/POLAR-7B-Base/summary) |
55 | | **POLAR-7B** | [🤗 POLAR-7B](https://huggingface.co/internlm/POLAR-7B) | [🤖 POLAR-7B](https://modelscope.cn/models/Shanghai_AI_Laboratory/POLAR-7B/summary) |
56 |
57 |
58 | # Performance
59 |
60 | We conducted a comprehensive evaluation of POLAR via the Proximal Policy Optimization (PPO) algorithm, measuring the downstream RL performance of four different policy models with [OpenCompass](https://github.com/internLM/OpenCompass/). More details are available in our [Paper](https://arxiv.org/abs/2507.05197).
61 |
61 |
62 | ![result](./assets/result.png)
63 |
64 | # Quick Start
65 |
66 | This repository provides a `RewardModelClient` class (`src/polar/reward_func.py`) for querying reward values from a remote POLAR server. It handles input encoding and communication with different backends (sglang, vllm, lmdeploy), and returns the reward scores.
67 |
68 | ```python
69 | from src.polar import RewardModelClient
70 | ```
71 |
72 | Optionally, you can use [XTuner](https://github.com/InternLM/xtuner)'s implementation by installing XTuner and importing the class from it.
73 |
74 | ```python
75 | from xtuner.utils import RewardModelClient
76 | ```
77 |
78 | For XTuner installation instructions, see the [Fine-tune](#fine-tune) section below.
79 |
80 | ## Inference
81 |
82 | We support reward inference through [lmdeploy](https://github.com/InternLM/lmdeploy/), [sglang](https://github.com/sgl-project/sglang/), and [vllm](https://github.com/vllm-project/vllm/). We recommend setting up a virtual environment with conda when using these inference engines to prevent potential dependency conflicts.
83 |
84 | ### Data format
85 |
86 | Unlike traditional reward models, POLAR requires an additional reference trajectory as a demonstration and evaluates candidate trajectories by measuring their consistency with the provided reference.
87 |
88 | ```python
89 | data = [
90 | {
91 | "prompt": [{"role": "user", "content": "What is the capital of China?"}],
92 | "reference": [{"role": "assistant", "content": "Beijing."}],
93 | "output": [{"role": "assistant", "content": "Beijing."}]
94 | },
95 | {
96 | "prompt": [{"role": "user", "content": "What is the capital of China?"}],
97 | "reference": [{"role": "assistant", "content": "Beijing."}],
98 | "output": [{"role": "assistant", "content": "Shanghai."}]
99 | }
100 | ]
101 | ```
102 |
103 | ### Inference with transformers
104 |
105 | #### Reward request
106 | To load the POLAR model using transformers, use the following code to get rewards:
107 |
108 | ```python
109 | from transformers import AutoModel, AutoTokenizer
110 | from src.polar import RewardModelClient
111 | # from xtuner.utils import RewardModelClient
112 |
113 | model_name = 'internlm/POLAR-7B'
114 |
115 | model = AutoModel.from_pretrained(
116 | model_name,
117 | device_map="cuda",
118 | trust_remote_code=True
119 | )
120 | tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
121 |
122 | client = RewardModelClient(model_name)
123 | encoded_data = client.encode(data)
124 | batch = tokenizer(encoded_data, return_tensors='pt', padding=True).to('cuda')
125 | outputs = model(**batch)
126 | rewards = outputs[0].squeeze(-1).cpu().tolist()
127 | print(rewards)
128 | # [-0.5702977776527405, -11.030370712280273] for previous example data
129 | ```
130 |
131 | ### Inference with lmdeploy
132 |
133 | [LMDeploy](https://github.com/InternLM/lmdeploy) is a toolkit for compressing, deploying, and serving LLMs.
134 |
135 | #### Requirements
136 |
137 | - lmdeploy >= 0.9.1
138 |
139 | #### Server Launch
140 |
141 | ```bash
142 | lmdeploy serve api_server internlm/POLAR-7B --backend pytorch --server-port 30000
143 | ```
144 | #### Client Request
145 |
146 | ```python
147 | from src.polar import RewardModelClient
148 | # from xtuner.utils import RewardModelClient
149 |
150 | client = RewardModelClient("internlm/POLAR-7B",
151 | server_type="lmdeploy",
152 | server_address="127.0.0.1:30000")
153 |
154 | # Request rewards directly
155 | rewards = client(data)
156 | print(rewards)
157 |
158 | # First encode data and then get rewards via the request function.
159 | encoded_data = client.encode(data)
160 | rewards = client.lmdeploy_request_reward(encoded_data)
161 | print(rewards)
162 | ```
163 |
164 | ### Inference with sglang
165 |
166 | #### Requirements
167 |
168 | - 0.4.3.post4 <= sglang <= 0.4.4.post1
169 |
170 | #### Server Launch
171 |
172 | ```bash
173 | python3 -m sglang.launch_server --model internlm/POLAR-7B --trust-remote-code --is-embedding --dp 4 --tp 2 --mem-fraction-static 0.9 --port 30000
174 | ```
175 |
176 | #### Client Request
177 |
178 | ```python
179 | from src.polar import RewardModelClient
180 | # from xtuner.utils import RewardModelClient
181 |
182 | client = RewardModelClient("internlm/POLAR-7B",
183 | server_type="sglang",
184 | server_address="127.0.0.1:30000")
185 |
186 | # Request rewards directly
187 | rewards = client(data)
188 | print(rewards)
189 |
190 | # First encode data and then get rewards via the request function.
191 | encoded_data = client.encode(data)
192 | rewards = client.sglang_request_reward(encoded_data)
193 | print(rewards)
194 | ```
195 |
196 | ### Inference with vllm
197 |
198 | #### Requirements
199 |
200 | - vllm >= 0.8.0
201 |
202 | #### Server Launch
203 |
204 | ```bash
205 | vllm serve internlm/POLAR-7B --task=reward --trust-remote-code --tensor-parallel-size=2 --port 30000
206 | ```
207 |
208 | #### Client Request
209 |
210 | ```python
211 | from src.polar import RewardModelClient
212 | # from xtuner.utils import RewardModelClient
213 |
214 | client = RewardModelClient("internlm/POLAR-7B",
215 | server_type="vllm",
216 | server_address="127.0.0.1:30000")
217 |
218 | # Request rewards directly
219 | rewards = client(data)
220 | print(rewards)
221 |
222 | # First encode data and then get rewards via the request function.
223 | encoded_data = client.encode(data)
224 | rewards = client.vllm_request_reward(encoded_data)
225 | print(rewards)
226 | ```
227 |
228 | ## RFT with VERL
229 |
230 | POLAR can be easily integrated into various reinforcement learning frameworks. This repository provides an example showing how to use [VERL](https://github.com/volcengine/verl) for reinforcement fine-tuning (RFT) with POLAR reward models.
231 |
232 | ### Environment Setup
233 |
234 | Please refer to the [VERL official installation guide](https://github.com/volcengine/verl) for detailed environment setup instructions.
235 |
236 | > **Note**: For training the Qwen2.5 series, we recommend the inference backend **vLLM 0.8.3** with **Transformers 4.50.3** for optimal performance. Higher Transformers versions may cause training instability for the Qwen2.5 series.
237 |
238 | ### Data Format
239 |
240 | Training data should be in Parquet format with the following structure:
241 | ```python
242 | {
243 | "data_source": "dataset_name",
244 | "prompt": [{"role": "user", "content": "..."}, ...],
245 | "ability": "ability_type",
246 | "reward_model": {
247 | "style": "polar",
248 | "ground_truth": [{"role": "assistant", "content": "..."}]
249 | },
250 | "extra_info": {
251 | # Same as "prompt"; kept for compatibility between verl and POLAR.
252 | "prompt": [{"role": "user", "content": "..."}, ...],
253 | }
254 | }
255 | ```
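
For reference, below is a minimal sketch of how records in this structure could be written to Parquet, assuming `pandas` with a `pyarrow` backend is available; the record contents are placeholders, and the scripts under `examples/data_preprocess/` remain the reference implementation.

```python
import pandas as pd  # assumes pandas + pyarrow are installed

# Hypothetical record following the structure above.
record = {
    "data_source": "full_hh_rlhf",
    "prompt": [{"role": "user", "content": "What is the capital of China?"}],
    "ability": "general",
    "reward_model": {
        "style": "polar",
        "ground_truth": [{"role": "assistant", "content": "Beijing."}],
    },
    "extra_info": {
        # Same as "prompt"; kept for compatibility between verl and POLAR.
        "prompt": [{"role": "user", "content": "What is the capital of China?"}],
    },
}

# Collect many such records, then write them to a Parquet file for VERL training.
pd.DataFrame([record]).to_parquet("train.parquet")
```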
256 |
257 | ### Training steps
258 |
259 | - **Step 1:** POLAR Deployment
260 |
261 | Deploy the POLAR reward model following the above [Inference](#inference) instructions. Update the server configuration in `src/polar/reward_func.py`:
262 |
263 | ```python
264 | # Config reward model server
265 | ADDRESS = "your_server_ip:port" # Modify according to your server address
266 | SERVER_TYPE = "sglang" # Options: "sglang", "vllm", "lmdeploy"
267 | MODEL_PATH = "internlm/POLAR-7B"
268 | ```
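
For orientation, the sketch below shows how these constants might feed a VERL-style reward function built on `RewardModelClient`. It is illustrative only and assumes VERL's custom reward function interface (a `compute_score(data_source, solution_str, ground_truth, extra_info)` hook); the actual logic shipped in `src/polar/reward_func.py` may differ in naming and batching.

```python
from src.polar import RewardModelClient

ADDRESS = "your_server_ip:port"   # reward model server address (placeholder)
SERVER_TYPE = "sglang"            # "sglang", "vllm", or "lmdeploy"
MODEL_PATH = "internlm/POLAR-7B"

# One client per process; it encodes inputs and queries the remote POLAR server.
client = RewardModelClient(MODEL_PATH, server_type=SERVER_TYPE, server_address=ADDRESS)

def compute_score(data_source, solution_str, ground_truth, extra_info=None):
    """Score a single rollout against its reference trajectory (illustrative sketch)."""
    data = [{
        "prompt": extra_info["prompt"] if extra_info else [],
        "reference": ground_truth,  # reward_model.ground_truth from the Parquet data
        "output": [{"role": "assistant", "content": solution_str}],
    }]
    # The client returns one reward per data item, as in the Inference examples above.
    return client(data)[0]
```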
269 |
270 | - **Step 2:** Data Preparation
271 |
272 | Prepare your training data in Parquet format. You can use the provided data preprocessing scripts:
273 |
274 | ```bash
275 | # Example: Process HH-RLHF dataset
276 | python examples/data_preprocess/full_hh_rlhf.py --local_dir ~/data/hh_rlhf
277 | ```
278 |
279 | - **Step 3:** Configure Training Script
280 |
281 | An example training script is provided at `examples/ppo/qwen2_5-7b_hh-rlhf.sh`.
282 |
283 | - **Step 4:** Run Training
284 |
285 | ```bash
286 | cd verl
287 | bash ../examples/ppo/qwen2_5-7b_hh-rlhf.sh
288 | ```
289 |
290 | ### Results
291 |
292 | Here we show the RFT results of Qwen3-8B trained with our [official config](https://github.com/InternLM/POLAR/blob/main/examples/ppo/qwen3-8b_general.sh) on the public [AM-DeepSeek-R1-0528-Distilled](https://huggingface.co/datasets/a-m-team/AM-DeepSeek-R1-0528-Distilled) dataset. We use [OpenCompass](https://github.com/internLM/OpenCompass/) for evaluation.
293 |
294 | | Benchmark | Qwen3-8B w. thinking | Qwen3-8B w. thinking (RFT) |
295 | | --- | ---- | ---- |
296 | | alignment_bench | 7.04 | 7.48 |
297 | | alpaca_eval | 87.20 | 95.40 |
298 | | arenahard | 83.15 | 89.45 |
299 | | followbench | 0.93 | 0.95 |
300 | | mtbench | 8.73 | 8.78 |
301 | | wildbench | 58.43 | 72.09 |
302 | | mmlu | 86.06 | 86.58 |
303 | | mmlu_pro | 73.66 | 75.19 |
304 | | cmmlu | 82.72 | 83.07 |
305 | | bbeh | 29.56 | 33.30 |
306 | | korbench | 73.16 | 75.00 |
307 | | gpqa | 61.05 | 63.07 |
308 | | supergpqa | 47.82 | 49.67 |
309 | | olympiadbench | 69.90 | 70.45 |
310 | | aime2024 | 75.52 | 75.83 |
311 | | aime2025 | 67.50 | 68.71 |
312 | | mbpp | 83.66 | 93.00 |
313 | | lcb-code | 46.86 | 48.57 |
314 |
315 |
316 | ## Fine-tune
317 |
318 | You can use the latest [xtuner](https://github.com/InternLM/xtuner) to fine-tune POLAR. XTuner is an efficient, flexible, and full-featured toolkit for fine-tuning LLMs.
319 |
320 | - It is recommended to create a Python 3.10 virtual environment with conda:
321 |
322 | ```bash
323 | conda create --name xtuner-env python=3.10 -y
324 | conda activate xtuner-env
325 | ```
326 |
327 | - Install xtuner via pip:
328 |
329 | ```shell
330 | pip install 'xtuner[deepspeed]'==0.2.0
331 | ```
332 |
333 | - Alternatively, install xtuner from the latest source code:
334 |
335 | ```shell
336 | pip install 'git+https://github.com/InternLM/xtuner.git@main#egg=xtuner[deepspeed]'
337 | ```
338 |
339 | ### Requirements
340 |
341 | - flash_attn
342 | - tensorboard
343 |
344 | ### Data format
345 |
346 | Unlike traditional reward models, POLAR requires an additional reference trajectory as a demonstration during fine-tuning, along with a chosen trajectory and a rejected trajectory. You can construct your fine-tuning data in a `train.jsonl` file, formatted as follows:
347 |
348 | ```json
349 | {
350 | "prompt": [{"role": "user", "content": "What is the capital of China?"}],
351 | "reference": [{"role": "assistant", "content": "Beijing."}],
352 | "chosen": [{"role": "assistant", "content": "Beijing."}],
353 | "rejected": [{"role": "assistant", "content": "Shanghai."}]
354 | }
355 | ```
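
A minimal sketch of how such a `train.jsonl` file could be produced with the standard library; the record reuses the example above, and the output path is an assumption.

```python
import json

# Hypothetical preference records in the format above; one JSON object per line.
records = [
    {
        "prompt": [{"role": "user", "content": "What is the capital of China?"}],
        "reference": [{"role": "assistant", "content": "Beijing."}],
        "chosen": [{"role": "assistant", "content": "Beijing."}],
        "rejected": [{"role": "assistant", "content": "Shanghai."}],
    },
]

# Write one JSON object per line (JSON Lines format expected by the config).
with open("train.jsonl", "w", encoding="utf-8") as f:
    for record in records:
        f.write(json.dumps(record, ensure_ascii=False) + "\n")
```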
356 |
357 | ### Training steps
358 |
359 | - **Step 0:** Prepare the config. We provide example ready-to-use configs [here](./examples/xtuner_configs/POLAR_7B_full_varlenattn_custom_dataset.py). If the provided configs do not meet your requirements, copy one of them and modify it following the [xtuner guideline](https://github.com/InternLM/xtuner/blob/main/docs/en/get_started/quickstart.md). For more details on reward model training settings, see the xtuner [reward model guideline](https://github.com/InternLM/xtuner/blob/main/docs/en/reward_model/modify_settings.md).
360 |
361 | - **Step 1:** Start fine-tuning.
362 |
363 | ```shell
364 | xtuner train ${CONFIG_FILE_PATH}
365 | ```
366 |
367 | For example, you can start fine-tuning POLAR-7B-Base by running:
368 |
369 | ```shell
370 | # On a single GPU
371 | xtuner train ./examples/xtuner_configs/POLAR_7B_full_varlenattn_custom_dataset.py --deepspeed deepspeed_zero2
372 |
373 | # On multiple GPUs
374 | NPROC_PER_NODE=${GPU_NUM} xtuner train ./examples/xtuner_configs/POLAR_7B_full_varlenattn_custom_dataset.py --deepspeed deepspeed_zero2
375 | ```
376 |
377 | Here, `--deepspeed` enables [DeepSpeed](https://github.com/microsoft/DeepSpeed) to optimize training. XTuner ships with several integrated strategies, including ZeRO-1, ZeRO-2, and ZeRO-3. If you wish to disable this feature, simply remove the argument.
378 |
379 | - **Step 2:** Convert the saved PTH model (a directory if you used DeepSpeed) to a Hugging Face model:
380 |
381 | ```shell
382 | xtuner convert pth_to_hf ${CONFIG_FILE_PATH} ${PTH} ${SAVE_PATH}
383 | ```
384 |
385 | # Examples
386 |
387 | ## Closed-ended questions
388 |
389 | ```python
390 | from src.polar import RewardModelClient
391 | # from xtuner.utils import RewardModelClient
392 |
393 | prompt = "How many 'r's are there in the word 'strawberry'?"
394 | reference = "There are 3 'r's in the word 'strawberry'. Here's how we can count them: 's', 't', 'r', 'a', 'w', 'b', 'e', 'r', 'r', 'y'. So, the answer is 3."
395 | outputs = [
396 | # Same as the reference response.
397 | "There are 3 'r's in the word 'strawberry'. Here's how we can count them: 's', 't', 'r', 'a', 'w', 'b', 'e', 'r', 'r', 'y'. So, the answer is 3.",
398 | # Correct answer with correct thoughts.
399 | "Let's count the 'r's in 'strawberry': 's', 't', 'r', 'a', 'w', 'b', 'e', 'r', 'r', 'y'. There are three 'r's, so the answer is three.",
400 | # Wrong answer with wrong thoughts.
401 | "Let's count the 'r's in 'strawberry': 's', 't', 'r', 'a', 'w', 'b', 'e', 'r', 'r', 'y'. There are two 'r's, so the answer is two.",
402 | # Wrong answer with correct thoughts.
403 | "Let's count the 'r's in 'strawberry': 's', 't', 'r', 'a', 'w', 'b', 'e', 'r', 'r', 'y'. There are three 'r's, so the answer is two.",
404 | # Correct answer with wrong thoughts.
405 | "Let's count the 'r's in 'strawberry': 's', 't', 'r', 'a', 'w', 'b', 'e', 'r', 'r', 'y'. There are two 'r's, so the answer is three.",
406 | # Correct answer without thoughts.
407 | "There are 3 'r's in the word 'strawberry'.",
408 | # Wrong answer without thoughts.
409 | "There are 2 'r's in the word 'strawberry'.",
410 | ]
411 | data = [{"prompt": prompt, "reference": reference, "output": output} for output in outputs]
412 |
413 | client = RewardModelClient("internlm/POLAR-7B", server_type="sglang", server_address="127.0.0.1:30000")
414 | rewards = client(data)
415 |
416 | sorted_res = sorted(zip(outputs, rewards), key=lambda x: x[1], reverse=True)
417 |
418 | for output, reward in sorted_res:
419 | print(f"Output: {output}\nReward: {reward}\n")
420 | ```
421 |
422 | ```txt
423 | Output: There are 3 'r's in the word 'strawberry'. Here's how we can count them: 's', 't', 'r', 'a', 'w', 'b', 'e', 'r', 'r', 'y'. So, the answer is 3.
424 | Reward: 0.054595947265625
425 |
426 | Output: Let's count the 'r's in 'strawberry': 's', 't', 'r', 'a', 'w', 'b', 'e', 'r', 'r', 'y'. There are three 'r's, so the answer is three.
427 | Reward: -2.005859375
428 |
429 | Output: There are 3 'r's in the word 'strawberry'.
430 | Reward: -6.70703125
431 |
432 | Output: Let's count the 'r's in 'strawberry': 's', 't', 'r', 'a', 'w', 'b', 'e', 'r', 'r', 'y'. There are two 'r's, so the answer is three.
433 | Reward: -7.10546875
434 |
435 | Output: Let's count the 'r's in 'strawberry': 's', 't', 'r', 'a', 'w', 'b', 'e', 'r', 'r', 'y'. There are three 'r's, so the answer is two.
436 | Reward: -7.1328125
437 |
438 | Output: Let's count the 'r's in 'strawberry': 's', 't', 'r', 'a', 'w', 'b', 'e', 'r', 'r', 'y'. There are two 'r's, so the answer is two.
439 | Reward: -8.46875
440 |
441 | Output: There are 2 'r's in the word 'strawberry'.
442 | Reward: -10.8203125
443 | ```
444 |
445 | ## Open-ended questions
446 | ```python
447 | from src.polar import RewardModelClient
448 | # from xtuner.utils import RewardModelClient
449 |
450 | prompt = "Summarize the first book of Frank Herbert’s Dune in one witty short sentence."
451 | reference = "Royal teen discovers that life’s a beach—minus the ocean, plus spice, giant sandworms and deadly politics."
452 | outputs = [
453 | # Same as the reference response.
454 | "Royal teen discovers that life’s a beach—minus the ocean, plus spice, giant sandworms and deadly politics.",
455 | # Closely resembles the reference response but includes factual errors.
456 | "Royal teen discovers that life’s a beach—minus the ocean, plus magic, dark wizards and deadly politics.",
457 | # A distinct yet concise and witty summary that draws analogies from other dramas—markedly different from the reference response.
458 | "Young noble’s move to desert planet turns into galactic Game of Thrones with fewer dragons, more worms.",
459 | # A concise summary, but lacking wit—fails to meet the requirement.
460 | "A noble family’s fall sparks a young heir’s rise as a leader on a harsh desert planet governed by prophecy and survival.",
461 | # A witty summary, but overly long—fails to meet the requirement.
462 | "Paul Atreides loses his father, gains prophetic powers, learns to ride a sandworm, leads a holy war, and discovers that being the chosen one comes with a lot of blood, sand, and questionable decisions.",
463 | # A concise and witty summary that draws from multiple Dune books rather than just the first—fails to follow the instruction.
464 | "Boy gets planet, becomes god, loses soul — family drama ensues across galaxies."
465 | ]
466 | data = [{"prompt": prompt, "reference": reference, "output": output} for output in outputs]
467 |
468 | client = RewardModelClient("internlm/POLAR-7B", server_type="sglang", server_address="127.0.0.1:30000")
469 | rewards = client(data)
470 |
471 | sorted_res = sorted(zip(outputs, rewards), key=lambda x: x[1], reverse=True)
472 |
473 | for output, reward in sorted_res:
474 | print(f"Output: {output}\nReward: {reward}\n")
475 | ```
476 |
477 | ```txt
478 | Output: Royal teen discovers that life’s a beach—minus the ocean, plus spice, giant sandworms and deadly politics.
479 | Reward: 0.466552734375
480 |
481 | Output: Young noble’s move to desert planet turns into galactic Game of Thrones with fewer dragons, more worms.
482 | Reward: -6.91796875
483 |
484 | Output: Royal teen discovers that life’s a beach—minus the ocean, plus magic, dark wizards and deadly politics.
485 | Reward: -7.70703125
486 |
487 | Output: Paul Atreides loses his father, gains prophetic powers, learns to ride a sandworm, leads a holy war, and discovers that being the chosen one comes with a lot of blood, sand, and questionable decisions.
488 | Reward: -8.4296875
489 |
490 | Output: A noble family’s fall sparks a young heir’s rise as a leader on a harsh desert planet governed by prophecy and survival.
491 | Reward: -8.6484375
492 |
493 | Output: Boy gets planet, becomes god, loses soul — family drama ensues across galaxies.
494 | Reward: -10.359375
495 | ```
496 |
497 | # License
498 |
499 | Code and model weights are licensed under Apache-2.0.
500 |
501 | # Citation
502 |
503 | ```
504 | @article{dou2025pretrained,
505 | title={Pre-Trained Policy Discriminators are General Reward Models},
506 | author={Dou, Shihan and Liu, Shichun and Yang, Yuming and Zou, Yicheng and Zhou, Yunhua and Xing, Shuhao and Huang, Chenhao and Ge, Qiming and Song, Demin and Lv, Haijun and others},
507 | journal={arXiv preprint arXiv:2507.05197},
508 | year={2025}
509 | }
510 | ```
511 |
--------------------------------------------------------------------------------
/README_zh-CN.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | ![logo](./assets/logo.png)
4 |
5 |
6 | [license](./LICENSE)
7 | [xtuner](https://github.com/InternLM/xtuner/)
8 | [lmdeploy](https://github.com/InternLM/lmdeploy/)
9 | [sglang](https://github.com/sgl-project/sglang/)
10 | [vllm](https://github.com/vllm-project/vllm/)
11 | [verl](https://github.com/volcengine/verl)
12 |
13 |
14 | [🤗 HuggingFace](https://huggingface.co/collections/internlm/polar-68693f829d2e83ac5e6e124a) |
15 | [🤖 ModelScope](https://www.modelscope.cn/organization/Shanghai_AI_Laboratory) |
16 | [📜 Paper](https://arxiv.org/abs/2507.05197)
17 |
18 |
19 | [English](./README.md) |
20 | [简体中文](./README_zh-CN.md)
21 |
22 |
23 |
24 | # 最新进展 🎉
25 |
26 | - **[2025/09]** POLAR 论文现已被 NeurIPS 2025 会议接收。
27 | - **[2025/09]** POLAR 现已支持使用 VERL 进行 RFT(强化微调)训练。
28 |
29 |
30 | # 简介
31 |
32 | POLAR 是一个经过大规模预训练的奖励模型,在训练范式和模型性能上取得了重大突破。我们利用全新的策略判别学习方法(Policy Discriminative Learning,POLAR),使用大规模合成语料进行高效扩展预训练,使奖励模型能够有效区分不同的语言模型和策略分布。经过预训练的 POLAR 可通过少量的偏好数据进行微调,以快速对齐人类偏好。POLAR 的主要特点包括:
33 |
34 | * **全新的预训练范式**:POLAR 让奖励模型学会识别相同的策略并区分不同的策略。与传统的依赖绝对偏好的奖励建模方法不同,POLAR 能够学习两个策略之间的相对差异,是一种可扩展的、高层次的优化目标。
35 |
36 | * **专为强化学习微调(RFT)设计:** POLAR 根据给定的参考答案为语言模型的输出打分,完美契合强化学习微调(RFT)框架,为强化学习微调在通用场景的应用提供了一种有效解决方案。
37 |
38 | * **卓越的性能与泛化能力:** POLAR 在下游强化学习任务中展现出领先的水平,可稳定地提供准确可靠的奖励信号。POLAR 具有极强的泛化能力,可有效泛化到分布外场景,并显著减少奖励黑客(Reward Hacking)的现象。
39 |
40 | * **易于定制化:** 我们提供了 POLAR 的预训练权重(POLAR-Base)。研究人员可以根据自身需求,便捷地对其进行微调以适配各种定制化场景。
41 |
42 | ![intro](./assets/intro.jpeg)
43 |
44 |
45 | # 模型库
46 |
47 | 此次发布的 POLAR 模型参数规模分别为 1.8B 和 7B。**POLAR-1.8B-Base** 和 **POLAR-7B-Base** 是仅经过预训练阶段的权重,适合根据特定需求进行微调。**POLAR-1.8B** 和 **POLAR-7B** 是经过偏好微调的奖励模型,可开箱即用,适用于大部分通用场景。
48 |
49 | | 模型 | Transformers (HF) | ModelScope |
50 | | -------------------------- | ------------------------------------------ | ---------------------------------------- |
51 | | **POLAR-1.8B-Base** | [🤗 POLAR-1_8B-Base](https://huggingface.co/internlm/POLAR-1_8B-Base) | [🤖 POLAR-1_8B-Base](https://modelscope.cn/models/Shanghai_AI_Laboratory/POLAR-1_8B-Base/summary) |
52 | | **POLAR-1.8B** | [🤗 POLAR-1_8B](https://huggingface.co/internlm/POLAR-1_8B) | [🤖 POLAR-1_8B](https://modelscope.cn/models/Shanghai_AI_Laboratory/POLAR-1_8B/summary) |
53 | | **POLAR-7B-Base** | [🤗 POLAR-7B-Base](https://huggingface.co/internlm/POLAR-7B-Base) | [🤖 POLAR-7B-Base](https://modelscope.cn/models/Shanghai_AI_Laboratory/POLAR-7B-Base/summary) |
54 | | **POLAR-7B** | [🤗 POLAR-7B](https://huggingface.co/internlm/POLAR-7B) | [🤖 POLAR-7B](https://modelscope.cn/models/Shanghai_AI_Laboratory/POLAR-7B/summary) |
55 |
56 |
57 | # 性能
58 |
59 | 我们通过 Proximal Policy Optimization(PPO)算法对 POLAR 的使用效果进行了验证,评测了四种语言模型的下游强化学习性能,评测工具是 [OpenCompass](https://github.com/internLM/OpenCompass/) 。详细信息请参阅[论文](https://arxiv.org/abs/2507.05197)。
60 |
61 | ![result](./assets/result.png)
62 |
63 | # 快速开始
64 |
65 | ## 安装
66 |
67 | 本仓库提供了一个`RewardModelClient`类(`src/polar/reward_func.py`),用于向远程 POLAR 服务请求奖励分数。该类负责对输入的文本进行编码,支持与多种推理后端(sglang、vllm、lmdeploy)进行通信,并返回奖励分数。
68 |
69 | ```python
70 | from src.polar import RewardModelClient
71 | ```
72 |
73 | 您也可以选择使用 [XTuner](https://github.com/InternLM/xtuner) 提供的实现,只需安装 XTuner 并从中导入该类:
74 |
75 | ```python
76 | from xtuner.utils import RewardModelClient
77 | ```
78 |
79 | 关于 XTuner 的安装方法,请参考下方的[偏好微调](#偏好微调)部分。
80 |
81 |
82 | ## 推理
83 |
84 | 我们支持通过 [lmdeploy](https://github.com/InternLM/lmdeploy/)、[sglang](https://github.com/sgl-project/sglang/)、[vllm](https://github.com/vllm-project/vllm/) 对 POLAR 进行推理并获取奖励信号。建议在使用这些推理引擎时,创建 conda 虚拟环境,以避免可能出现的依赖冲突问题。
85 |
86 | ### 数据格式
87 |
88 | 与传统奖励模型不同,POLAR 需要额外的参考答案。POLAR 对模型输出轨迹与参考答案的一致性进行评估,并给出奖励分数。
89 |
90 | ```python
91 | data = [
92 | {
93 | "prompt": [{"role": "user", "content": "What is the capital of China?"}],
94 | "reference": [{"role": "assistant", "content": "Beijing."}],
95 | "output": [{"role": "assistant", "content": "Beijing."}]
96 | },
97 | {
98 | "prompt": [{"role": "user", "content": "What is the capital of China?"}],
99 | "reference": [{"role": "assistant", "content": "Beijing."}],
100 | "output": [{"role": "assistant", "content": "Shanghai."}]
101 | }
102 | ]
103 | ```
104 |
105 | ### 使用 transformers 进行推理
106 |
107 | #### 示例代码
108 |
109 | ```python
110 | from transformers import AutoModel, AutoTokenizer
111 | from src.polar import RewardModelClient
112 | # from xtuner.utils import RewardModelClient
113 |
114 | model_name = 'internlm/POLAR-7B'
115 |
116 | model = AutoModel.from_pretrained(
117 | model_name,
118 | device_map="cuda",
119 | trust_remote_code=True
120 | )
121 | tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
122 |
123 | client = RewardModelClient(model_name)
124 | encoded_data = client.encode(data)
125 | batch = tokenizer(encoded_data, return_tensors='pt', padding=True).to('cuda')
126 | outputs = model(**batch)
127 | rewards = outputs[0].squeeze(-1).cpu().tolist()
128 | print(rewards)
129 | # [-0.5702977776527405, -11.030370712280273] for previous example data
130 | ```
131 |
132 | ### 使用 lmdeploy 进行推理
133 |
134 | [LMDeploy](https://github.com/InternLM/lmdeploy) 是一个高效压缩、部署语言模型的工具。
135 |
136 | #### 环境依赖
137 |
138 | - lmdeploy >= 0.9.1
139 |
140 | #### 启动服务端
141 |
142 | ```bash
143 | lmdeploy serve api_server internlm/POLAR-7B --backend pytorch --server-port 30000
144 | ```
145 | #### 客户端请求示例
146 |
147 | ```python
148 | from src.polar import RewardModelClient
149 | # from xtuner.utils import RewardModelClient
150 |
151 | client = RewardModelClient("internlm/POLAR-7B",
152 | server_type="lmdeploy",
153 | server_address="127.0.0.1:30000")
154 |
155 | # Request rewards directly
156 | rewards = client(data)
157 | print(rewards)
158 |
159 | # First encode data and then get rewards via the request function.
160 | encoded_data = client.encode(data)
161 | rewards = client.lmdeploy_request_reward(encoded_data)
162 | print(rewards)
163 | ```
164 |
165 | ### 使用 sglang 进行推理
166 |
167 | #### 环境依赖
168 |
169 | - 0.4.3.post4 <= sglang <= 0.4.4.post1
170 |
171 | #### 启动服务端
172 |
173 | ```bash
174 | python3 -m sglang.launch_server --model internlm/POLAR-7B --trust-remote-code --is-embedding --dp 4 --tp 2 --mem-fraction-static 0.9 --port 30000
175 | ```
176 |
177 | #### 客户端请求示例
178 |
179 | ```python
180 | from src.polar import RewardModelClient
181 | # from xtuner.utils import RewardModelClient
182 |
183 | client = RewardModelClient("internlm/POLAR-7B",
184 | server_type="sglang",
185 | server_address="127.0.0.1:30000")
186 |
187 | # Request rewards directly
188 | rewards = client(data)
189 | print(rewards)
190 |
191 | # First encode data and then get rewards via the request function.
192 | encoded_data = client.encode(data)
193 | rewards = client.sglang_request_reward(encoded_data)
194 | print(rewards)
195 | ```
196 |
197 | ### 使用 vllm 进行推理
198 |
199 | #### 环境依赖
200 |
201 | - vllm >= 0.8.0
202 |
203 | #### 启动服务端
204 |
205 | ```bash
206 | vllm serve internlm/POLAR-7B --task=reward --trust-remote-code --tensor-parallel-size=2 --port 30000
207 | ```
208 |
209 | #### 客户端请求示例
210 |
211 | ```python
212 | from src.polar import RewardModelClient
213 | # from xtuner.utils import RewardModelClient
214 |
215 | client = RewardModelClient("internlm/POLAR-7B",
216 | server_type="vllm",
217 | server_address="127.0.0.1:30000")
218 |
219 | # Request rewards directly
220 | rewards = client(data)
221 | print(rewards)
222 |
223 | # First encode data and then get rewards via the request function.
224 | encoded_data = client.encode(data)
225 | rewards = client.vllm_request_reward(encoded_data)
226 | print(rewards)
227 | ```
228 |
229 | ## 使用 VERL 进行强化微调(RFT)
230 |
231 | POLAR 可以方便地接入各类强化学习训练框架。本仓库提供了一个示例,演示如何结合 [VERL](https://github.com/volcengine/verl) 与 POLAR 奖励模型进行强化微调(RFT)。
232 |
233 | ### 环境配置
234 |
235 | 详细的环境配置方法请参考 [VERL 官方安装指南](https://github.com/volcengine/verl)。
236 |
237 | > **注意**: 在训练 Qwen2.5 系列模型时,推荐使用推理后端 **vLLM 0.8.3** 搭配 **Transformers 4.50.3**,以获得最佳性能。更高版本的 Transformers 可能会导致 Qwen2.5 系列训练不稳定。
238 |
239 | ### 数据格式
240 |
241 | 训练数据应为 Parquet 格式,结构如下:
242 | ```python
243 | {
244 | "data_source": "dataset_name",
245 | "prompt": [{"role": "user", "content": "..."}, ...],
246 | "ability": "ability_type",
247 | "reward_model": {
248 | "style": "polar",
249 | "ground_truth": [{"role": "assistant", "content": "..."}]
250 | },
251 | "extra_info": {
252 | # 与 prompt 相同,用于兼容 VERL 与 POLAR
253 | "prompt": [{"role": "user", "content": "..."}, ...],
254 | }
255 | }
256 | ```
257 |
258 | ### 训练步骤
259 |
260 | - **Step 1:** 部署 POLAR
261 |
262 | 按照上述[推理](#推理)部分的说明,启动 POLAR 奖励模型服务,并在 `src/polar/reward_func.py` 中更新服务配置:
263 |
264 | ```python
265 | # 配置奖励模型服务
266 | ADDRESS = "your_server_ip:port" # 修改为实际的服务器地址
267 | SERVER_TYPE = "sglang" # 可选:"sglang", "vllm", "lmdeploy"
268 | MODEL_PATH = "internlm/POLAR-7B"
269 | ```
270 |
271 | - **Step 2:** 数据准备
272 |
273 | 将训练数据准备为 Parquet 格式,可使用提供的预处理脚本:
274 |
275 | ```bash
276 | # 示例:处理 HH-RLHF 数据集
277 | python examples/data_preprocess/full_hh_rlhf.py --local_dir ~/data/hh_rlhf
278 | ```
279 |
280 | - **Step 3:** 配置训练脚本
281 |
282 | 示例训练脚本可参考:`examples/ppo/qwen2_5-7b_hh-rlhf.sh`。
283 |
284 | - **Step 4:** 启动训练
285 |
286 | ```bash
287 | cd verl
288 | bash ../examples/ppo/qwen2_5-7b_hh-rlhf.sh
289 | ```
290 |
291 | ### 参考结果
292 |
293 | 这里展示了使用 POLAR-7B 对 Qwen3-8B 进行强化微调的结果,使用了我们提供的[官方配置](https://github.com/InternLM/POLAR/blob/main/examples/ppo/qwen3-8b_general.sh)以及开源的 [AM-DeepSeek-R1-0528-Distilled](https://huggingface.co/datasets/a-m-team/AM-DeepSeek-R1-0528-Distilled) 数据集,评估过程由 [OpenCompass](https://github.com/internLM/OpenCompass/) 完成。
294 |
295 | | 评测集 | Qwen3-8B 思考模式 | Qwen3-8B 思考模式 (RFT) |
296 | | --- | ---- | ---- |
297 | | alignment_bench | 7.04 | 7.48 |
298 | | alpaca_eval | 87.20 | 95.40 |
299 | | arenahard | 83.15 | 89.45 |
300 | | followbench | 0.93 | 0.95 |
301 | | mtbench | 8.73 | 8.78 |
302 | | wildbench | 58.43 | 72.09 |
303 | | mmlu | 86.06 | 86.58 |
304 | | mmlu_pro | 73.66 | 75.19 |
305 | | cmmlu | 82.72 | 83.07 |
306 | | bbeh | 29.56 | 33.30 |
307 | | korbench | 73.16 | 75.00 |
308 | | gpqa | 61.05 | 63.07 |
309 | | supergpqa | 47.82 | 49.67 |
310 | | olympiadbench | 69.90 | 70.45 |
311 | | aime2024 | 75.52 | 75.83 |
312 | | aime2025 | 67.50 | 68.71 |
313 | | mbpp | 83.66 | 93.00 |
314 | | lcb-code | 46.86 | 48.57 |
315 |
316 | ## 偏好微调
317 |
318 | 推荐使用最新的 [xtuner](https://github.com/InternLM/xtuner) 来微调 POLAR。xtuner 是一个高效、灵活、具有多种使用特性的语言模型微调工具。
319 |
320 | - 建议使用 conda 创建 Python-3.10 虚拟环境:
321 |
322 | ```bash
323 | conda create --name xtuner-env python=3.10 -y
324 | conda activate xtuner-env
325 | ```
326 |
327 | - 通过 pip 安装 xtuner:
328 |
329 | ```shell
330 | pip install 'xtuner[deepspeed]'==0.2.0
331 | ```
332 |
333 | - 通过最新源码安装 xtuner:
334 |
335 | ```shell
336 | pip install 'git+https://github.com/InternLM/xtuner.git@main#egg=xtuner[deepspeed]'
337 | ```
338 |
339 | ### 环境依赖
340 |
341 | - flash_attn
342 | - tensorboard
343 |
344 | ### 数据格式
345 |
346 | 与传统的奖励模型不同,除了 chosen 轨迹和 rejected 轨迹,POLAR 在微调过程中还需要一个额外的参考答案作为示范。你可以通过构建一个 `train.jsonl` 的文件来准备微调数据,格式如下:
347 |
348 | ```json
349 | {
350 | "prompt": [{"role": "user", "content": "What is the capital of China?"}],
351 | "reference": [{"role": "assistant", "content": "Beijing."}],
352 | "chosen": [{"role": "assistant", "content": "Beijing."}],
353 | "rejected": [{"role": "assistant", "content": "Shanghai."}]
354 | }
355 | ```
356 |
357 | ### 训练步骤
358 |
359 | - **第一步:** 准备配置文件。我们提供了可直接使用的[示例配置](./examples/xtuner_configs/POLAR_7B_full_varlenattn_custom_dataset.py)。如果需要进一步对超参进行修改,请复制一份示例配置文件,并根据 [xtuner 使用指南](https://github.com/InternLM/xtuner/blob/main/docs/en/get_started/quickstart.md) 进行修改。有关奖励模型训练设置的更多信息,请参考 [xtuner 奖励模型](https://github.com/InternLM/xtuner/blob/main/docs/en/reward_model/modify_settings.md)。
360 |
361 | - **第二步:** 启动微调。
362 |
363 | ```shell
364 | xtuner train ${CONFIG_FILE_PATH}
365 | ```
366 |
367 | 例如,你可以按照如下的方式微调 POLAR-7B-Base:
368 | ```shell
369 | # On a single GPU
370 | xtuner train ./examples/xtuner_configs/POLAR_7B_full_varlenattn_custom_dataset.py --deepspeed deepspeed_zero2
371 |
372 | # On multiple GPUs
373 | NPROC_PER_NODE=${GPU_NUM} xtuner train ./examples/xtuner_configs/POLAR_7B_full_varlenattn_custom_dataset.py --deepspeed deepspeed_zero2
374 | ```
375 |
376 | 这里,`--deepspeed` 表示使用 [DeepSpeed](https://github.com/microsoft/DeepSpeed) 来加速训练。xtuner 内置了多种 DeepSpeed 策略,包括 ZeRO-1、ZeRO-2 和 ZeRO-3。如果您想禁用此功能,只需移除此参数即可。
377 |
378 | - **第三步:** 将保存的 PTH 模型(若使用 DeepSpeed,则保存结果会是一个目录)转换为 HuggingFace 模型,命令如下:
379 |
380 | ```shell
381 | xtuner convert pth_to_hf ${CONFIG_FILE_PATH} ${PTH} ${SAVE_PATH}
382 | ```
383 |
384 |
385 | # 效果示例
386 |
387 | ## 客观问答
388 |
389 | ```python
390 | from src.polar import RewardModelClient
391 | # from xtuner.utils import RewardModelClient
392 |
393 | prompt = "单词“strawberry”中有几个“r”?"
394 | reference = "单词“strawberry”中包含3个字母“r”。我们可以逐字母数一下:“s”、“t”、“r”、“a”、“w”、“b”、“e”、“r”、“r”、“y”。因此,答案是3。"
395 | outputs = [
396 | # 与参考完全一致
397 | "单词“strawberry”中包含3个字母“r”。我们可以逐字母数一下:“s”、“t”、“r”、“a”、“w”、“b”、“e”、“r”、“r”、“y”。因此,答案是3。",
398 | # 思路正确,答案正确
399 | "我们来数一数单词“strawberry”中有几个“r”:“s”、“t”、“r”、“a”、“w”、“b”、“e”、“r”、“r”、“y”。这里一共有三个“r”,因此答案是三。",
400 | # 思路错误,答案错误
401 | "我们来数一数单词“strawberry”中有几个“r”:“s”、“t”、“r”、“a”、“w”、“b”、“e”、“r”、“r”、“y”。这里一共有两个“r”,因此答案是二。",
402 | # 思路错误,答案正确
403 | "我们来数一数单词“strawberry”中有几个“r”:“s”、“t”、“r”、“a”、“w”、“b”、“e”、“r”、“r”、“y”。这里一共有两个“r”,因此答案是三。",
404 | # 思路正确,答案错误
405 | "我们来数一数单词“strawberry”中有几个“r”:“s”、“t”、“r”、“a”、“w”、“b”、“e”、“r”、“r”、“y”。这里一共有三个“r”,因此答案是二。",
406 | # 答案正确
407 | "单词“strawberry”中有3个“r”",
408 | # 答案错误
409 | "单词“strawberry”中有2个“r”"
410 | ]
411 | data = [{"prompt": prompt, "reference": reference, "output": output} for output in outputs]
412 |
413 | client = RewardModelClient("internlm/POLAR-7B", server_type="sglang", server_address="127.0.0.1:30000")
414 | rewards = client(data)
415 |
416 | sorted_res = sorted(zip(outputs, rewards), key=lambda x: x[1], reverse=True)
417 |
418 | for output, reward in sorted_res:
419 | print(f"Output: {output}\nReward: {reward}\n")
420 | ```
421 |
422 | ```txt
423 | Output: 单词“strawberry”中包含3个字母“r”。我们可以逐字母数一下:“s”、“t”、“r”、“a”、“w”、“b”、“e”、“r”、“r”、“y”。因此,答案是3。
424 | Reward: -1.5380859375
425 |
426 | Output: 我们来数一数单词“strawberry”中有几个“r”:“s”、“t”、“r”、“a”、“w”、“b”、“e”、“r”、“r”、“y”。这里一共有三个“r”,因此答案是三。
427 | Reward: -2.767578125
428 |
429 | Output: 单词“strawberry”中有3个“r”
430 | Reward: -7.45703125
431 |
432 | Output: 我们来数一数单词“strawberry”中有几个“r”:“s”、“t”、“r”、“a”、“w”、“b”、“e”、“r”、“r”、“y”。这里一共有三个“r”,因此答案是二。
433 | Reward: -7.6328125
434 |
435 | Output: 我们来数一数单词“strawberry”中有几个“r”:“s”、“t”、“r”、“a”、“w”、“b”、“e”、“r”、“r”、“y”。这里一共有两个“r”,因此答案是三。
436 | Reward: -8.65625
437 |
438 | Output: 我们来数一数单词“strawberry”中有几个“r”:“s”、“t”、“r”、“a”、“w”、“b”、“e”、“r”、“r”、“y”。这里一共有两个“r”,因此答案是二。
439 | Reward: -9.2890625
440 |
441 | Output: 单词“strawberry”中有2个“r”
442 | Reward: -11.921875
443 | ```
444 |
445 | ## Subjective Q&A
446 | ```python
447 | from src.polar import RewardModelClient
448 | # from xtuner.utils import RewardModelClient
449 |
450 | prompt = "帮我想3个形容雨很大的成语,要求不能重复。"
451 | reference = "1. 倾盆大雨 2. 暴雨如注 3. 瓢泼大雨"
452 | outputs = [
453 |     # Same as the reference
454 | "1. 倾盆大雨 2. 暴雨如注 3. 瓢泼大雨",
455 |     # Correct answer
456 | "1. 大雨滂沱 2. 狂风骤雨 3. 大雨如注",
457 |     # Not real idioms
458 | "1. 急雨如瀑 2. 豪雨倾天 3. 雨势磅礴",
459 |     # Similar to the reference, with one extra
460 | "1. 倾盆大雨 2. 暴雨如注 3. 瓢泼大雨 4. 大雨滂沱",
461 |     # Similar to the reference, with one repeated
462 | "1. 倾盆大雨 2. 暴雨如注 3. 暴雨如注",
463 |     # Similar to the reference, with one missing
464 | "1. 倾盆大雨 2. 暴雨如注",
465 |     # Correct idioms, with one extra
466 | "1. 大雨滂沱 2. 狂风骤雨 3. 大雨如注 4. 倾盆大雨",
467 |     # Correct idioms, with one repeated
468 | "1. 大雨滂沱 2. 狂风骤雨 3. 狂风骤雨",
469 |     # Correct idioms, with one missing
470 | "1. 大雨滂沱 2. 狂风骤雨"
471 | ]
472 | data = [{"prompt": prompt, "reference": reference, "output": output} for output in outputs]
473 |
474 | client = RewardModelClient("internlm/POLAR-7B", server_type="sglang", server_address="127.0.0.1:30000")
475 | rewards = client(data)
476 |
477 | sorted_res = sorted(zip(outputs, rewards), key=lambda x: x[1], reverse=True)
478 |
479 | for output, reward in sorted_res:
480 | print(f"Output: {output}\nReward: {reward}\n")
481 | ```
482 |
483 | ```txt
484 | Output: 1. 倾盆大雨 2. 暴雨如注 3. 瓢泼大雨
485 | Reward: -1.42578125
486 |
487 | Output: 1. 大雨滂沱 2. 狂风骤雨 3. 大雨如注
488 | Reward: -5.234375
489 |
490 | Output: 1. 倾盆大雨 2. 暴雨如注 3. 瓢泼大雨 4. 大雨滂沱
491 | Reward: -5.62890625
492 |
493 | Output: 1. 急雨如瀑 2. 豪雨倾天 3. 雨势磅礴
494 | Reward: -5.7109375
495 |
496 | Output: 1. 倾盆大雨 2. 暴雨如注
497 | Reward: -6.61328125
498 |
499 | Output: 1. 倾盆大雨 2. 暴雨如注 3. 暴雨如注
500 | Reward: -6.65234375
501 |
502 | Output: 1. 大雨滂沱 2. 狂风骤雨
503 | Reward: -6.828125
504 |
505 | Output: 1. 大雨滂沱 2. 狂风骤雨 3. 大雨如注 4. 倾盆大雨
506 | Reward: -7.0234375
507 |
508 | Output: 1. 大雨滂沱 2. 狂风骤雨 3. 狂风骤雨
509 | Reward: -7.23046875
510 | ```
511 |
512 | # License
513 |
514 | Both the code and the model weights are licensed under Apache-2.0.
515 |
516 | # Citation
517 |
518 | ```bibtex
519 | @article{dou2025pretrained,
520 | title={Pre-Trained Policy Discriminators are General Reward Models},
521 | author={Dou, Shihan and Liu, Shichun and Yang, Yuming and Zou, Yicheng and Zhou, Yunhua and Xing, Shuhao and Huang, Chenhao and Ge, Qiming and Song, Demin and Lv, Haijun and others},
522 | journal={arXiv preprint arXiv:2507.05197},
523 | year={2025}
524 | }
525 | ```
526 |
--------------------------------------------------------------------------------
/assets/intro.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/POLAR/80626f39de581ac56d7bf1ca36a5e4f83d42d5c5/assets/intro.jpeg
--------------------------------------------------------------------------------
/assets/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/POLAR/80626f39de581ac56d7bf1ca36a5e4f83d42d5c5/assets/logo.png
--------------------------------------------------------------------------------
/assets/result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/InternLM/POLAR/80626f39de581ac56d7bf1ca36a5e4f83d42d5c5/assets/result.png
--------------------------------------------------------------------------------
/examples/data_preprocess/am_general.py:
--------------------------------------------------------------------------------
1 | # Copyright 2025 POLAR Team and/or its affiliates
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import argparse
16 | import os
17 | import json
18 |
19 | from datasets import Dataset
20 | from huggingface_hub import snapshot_download
21 |
22 | filtered_num = 0
23 | data_source = "a-m-team/AM-DeepSeek-R1-0528-Distilled"
24 |
25 |
26 | def read_dataset(local_dir):
27 | global filtered_num
28 | idx = 0
29 | for file in os.listdir(local_dir):
30 | if file.endswith(".jsonl"):
31 | with open(os.path.join(local_dir, file), "r", encoding="utf-8") as f:
32 | for line in f:
33 | example = json.loads(line)
34 | try:
35 | conversations = example.pop("conversations")
36 |
37 | dialogs = []
38 | for item in conversations:
39 | if item["from"] == "human":
40 | dialogs.append({"role": "user", "content": item["value"]})
41 | else:
42 | if "info" in item:
43 | dialogs.append({"role": "assistant", "content": item["info"]["answer_content"]})
44 | else:
45 |                             content = item["value"].split("<answer>")[1].split("</answer>")[0].strip()  # assumes the answer is wrapped in <answer>...</answer> tags
46 |                             dialogs.append({"role": "assistant", "content": content})
47 |
48 | assert dialogs[-1]["role"] == "assistant"
49 | data = {
50 | "data_source": data_source,
51 | "prompt": dialogs[:-1],
52 | "ability": "general",
53 | "reward_model": {
54 | "style": "polar",
55 | "ground_truth": dialogs[-1:],
56 | },
57 | "extra_info": {
58 | "split": "train",
59 | "index": idx,
60 | "ability": "general",
61 | "prompt": dialogs[:-1],
62 | },
63 | }
64 | yield data
65 | idx += 1
66 | except Exception as e:
67 | print(f"Error processing example {idx}: {e}")
68 | filtered_num += 1
69 |
70 |
71 | def generate_dataset(local_dir="~/data/general"):
72 |
73 | data_dir = snapshot_download(
74 | repo_id="a-m-team/AM-DeepSeek-R1-0528-Distilled",
75 | repo_type="dataset",
76 | revision="main",
77 | local_dir="~/data/AM-DeepSeek-R1-0528-Distilled",
78 | local_dir_use_symlinks=False,
79 | allow_patterns=["*.jsonl"],
80 | ignore_patterns=["*.png"]
81 | )
82 |
83 | final_dataset = Dataset.from_generator(lambda: read_dataset(data_dir))
84 | local_dir = os.path.expanduser(local_dir)
85 | local_path = os.path.join(local_dir, "train.parquet")
86 | final_dataset.shuffle(seed=42).to_parquet(local_path)
87 | print(f"Filtered {filtered_num} examples due to errors.")
88 |
89 |
90 | if __name__ == "__main__":
91 | parser = argparse.ArgumentParser()
92 | parser.add_argument("--local_dir", type=str, default="~/data/general")
93 | args = parser.parse_args()
94 |
95 | generate_dataset(args.local_dir)
96 |
--------------------------------------------------------------------------------
/examples/data_preprocess/full_hh_rlhf.py:
--------------------------------------------------------------------------------
1 | # Copyright 2025 POLAR Team and/or its affiliates
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import argparse
16 | import os
17 | import re
18 |
19 | from datasets import load_dataset
20 |
21 |
22 | def parse_dialogue(text: str):
23 | # Fixed pattern to correctly handle content with newlines
24 | # Uses \Z instead of $ for proper end-of-string matching
25 | pattern = r'^(Human|Assistant):\s*(.*?)(?=\n\n(?:Human|Assistant):|\Z)'
26 | matches = re.finditer(pattern, text, flags=re.MULTILINE | re.DOTALL)
27 | return [{"role": m.group(1).lower(), "content": m.group(2).strip()} for m in matches]
28 |
29 |
30 | def generate_dataset(local_dir="~/data/full_hh_rlhf"):
31 | dataset = load_dataset("Anthropic/hh-rlhf")
32 | train_dataset = dataset["train"]
33 |
34 | data_source = "Anthropic/hh-rlhf"
35 |
36 | # add a row to each data item that represents a unique id
37 | def make_map_fn(split):
38 | def process_fn(example, idx):
39 | chosen = example.pop("chosen")
40 | example.pop("rejected")
41 |
42 | dialogs = parse_dialogue(chosen)
43 | dialogs = [{"role": "user", "content": d["content"]} if d["role"] == "human" else d for d in dialogs]
44 |
45 | data = {
46 | "data_source": data_source,
47 | "prompt": dialogs[:-1],
48 | "ability": "alignment",
49 | "reward_model": {
50 | "style": "polar",
51 | "ground_truth": dialogs[-1:],
52 | },
53 | "extra_info": {
54 | "split": split,
55 | "index": idx,
56 | "ability": "alignment",
57 | "prompt": dialogs[:-1],
58 | },
59 | }
60 | return data
61 |
62 | return process_fn
63 |
64 | train_dataset = train_dataset.map(function=make_map_fn("train"), with_indices=True)
65 | local_dir = os.path.expanduser(local_dir)
66 | local_path = os.path.join(local_dir, "train.parquet")
67 | train_dataset.to_parquet(local_path)
68 |
69 |
70 | if __name__ == "__main__":
71 | parser = argparse.ArgumentParser()
72 | parser.add_argument("--local_dir", type=str, default="~/data/full_hh_rlhf")
73 | args = parser.parse_args()
74 |
75 | generate_dataset(args.local_dir)
76 |
--------------------------------------------------------------------------------
/examples/data_preprocess/math.py:
--------------------------------------------------------------------------------
1 | # Copyright 2025 POLAR Team and/or its affiliates
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """
15 | Preprocess the MATH-lighteval dataset to parquet format.
16 | Ground truths in the train split are kept as raw trajectories.
17 | Ground truths in the test split are extracted from \\boxed{}
18 | """
19 |
20 | import argparse
21 | import os
22 |
23 | import datasets
24 |
25 |
26 | def remove_boxed(s):
27 | if "\\boxed " in s:
28 | left = "\\boxed "
29 | assert s[: len(left)] == left
30 | return s[len(left):]
31 |
32 | left = "\\boxed{"
33 |
34 | assert s[: len(left)] == left
35 | assert s[-1] == "}"
36 |
37 | return s[len(left): -1]
38 |
39 |
40 | def last_boxed_only_string(string):
41 | idx = string.rfind("\\boxed")
42 | if "\\boxed " in string:
43 | return "\\boxed " + string.split("\\boxed ")[-1].split("$")[0]
44 | if idx < 0:
45 | idx = string.rfind("\\fbox")
46 | if idx < 0:
47 | return None
48 |
49 | i = idx
50 | right_brace_idx = None
51 | num_left_braces_open = 0
52 | while i < len(string):
53 | if string[i] == "{":
54 | num_left_braces_open += 1
55 | if string[i] == "}":
56 | num_left_braces_open -= 1
57 | if num_left_braces_open == 0:
58 | right_brace_idx = i
59 | break
60 | i += 1
61 |
62 | retval = None if right_brace_idx is None else string[idx: right_brace_idx + 1]
63 |
64 | return retval
65 |
66 |
67 | def extract_solution(solution_str):
68 | return remove_boxed(last_boxed_only_string(solution_str))
69 |
70 |
71 | if __name__ == "__main__":
72 | parser = argparse.ArgumentParser()
73 | parser.add_argument("--local_dir", default="~/data/math")
74 |
75 | args = parser.parse_args()
76 |
77 | # 'lighteval/MATH' is no longer available on huggingface.
78 | # Use mirror repo: DigitalLearningGmbH/MATH-lighteval
79 | data_source = "DigitalLearningGmbH/MATH-lighteval"
80 | print(f"Loading the {data_source} dataset from huggingface...", flush=True)
81 | dataset = datasets.load_dataset(data_source, trust_remote_code=True)
82 |
83 | train_dataset = dataset["train"]
84 | test_dataset = dataset["test"]
85 |
86 | instruction_following = "Let's think step by step and output the final answer within \\boxed{}."
87 |
88 | # add a row to each data item that represents a unique id
89 | def make_map_fn(split):
90 | def process_fn(example, idx):
91 | example.pop("level")
92 | example.pop("type")
93 | question = example.pop("problem")
94 |
95 | question = question + " " + instruction_following
96 |
97 | answer = example.pop("solution")
98 | if split == "train":
99 | solution = answer
100 | else:
101 | solution = extract_solution(answer)
102 |
103 | data = {
104 | "data_source": data_source,
105 | "prompt": [{"role": "user", "content": question}],
106 | "ability": "math",
107 | "reward_model": {"style": "polar", "ground_truth": solution},
108 | "extra_info": {"split": split,
109 | "index": idx,
110 | "ability": "math",
111 | "prompt": [{"role": "user", "content": question}]
112 | },
113 | }
114 | return data
115 |
116 | return process_fn
117 |
118 | train_dataset = train_dataset.map(function=make_map_fn("train"), with_indices=True)
119 | test_dataset = test_dataset.map(function=make_map_fn("test"), with_indices=True)
120 |
121 | local_dir = args.local_dir
122 |
123 | train_dataset.to_parquet(os.path.join(local_dir, "train.parquet"))
124 | test_dataset.to_parquet(os.path.join(local_dir, "test.parquet"))
125 |
--------------------------------------------------------------------------------
/examples/ppo/llama3-8b_general.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Verl PPO training script for llama3.1-8B
3 | set -x
4 |
5 | # Parameters from original script
6 | nodes=1
7 | train_batch_size=1024
8 | actor_lr=1e-6
9 | critic_lr=1e-5
10 | data_name=General
11 | policy_model_name=LLaMa3.1-8B-Instruct
12 | reward_model_name=POLAR-7B
13 |
14 | # Model paths
15 | actor_path=meta-llama/Llama-3.1-8B-Instruct
16 | critic_path=meta-llama/Llama-3.1-8B-Instruct
17 |
18 | # Data paths
19 | train_data_path=$HOME/data/general/train.parquet
20 | test_data_path=$HOME/data/general/train.parquet # no use
21 |
22 | # Reward Configuration
23 | reward_func_path="../src/polar/reward_func.py"
24 |
25 | # Experiment name
26 | name="verl_ppo_policy_${policy_model_name}_reward_${reward_model_name}_data_${data_name}"
27 | output_dir="../outputs/${name}"
28 |
29 | # Create output directory if it doesn't exist
30 | mkdir -p $output_dir
31 |
32 | # Set wandb to offline mode to prevent online sync
33 | # export WANDB_MODE=offline
34 | export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:False
35 |
36 | TARGET_FILE="$output_dir/addr_${name}.txt"
37 | RANK=${RANK:-${NODE_RANK:-0}}
38 | MASTER_PORT=6379
39 | MASTER_ADDR=${MASTER_ADDR}
40 | echo "MASTER_ADDR: $MASTER_ADDR"
41 | echo "Rank $RANK is running on $MASTER_ADDR"
42 |
43 | if [ "$RANK" -eq 0 ]; then
44 | echo "Starting head node (RANK=${RANK}) on port $MASTER_PORT..."
45 |
46 | MASTER_ADDR=${MASTER_ADDR}
47 | echo "$MASTER_ADDR" > "$TARGET_FILE"
48 |
49 | ray start --head --num-gpus 8 --dashboard-host=0.0.0.0 --dashboard-port=8265 --disable-usage-stats --block &
50 | sleep 30
51 |
52 | echo "Executing main program on head node..."
53 |
54 | python3 -m verl.trainer.main_ppo \
55 | algorithm.adv_estimator=gae \
56 | algorithm.gamma=1.0 \
57 | algorithm.lam=1.0 \
58 | algorithm.use_kl_in_reward=False \
59 | algorithm.kl_ctrl.kl_coef=0 \
60 | algorithm.kl_ctrl.type='adaptive' \
61 | \
62 | data.train_files="$train_data_path" \
63 | data.val_files="$test_data_path" \
64 | data.train_batch_size=$train_batch_size \
65 | data.max_prompt_length=1024 \
66 | data.max_response_length=1024 \
67 | data.filter_overlong_prompts=True \
68 | data.truncation='error' \
69 | data.prompt_key='prompt' \
70 | \
71 | actor_rollout_ref.model.path="$actor_path" \
72 | actor_rollout_ref.model.enable_gradient_checkpointing=True \
73 | actor_rollout_ref.model.use_remove_padding=True \
74 | actor_rollout_ref.model.use_shm=False \
75 | \
76 | actor_rollout_ref.actor.optim.lr=$actor_lr \
77 | actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.03 \
78 | actor_rollout_ref.actor.ppo_mini_batch_size=$train_batch_size \
79 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=2 \
80 | actor_rollout_ref.actor.clip_ratio=0.2 \
81 | actor_rollout_ref.actor.use_kl_loss=False \
82 | \
83 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
84 | actor_rollout_ref.rollout.n=1 \
85 | actor_rollout_ref.rollout.name=vllm \
86 | actor_rollout_ref.rollout.gpu_memory_utilization=0.7 \
87 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=32 \
88 | \
89 | critic.model.path="$critic_path" \
90 | critic.model.enable_gradient_checkpointing=True \
91 | critic.model.use_remove_padding=True \
92 | critic.model.fsdp_config.param_offload=False \
93 | critic.model.fsdp_config.optimizer_offload=False \
94 | critic.optim.lr=$critic_lr \
95 | critic.optim.lr_warmup_steps_ratio=0 \
96 | critic.optim.warmup_style=cosine \
97 | critic.optim.min_lr_ratio=0.1 \
98 | critic.use_dynamic_bsz=False \
99 | critic.ppo_micro_batch_size_per_gpu=2 \
100 | \
101 | reward_model.enable=False \
102 | reward_model.reward_manager=batch \
103 | custom_reward_function.path=$reward_func_path \
104 | custom_reward_function.name=compute_score_batch \
105 | \
106 | trainer.n_gpus_per_node=8 \
107 | trainer.nnodes=$nodes \
108 | trainer.critic_warmup=0 \
109 | trainer.logger='["console","wandb"]' \
110 | trainer.project_name='verl_ppo_general' \
111 | trainer.val_before_train=False \
112 | trainer.experiment_name="$name" \
113 | trainer.save_freq=100 \
114 | trainer.total_epochs=1 \
115 | trainer.default_local_dir=$output_dir \
116 | \
117 | trainer.rollout_data_dir="${output_dir}/trajectory_data/rollout" \
118 | $@
119 |
120 | else
121 | sleep 10
122 | MASTER_ADDR=$(cat "$TARGET_FILE")
123 |
124 | echo "Starting worker node (RANK=${RANK}), connecting to ${MASTER_ADDR}:${MASTER_PORT}..."
125 | ray start --address ${MASTER_ADDR}:${MASTER_PORT} --num-gpus 8 --block &
126 |
127 | sleep 60
128 | while true; do
129 | status=$(ray status 2>&1)
130 |
131 | if echo "$status" | grep -q "Active:"; then
132 | echo "Active nodes found. Sleeping for 10 min..."
133 | sleep 600
134 | else
135 | echo "No active nodes found. Exiting..."
136 | exit 0
137 | fi
138 | done
139 |
140 | fi
--------------------------------------------------------------------------------
/examples/ppo/llama3-8b_hh-rlhf.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Verl PPO training script for llama3.1-8B
3 | set -x
4 |
5 | # Parameters from original script
6 | nodes=1
7 | train_batch_size=512
8 | actor_lr=1e-6
9 | critic_lr=1e-5
10 | data_name=HH-RLHF
11 | policy_model_name=LLaMa3.1-8B-Instruct
12 | reward_model_name=POLAR-7B
13 |
14 | # Model paths
15 | actor_path=meta-llama/Llama-3.1-8B-Instruct
16 | critic_path=meta-llama/Llama-3.1-8B-Instruct
17 |
18 | # Data paths
19 | train_data_path=$HOME/data/full_hh_rlhf/train.parquet
20 | test_data_path=$HOME/data/full_hh_rlhf/train.parquet # no use
21 |
22 | # Reward Configuration
23 | reward_func_path="../src/polar/reward_func.py"
24 |
25 | # Experiment name
26 | name="verl_ppo_policy_${policy_model_name}_reward_${reward_model_name}_data_${data_name}"
27 | output_dir="../outputs/${name}"
28 |
29 | # Create output directory if it doesn't exist
30 | mkdir -p $output_dir
31 |
32 | # Set wandb to offline mode to prevent online sync
33 | # export WANDB_MODE=offline
34 | export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:False
35 |
36 | TARGET_FILE="$output_dir/addr_${name}.txt"
37 | RANK=${RANK:-${NODE_RANK:-0}}
38 | MASTER_PORT=6379
39 | MASTER_ADDR=${MASTER_ADDR}
40 | echo "MASTER_ADDR: $MASTER_ADDR"
41 | echo "Rank $RANK is running on $MASTER_ADDR"
42 |
43 | if [ "$RANK" -eq 0 ]; then
44 | echo "Starting head node (RANK=${RANK}) on port $MASTER_PORT..."
45 |
46 | MASTER_ADDR=${MASTER_ADDR}
47 | echo "$MASTER_ADDR" > "$TARGET_FILE"
48 |
49 | ray start --head --num-gpus 8 --dashboard-host=0.0.0.0 --dashboard-port=8265 --disable-usage-stats --block &
50 | sleep 30
51 |
52 | echo "Executing main program on head node..."
53 |
54 | python3 -m verl.trainer.main_ppo \
55 | algorithm.adv_estimator=gae \
56 | algorithm.gamma=1.0 \
57 | algorithm.lam=1.0 \
58 | algorithm.use_kl_in_reward=False \
59 | algorithm.kl_ctrl.kl_coef=0 \
60 | algorithm.kl_ctrl.type='adaptive' \
61 | \
62 | data.train_files="$train_data_path" \
63 | data.val_files="$test_data_path" \
64 | data.train_batch_size=$train_batch_size \
65 | data.max_prompt_length=128 \
66 | data.max_response_length=512 \
67 | data.filter_overlong_prompts=True \
68 | data.truncation='error' \
69 | data.prompt_key='prompt' \
70 | \
71 | actor_rollout_ref.model.path="$actor_path" \
72 | actor_rollout_ref.model.enable_gradient_checkpointing=True \
73 | actor_rollout_ref.model.use_remove_padding=True \
74 | actor_rollout_ref.model.use_shm=False \
75 | \
76 | actor_rollout_ref.actor.optim.lr=$actor_lr \
77 | actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.03 \
78 | actor_rollout_ref.actor.ppo_mini_batch_size=$train_batch_size \
79 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=2 \
80 | actor_rollout_ref.actor.clip_ratio=0.2 \
81 | actor_rollout_ref.actor.use_kl_loss=False \
82 | \
83 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
84 | actor_rollout_ref.rollout.n=1 \
85 | actor_rollout_ref.rollout.name=vllm \
86 | actor_rollout_ref.rollout.gpu_memory_utilization=0.7 \
87 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=32 \
88 | \
89 | critic.model.path="$critic_path" \
90 | critic.model.enable_gradient_checkpointing=True \
91 | critic.model.use_remove_padding=True \
92 | critic.model.fsdp_config.param_offload=False \
93 | critic.model.fsdp_config.optimizer_offload=False \
94 | critic.optim.lr=$critic_lr \
95 | critic.optim.lr_warmup_steps_ratio=0 \
96 | critic.optim.warmup_style=cosine \
97 | critic.optim.min_lr_ratio=0.1 \
98 | critic.use_dynamic_bsz=False \
99 | critic.ppo_micro_batch_size_per_gpu=2 \
100 | \
101 | reward_model.enable=False \
102 | reward_model.reward_manager=batch \
103 | custom_reward_function.path=$reward_func_path \
104 | custom_reward_function.name=compute_score_batch \
105 | \
106 | trainer.n_gpus_per_node=8 \
107 | trainer.nnodes=$nodes \
108 | trainer.critic_warmup=0 \
109 | trainer.logger='["console","wandb"]' \
110 | trainer.project_name='verl_ppo_hh-rlhf' \
111 | trainer.val_before_train=False \
112 | trainer.experiment_name="$name" \
113 | trainer.save_freq=100 \
114 | trainer.total_epochs=5 \
115 | trainer.default_local_dir=$output_dir \
116 | \
117 | trainer.rollout_data_dir="${output_dir}/trajectory_data/rollout" \
118 | $@
119 |
120 | else
121 | sleep 10
122 | MASTER_ADDR=$(cat "$TARGET_FILE")
123 |
124 | echo "Starting worker node (RANK=${RANK}), connecting to ${MASTER_ADDR}:${MASTER_PORT}..."
125 | ray start --address ${MASTER_ADDR}:${MASTER_PORT} --num-gpus 8 --block &
126 |
127 | sleep 60
128 | while true; do
129 | status=$(ray status 2>&1)
130 |
131 | if echo "$status" | grep -q "Active:"; then
132 | echo "Active nodes found. Sleeping for 10 min..."
133 | sleep 600
134 | else
135 | echo "No active nodes found. Exiting..."
136 | exit 0
137 | fi
138 | done
139 |
140 | fi
--------------------------------------------------------------------------------
/examples/ppo/llama3-8b_math.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Verl PPO training script for llama3.1-8B
3 | set -x
4 |
5 | # Parameters from original script
6 | nodes=1
7 | train_batch_size=1024
8 | actor_lr=1e-6
9 | critic_lr=1e-5
10 | data_name=MATH
11 | policy_model_name=LLaMa3.1-8B-Instruct
12 | reward_model_name=POLAR-7B
13 |
14 | # Model paths
15 | actor_path=meta-llama/Llama-3.1-8B-Instruct
16 | critic_path=meta-llama/Llama-3.1-8B-Instruct
17 |
18 | # Data paths
19 | train_data_path=$HOME/data/math/train.parquet
20 | test_data_path=$HOME/data/math/test.parquet
21 |
22 | # Reward Configuration
23 | reward_func_path="../src/polar/reward_func.py"
24 |
25 | # Experiment name
26 | name="verl_ppo_policy_${policy_model_name}_reward_${reward_model_name}_data_${data_name}"
27 | output_dir="../outputs/${name}"
28 |
29 | # Create output directory if it doesn't exist
30 | mkdir -p $output_dir
31 |
32 | # Set wandb to offline mode to prevent online sync
33 | # export WANDB_MODE=offline
34 | export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:False
35 |
36 | TARGET_FILE="$output_dir/addr_${name}.txt"
37 | RANK=${RANK:-${NODE_RANK:-0}}
38 | MASTER_PORT=6379
39 | MASTER_ADDR=${MASTER_ADDR}
40 | echo "MASTER_ADDR: $MASTER_ADDR"
41 | echo "Rank $RANK is running on $MASTER_ADDR"
42 |
43 | if [ "$RANK" -eq 0 ]; then
44 | echo "Starting head node (RANK=${RANK}) on port $MASTER_PORT..."
45 |
46 | MASTER_ADDR=${MASTER_ADDR}
47 | echo "$MASTER_ADDR" > "$TARGET_FILE"
48 |
49 | ray start --head --num-gpus 8 --dashboard-host=0.0.0.0 --dashboard-port=8265 --disable-usage-stats --block &
50 | sleep 30
51 |
52 | echo "Executing main program on head node..."
53 |
54 | python3 -m verl.trainer.main_ppo \
55 | algorithm.adv_estimator=gae \
56 | algorithm.gamma=1.0 \
57 | algorithm.lam=1.0 \
58 | algorithm.use_kl_in_reward=False \
59 | algorithm.kl_ctrl.kl_coef=0 \
60 | algorithm.kl_ctrl.type='adaptive' \
61 | \
62 | data.train_files="$train_data_path" \
63 | data.val_files="$test_data_path" \
64 | data.train_batch_size=$train_batch_size \
65 | data.max_prompt_length=1024 \
66 | data.max_response_length=1024 \
67 | data.filter_overlong_prompts=True \
68 | data.truncation='error' \
69 | data.prompt_key='prompt' \
70 | \
71 | actor_rollout_ref.model.path="$actor_path" \
72 | actor_rollout_ref.model.enable_gradient_checkpointing=True \
73 | actor_rollout_ref.model.use_remove_padding=True \
74 | actor_rollout_ref.model.use_shm=False \
75 | \
76 | actor_rollout_ref.actor.optim.lr=$actor_lr \
77 | actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.03 \
78 | actor_rollout_ref.actor.ppo_mini_batch_size=$train_batch_size \
79 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=2 \
80 | actor_rollout_ref.actor.clip_ratio=0.2 \
81 | actor_rollout_ref.actor.use_kl_loss=False \
82 | \
83 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
84 | actor_rollout_ref.rollout.n=1 \
85 | actor_rollout_ref.rollout.name=vllm \
86 | actor_rollout_ref.rollout.gpu_memory_utilization=0.7 \
87 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=32 \
88 | \
89 | critic.model.path="$critic_path" \
90 | critic.model.enable_gradient_checkpointing=True \
91 | critic.model.use_remove_padding=True \
92 | critic.model.fsdp_config.param_offload=False \
93 | critic.model.fsdp_config.optimizer_offload=False \
94 | critic.optim.lr=$critic_lr \
95 | critic.optim.lr_warmup_steps_ratio=0 \
96 | critic.optim.warmup_style=cosine \
97 | critic.optim.min_lr_ratio=0.1 \
98 | critic.use_dynamic_bsz=False \
99 | critic.ppo_micro_batch_size_per_gpu=2 \
100 | \
101 | reward_model.enable=False \
102 | reward_model.reward_manager=batch \
103 | custom_reward_function.path=$reward_func_path \
104 | custom_reward_function.name=compute_score_batch \
105 | \
106 | trainer.n_gpus_per_node=8 \
107 | trainer.nnodes=$nodes \
108 | trainer.critic_warmup=0 \
109 | trainer.logger='["console","wandb"]' \
110 | trainer.project_name='verl_ppo_math' \
111 | trainer.val_before_train=True \
112 | trainer.experiment_name="$name" \
113 | trainer.save_freq=100 \
114 | trainer.test_freq=5 \
115 | trainer.total_epochs=100 \
116 | trainer.default_local_dir=$output_dir \
117 | \
118 | trainer.rollout_data_dir="${output_dir}/trajectory_data/rollout" \
119 | $@
120 |
121 | else
122 | sleep 10
123 | MASTER_ADDR=$(cat "$TARGET_FILE")
124 |
125 | echo "Starting worker node (RANK=${RANK}), connecting to ${MASTER_ADDR}:${MASTER_PORT}..."
126 | ray start --address ${MASTER_ADDR}:${MASTER_PORT} --num-gpus 8 --block &
127 |
128 | sleep 60
129 | while true; do
130 | status=$(ray status 2>&1)
131 |
132 | if echo "$status" | grep -q "Active:"; then
133 | echo "Active nodes found. Sleeping for 10 min..."
134 | sleep 600
135 | else
136 | echo "No active nodes found. Exiting..."
137 | exit 0
138 | fi
139 | done
140 |
141 | fi
--------------------------------------------------------------------------------
/examples/ppo/qwen2_5-7b_general.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Verl PPO training script for Qwen2.5-7B
3 | set -x
4 |
5 | # Parameters from original script
6 | nodes=1
7 | train_batch_size=1024
8 | actor_lr=1e-6
9 | critic_lr=1e-5
10 | data_name=General
11 | policy_model_name=Qwen2.5-7B-Instruct
12 | reward_model_name=POLAR-7B
13 |
14 | # Model paths
15 | actor_path=Qwen/Qwen2.5-7B-Instruct
16 | critic_path=Qwen/Qwen2.5-7B-Instruct
17 |
18 | # Data paths
19 | train_data_path=$HOME/data/general/train.parquet
20 | test_data_path=$HOME/data/general/train.parquet # no use
21 |
22 | # Reward Configuration
23 | reward_func_path="../src/polar/reward_func.py"
24 |
25 | # Experiment name
26 | name="verl_ppo_policy_${policy_model_name}_reward_${reward_model_name}_data_${data_name}"
27 | output_dir="../outputs/${name}"
28 |
29 | # Create output directory if it doesn't exist
30 | mkdir -p $output_dir
31 |
32 | # Set wandb to offline mode to prevent online sync
33 | # export WANDB_MODE=offline
34 | export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:False
35 |
36 | TARGET_FILE="$output_dir/addr_${name}.txt"
37 | RANK=${RANK:-${NODE_RANK:-0}}
38 | MASTER_PORT=6379
39 | MASTER_ADDR=${MASTER_ADDR}
40 | echo "MASTER_ADDR: $MASTER_ADDR"
41 | echo "Rank $RANK is running on $MASTER_ADDR"
42 |
43 | if [ "$RANK" -eq 0 ]; then
44 | echo "Starting head node (RANK=${RANK}) on port $MASTER_PORT..."
45 |
46 | MASTER_ADDR=${MASTER_ADDR}
47 | echo "$MASTER_ADDR" > "$TARGET_FILE"
48 |
49 | ray start --head --num-gpus 8 --dashboard-host=0.0.0.0 --dashboard-port=8265 --disable-usage-stats --block &
50 | sleep 30
51 |
52 | echo "Executing main program on head node..."
53 |
54 | python3 -m verl.trainer.main_ppo \
55 | algorithm.adv_estimator=gae \
56 | algorithm.gamma=1.0 \
57 | algorithm.lam=1.0 \
58 | algorithm.use_kl_in_reward=False \
59 | algorithm.kl_ctrl.kl_coef=0 \
60 | algorithm.kl_ctrl.type='adaptive' \
61 | \
62 | data.train_files="$train_data_path" \
63 | data.val_files="$test_data_path" \
64 | data.train_batch_size=$train_batch_size \
65 | data.max_prompt_length=1024 \
66 | data.max_response_length=1024 \
67 | data.filter_overlong_prompts=True \
68 | data.truncation='error' \
69 | data.prompt_key='prompt' \
70 | \
71 | actor_rollout_ref.model.path="$actor_path" \
72 | actor_rollout_ref.model.enable_gradient_checkpointing=True \
73 | actor_rollout_ref.model.use_remove_padding=True \
74 | actor_rollout_ref.model.use_shm=False \
75 | \
76 | actor_rollout_ref.actor.optim.lr=$actor_lr \
77 | actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.03 \
78 | actor_rollout_ref.actor.ppo_mini_batch_size=$train_batch_size \
79 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=2 \
80 | actor_rollout_ref.actor.clip_ratio=0.2 \
81 | actor_rollout_ref.actor.use_kl_loss=False \
82 | \
83 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
84 | actor_rollout_ref.rollout.n=1 \
85 | actor_rollout_ref.rollout.name=vllm \
86 | actor_rollout_ref.rollout.gpu_memory_utilization=0.7 \
87 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=32 \
88 | \
89 | critic.model.path="$critic_path" \
90 | critic.model.enable_gradient_checkpointing=True \
91 | critic.model.use_remove_padding=True \
92 | critic.model.fsdp_config.param_offload=False \
93 | critic.model.fsdp_config.optimizer_offload=False \
94 | critic.optim.lr=$critic_lr \
95 | critic.optim.lr_warmup_steps_ratio=0 \
96 | critic.optim.warmup_style=cosine \
97 | critic.optim.min_lr_ratio=0.1 \
98 | critic.use_dynamic_bsz=False \
99 | critic.ppo_micro_batch_size_per_gpu=2 \
100 | \
101 | reward_model.enable=False \
102 | reward_model.reward_manager=batch \
103 | custom_reward_function.path=$reward_func_path \
104 | custom_reward_function.name=compute_score_batch \
105 | \
106 | trainer.n_gpus_per_node=8 \
107 | trainer.nnodes=$nodes \
108 | trainer.critic_warmup=0 \
109 | trainer.logger='["console","wandb"]' \
110 | trainer.project_name='verl_ppo_general' \
111 | trainer.val_before_train=False \
112 | trainer.experiment_name="$name" \
113 | trainer.save_freq=100 \
114 | trainer.total_epochs=1 \
115 | trainer.default_local_dir=$output_dir \
116 | \
117 | trainer.rollout_data_dir="${output_dir}/trajectory_data/rollout" \
118 | $@
119 |
120 | else
121 | sleep 10
122 | MASTER_ADDR=$(cat "$TARGET_FILE")
123 |
124 | echo "Starting worker node (RANK=${RANK}), connecting to ${MASTER_ADDR}:${MASTER_PORT}..."
125 | ray start --address ${MASTER_ADDR}:${MASTER_PORT} --num-gpus 8 --block &
126 |
127 | sleep 60
128 | while true; do
129 | status=$(ray status 2>&1)
130 |
131 | if echo "$status" | grep -q "Active:"; then
132 | echo "Active nodes found. Sleeping for 10 min..."
133 | sleep 600
134 | else
135 | echo "No active nodes found. Exiting..."
136 | exit 0
137 | fi
138 | done
139 |
140 | fi
--------------------------------------------------------------------------------
/examples/ppo/qwen2_5-7b_hh-rlhf.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Verl PPO training script for Qwen2.5-7B
3 | set -x
4 |
5 | # Parameters from original script
6 | nodes=1
7 | train_batch_size=512
8 | actor_lr=1e-6
9 | critic_lr=1e-5
10 | data_name=HH-RLHF
11 | policy_model_name=Qwen2.5-7B-Instruct
12 | reward_model_name=POLAR-7B
13 |
14 | # Model paths
15 | actor_path=Qwen/Qwen2.5-7B-Instruct
16 | critic_path=Qwen/Qwen2.5-7B-Instruct
17 |
18 | # Data paths
19 | train_data_path=$HOME/data/full_hh_rlhf/train.parquet
20 | test_data_path=$HOME/data/full_hh_rlhf/train.parquet # no use
21 |
22 | # Reward Configuration
23 | reward_func_path="../src/polar/reward_func.py"
24 |
25 | # Experiment name
26 | name="verl_ppo_policy_${policy_model_name}_reward_${reward_model_name}_data_${data_name}"
27 | output_dir="../outputs/${name}"
28 |
29 | # Create output directory if it doesn't exist
30 | mkdir -p $output_dir
31 |
32 | # Set wandb to offline mode to prevent online sync
33 | # export WANDB_MODE=offline
34 | export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:False
35 |
36 | TARGET_FILE="$output_dir/addr_${name}.txt"
37 | RANK=${RANK:-${NODE_RANK:-0}}
38 | MASTER_PORT=6379
39 | MASTER_ADDR=${MASTER_ADDR}
40 | echo "MASTER_ADDR: $MASTER_ADDR"
41 | echo "Rank $RANK is running on $MASTER_ADDR"
42 |
43 | if [ "$RANK" -eq 0 ]; then
44 | echo "Starting head node (RANK=${RANK}) on port $MASTER_PORT..."
45 |
46 | MASTER_ADDR=${MASTER_ADDR}
47 | echo "$MASTER_ADDR" > "$TARGET_FILE"
48 |
49 | ray start --head --num-gpus 8 --dashboard-host=0.0.0.0 --dashboard-port=8265 --disable-usage-stats --block &
50 | sleep 30
51 |
52 | echo "Executing main program on head node..."
53 |
54 | python3 -m verl.trainer.main_ppo \
55 | algorithm.adv_estimator=gae \
56 | algorithm.gamma=1.0 \
57 | algorithm.lam=1.0 \
58 | algorithm.use_kl_in_reward=False \
59 | algorithm.kl_ctrl.kl_coef=0 \
60 | algorithm.kl_ctrl.type='adaptive' \
61 | \
62 | data.train_files="$train_data_path" \
63 | data.val_files="$test_data_path" \
64 | data.train_batch_size=$train_batch_size \
65 | data.max_prompt_length=128 \
66 | data.max_response_length=512 \
67 | data.filter_overlong_prompts=True \
68 | data.truncation='error' \
69 | data.prompt_key='prompt' \
70 | \
71 | actor_rollout_ref.model.path="$actor_path" \
72 | actor_rollout_ref.model.enable_gradient_checkpointing=True \
73 | actor_rollout_ref.model.use_remove_padding=True \
74 | actor_rollout_ref.model.use_shm=False \
75 | \
76 | actor_rollout_ref.actor.optim.lr=$actor_lr \
77 | actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.03 \
78 | actor_rollout_ref.actor.ppo_mini_batch_size=$train_batch_size \
79 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=2 \
80 | actor_rollout_ref.actor.clip_ratio=0.2 \
81 | actor_rollout_ref.actor.use_kl_loss=False \
82 | \
83 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
84 | actor_rollout_ref.rollout.n=1 \
85 | actor_rollout_ref.rollout.name=vllm \
86 | actor_rollout_ref.rollout.gpu_memory_utilization=0.7 \
87 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=32 \
88 | \
89 | critic.model.path="$critic_path" \
90 | critic.model.enable_gradient_checkpointing=True \
91 | critic.model.use_remove_padding=True \
92 | critic.model.fsdp_config.param_offload=False \
93 | critic.model.fsdp_config.optimizer_offload=False \
94 | critic.optim.lr=$critic_lr \
95 | critic.optim.lr_warmup_steps_ratio=0 \
96 | critic.optim.warmup_style=cosine \
97 | critic.optim.min_lr_ratio=0.1 \
98 | critic.use_dynamic_bsz=False \
99 | critic.ppo_micro_batch_size_per_gpu=2 \
100 | \
101 | reward_model.enable=False \
102 | reward_model.reward_manager=batch \
103 | custom_reward_function.path=$reward_func_path \
104 | custom_reward_function.name=compute_score_batch \
105 | \
106 | trainer.n_gpus_per_node=8 \
107 | trainer.nnodes=$nodes \
108 | trainer.critic_warmup=0 \
109 | trainer.logger='["console","wandb"]' \
110 | trainer.project_name='verl_ppo_hh-rlhf' \
111 | trainer.val_before_train=False \
112 | trainer.experiment_name="$name" \
113 | trainer.save_freq=100 \
114 | trainer.total_epochs=5 \
115 | trainer.default_local_dir=$output_dir \
116 | \
117 | trainer.rollout_data_dir="${output_dir}/trajectory_data/rollout" \
118 | $@
119 |
120 | else
121 | sleep 10
122 | MASTER_ADDR=$(cat "$TARGET_FILE")
123 |
124 | echo "Starting worker node (RANK=${RANK}), connecting to ${MASTER_ADDR}:${MASTER_PORT}..."
125 | ray start --address ${MASTER_ADDR}:${MASTER_PORT} --num-gpus 8 --block &
126 |
127 | sleep 60
128 | while true; do
129 | status=$(ray status 2>&1)
130 |
131 | if echo "$status" | grep -q "Active:"; then
132 | echo "Active nodes found. Sleeping for 10 min..."
133 | sleep 600
134 | else
135 | echo "No active nodes found. Exiting..."
136 | exit 0
137 | fi
138 | done
139 |
140 | fi
--------------------------------------------------------------------------------
/examples/ppo/qwen2_5-7b_math.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Verl PPO training script for Qwen2.5-7B
3 | set -x
4 |
5 | # Parameters from original script
6 | nodes=1
7 | train_batch_size=1024
8 | actor_lr=1e-6
9 | critic_lr=1e-5
10 | data_name=MATH
11 | policy_model_name=Qwen2.5-7B-Instruct
12 | reward_model_name=POLAR-7B
13 |
14 | # Model paths
15 | actor_path=Qwen/Qwen2.5-7B-Instruct
16 | critic_path=Qwen/Qwen2.5-7B-Instruct
17 |
18 | # Data paths
19 | train_data_path=$HOME/data/math/train.parquet
20 | test_data_path=$HOME/data/math/test.parquet
21 |
22 | # Reward Configuration
23 | reward_func_path="../src/polar/reward_func.py"
24 |
25 | # Experiment name
26 | name="verl_ppo_policy_${policy_model_name}_reward_${reward_model_name}_data_${data_name}"
27 | output_dir="../outputs/${name}"
28 |
29 | # Create output directory if it doesn't exist
30 | mkdir -p $output_dir
31 |
32 | # Set wandb to offline mode to prevent online sync
33 | # export WANDB_MODE=offline
34 | export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:False
35 |
36 | TARGET_FILE="$output_dir/addr_${name}.txt"
37 | RANK=${RANK:-${NODE_RANK:-0}}
38 | MASTER_PORT=6379
39 | MASTER_ADDR=${MASTER_ADDR}
40 | echo "MASTER_ADDR: $MASTER_ADDR"
41 | echo "Rank $RANK is running on $MASTER_ADDR"
42 |
43 | if [ "$RANK" -eq 0 ]; then
44 | echo "Starting head node (RANK=${RANK}) on port $MASTER_PORT..."
45 |
46 | MASTER_ADDR=${MASTER_ADDR}
47 | echo "$MASTER_ADDR" > "$TARGET_FILE"
48 |
49 | ray start --head --num-gpus 8 --dashboard-host=0.0.0.0 --dashboard-port=8265 --disable-usage-stats --block &
50 | sleep 30
51 |
52 | echo "Executing main program on head node..."
53 |
54 | python3 -m verl.trainer.main_ppo \
55 | algorithm.adv_estimator=gae \
56 | algorithm.gamma=1.0 \
57 | algorithm.lam=1.0 \
58 | algorithm.use_kl_in_reward=False \
59 | algorithm.kl_ctrl.kl_coef=0 \
60 | algorithm.kl_ctrl.type='adaptive' \
61 | \
62 | data.train_files="$train_data_path" \
63 | data.val_files="$test_data_path" \
64 | data.train_batch_size=$train_batch_size \
65 | data.max_prompt_length=1024 \
66 | data.max_response_length=1024 \
67 | data.filter_overlong_prompts=True \
68 | data.truncation='error' \
69 | data.prompt_key='prompt' \
70 | \
71 | actor_rollout_ref.model.path="$actor_path" \
72 | actor_rollout_ref.model.enable_gradient_checkpointing=True \
73 | actor_rollout_ref.model.use_remove_padding=True \
74 | actor_rollout_ref.model.use_shm=False \
75 | \
76 | actor_rollout_ref.actor.optim.lr=$actor_lr \
77 | actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.03 \
78 | actor_rollout_ref.actor.ppo_mini_batch_size=$train_batch_size \
79 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=2 \
80 | actor_rollout_ref.actor.clip_ratio=0.2 \
81 | actor_rollout_ref.actor.use_kl_loss=False \
82 | \
83 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
84 | actor_rollout_ref.rollout.n=1 \
85 | actor_rollout_ref.rollout.name=vllm \
86 | actor_rollout_ref.rollout.gpu_memory_utilization=0.7 \
87 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=32 \
88 | \
89 | critic.model.path="$critic_path" \
90 | critic.model.enable_gradient_checkpointing=True \
91 | critic.model.use_remove_padding=True \
92 | critic.model.fsdp_config.param_offload=False \
93 | critic.model.fsdp_config.optimizer_offload=False \
94 | critic.optim.lr=$critic_lr \
95 | critic.optim.lr_warmup_steps_ratio=0 \
96 | critic.optim.warmup_style=cosine \
97 | critic.optim.min_lr_ratio=0.1 \
98 | critic.use_dynamic_bsz=False \
99 | critic.ppo_micro_batch_size_per_gpu=2 \
100 | \
101 | reward_model.enable=False \
102 | reward_model.reward_manager=batch \
103 | custom_reward_function.path=$reward_func_path \
104 | custom_reward_function.name=compute_score_batch \
105 | \
106 | trainer.n_gpus_per_node=8 \
107 | trainer.nnodes=$nodes \
108 | trainer.critic_warmup=0 \
109 | trainer.logger='["console","wandb"]' \
110 | trainer.project_name='verl_ppo_math' \
111 | trainer.val_before_train=True \
112 | trainer.experiment_name="$name" \
113 | trainer.save_freq=100 \
114 | trainer.test_freq=5 \
115 | trainer.total_epochs=100 \
116 | trainer.default_local_dir=$output_dir \
117 | \
118 | trainer.rollout_data_dir="${output_dir}/trajectory_data/rollout" \
119 | $@
120 |
121 | else
122 | sleep 10
123 | MASTER_ADDR=$(cat "$TARGET_FILE")
124 |
125 | echo "Starting worker node (RANK=${RANK}), connecting to ${MASTER_ADDR}:${MASTER_PORT}..."
126 | ray start --address ${MASTER_ADDR}:${MASTER_PORT} --num-gpus 8 --block &
127 |
128 | sleep 60
129 | while true; do
130 | status=$(ray status 2>&1)
131 |
132 | if echo "$status" | grep -q "Active:"; then
133 | echo "Active nodes found. Sleeping for 10 min..."
134 | sleep 600
135 | else
136 | echo "No active nodes found. Exiting..."
137 | exit 0
138 | fi
139 | done
140 |
141 | fi
--------------------------------------------------------------------------------
/examples/ppo/qwen3-8b_general.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Verl PPO training script for Qwen3-8B
3 | set -x
4 |
5 | # Parameters from original script
6 | nodes=4
7 | train_batch_size=1024
8 | actor_lr=1e-6
9 | critic_lr=1e-5
10 | data_name=General
11 | policy_model_name=Qwen3-8B-Instruct
12 | reward_model_name=POLAR-7B
13 |
14 | # Model paths
15 | actor_path=Qwen/Qwen3-8B
16 | critic_path=Qwen/Qwen3-8B
17 |
18 | # Data paths
19 | train_data_path=$HOME/data/general/train.parquet
20 | test_data_path=$HOME/data/general/train.parquet # no use
21 |
22 | # Reward Configuration
23 | reward_func_path="../src/polar/reward_func.py"
24 |
25 | # Experiment name
26 | name="verl_ppo_policy_${policy_model_name}_reward_${reward_model_name}_data_${data_name}"
27 | output_dir="../outputs/${name}"
28 |
29 | # Create output directory if it doesn't exist
30 | mkdir -p $output_dir
31 |
32 | # Set wandb to offline mode to prevent online sync
33 | # export WANDB_MODE=offline
34 | export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:False
35 |
36 | TARGET_FILE="$output_dir/addr_${name}.txt"
37 | RANK=${RANK:-${NODE_RANK:-0}}
38 | MASTER_PORT=6379
39 | MASTER_ADDR=${MASTER_ADDR}
40 | echo "MASTER_ADDR: $MASTER_ADDR"
41 | echo "Rank $RANK is running on $MASTER_ADDR"
42 |
43 | if [ "$RANK" -eq 0 ]; then
44 | echo "Starting head node (RANK=${RANK}) on port $MASTER_PORT..."
45 |
46 | MASTER_ADDR=${MASTER_ADDR}
47 | echo "$MASTER_ADDR" > "$TARGET_FILE"
48 |
49 | ray start --head --num-gpus 8 --dashboard-host=0.0.0.0 --dashboard-port=8265 --disable-usage-stats --block &
50 | sleep 30
51 |
52 | echo "Executing main program on head node..."
53 |
54 | python3 -m verl.trainer.main_ppo \
55 | algorithm.adv_estimator=gae \
56 | algorithm.gamma=1.0 \
57 | algorithm.lam=1.0 \
58 | algorithm.use_kl_in_reward=False \
59 | algorithm.kl_ctrl.kl_coef=0 \
60 | algorithm.kl_ctrl.type='adaptive' \
61 | \
62 | data.train_files="$train_data_path" \
63 | data.val_files="$test_data_path" \
64 | data.train_batch_size=$train_batch_size \
65 | data.max_prompt_length=1024 \
66 | data.max_response_length=15000 \
67 | data.filter_overlong_prompts=True \
68 | data.truncation='error' \
69 | data.prompt_key='prompt' \
70 | \
71 | actor_rollout_ref.model.path="$actor_path" \
72 | actor_rollout_ref.model.enable_gradient_checkpointing=True \
73 | actor_rollout_ref.model.use_remove_padding=True \
74 | actor_rollout_ref.model.use_shm=False \
75 | \
76 | actor_rollout_ref.actor.optim.lr=$actor_lr \
77 | actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.03 \
78 | actor_rollout_ref.actor.ppo_mini_batch_size=$train_batch_size \
79 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=1 \
80 | actor_rollout_ref.actor.clip_ratio=0.2 \
81 | actor_rollout_ref.actor.use_kl_loss=False \
82 | \
83 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
84 | actor_rollout_ref.rollout.n=1 \
85 | actor_rollout_ref.rollout.name=vllm \
86 | actor_rollout_ref.rollout.gpu_memory_utilization=0.8 \
87 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=32 \
88 | actor_rollout_ref.rollout.max_num_batched_tokens=16384 \
89 | \
90 | critic.model.path="$critic_path" \
91 | critic.model.enable_gradient_checkpointing=True \
92 | critic.model.use_remove_padding=True \
93 | critic.model.fsdp_config.param_offload=False \
94 | critic.model.fsdp_config.optimizer_offload=False \
95 | critic.optim.lr=$critic_lr \
96 | critic.optim.lr_warmup_steps_ratio=0 \
97 | critic.optim.warmup_style=cosine \
98 | critic.optim.min_lr_ratio=0.1 \
99 | critic.use_dynamic_bsz=False \
100 | critic.ppo_micro_batch_size_per_gpu=1 \
101 | \
102 | reward_model.enable=False \
103 | reward_model.reward_manager=batch \
104 | custom_reward_function.path=$reward_func_path \
105 | custom_reward_function.name=compute_score_batch \
106 | \
107 | trainer.n_gpus_per_node=8 \
108 | trainer.nnodes=$nodes \
109 | trainer.critic_warmup=0 \
110 | trainer.logger='["console","wandb"]' \
111 | trainer.project_name='verl_ppo_general' \
112 | trainer.val_before_train=False \
113 | trainer.experiment_name="$name" \
114 | trainer.save_freq=100 \
115 | trainer.total_epochs=1 \
116 | trainer.default_local_dir=$output_dir \
117 | \
118 | trainer.rollout_data_dir="${output_dir}/trajectory_data/rollout" \
119 | $@
120 |
121 | else
122 | sleep 10
123 | MASTER_ADDR=$(cat "$TARGET_FILE")
124 |
125 | echo "Starting worker node (RANK=${RANK}), connecting to ${MASTER_ADDR}:${MASTER_PORT}..."
126 | ray start --address ${MASTER_ADDR}:${MASTER_PORT} --num-gpus 8 --block &
127 |
128 | sleep 60
129 | while true; do
130 | status=$(ray status 2>&1)
131 |
132 | if echo "$status" | grep -q "Active:"; then
133 | echo "Active nodes found. Sleeping for 10 min..."
134 | sleep 600
135 | else
136 | echo "No active nodes found. Exiting..."
137 | exit 0
138 | fi
139 | done
140 |
141 | fi
--------------------------------------------------------------------------------
/examples/ppo/qwen3-8b_hh-rlhf.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Verl PPO training script for Qwen3-8B
3 | set -x
4 |
5 | # Parameters from original script
6 | nodes=2
7 | train_batch_size=512
8 | actor_lr=1e-6
9 | critic_lr=1e-5
10 | data_name=HH-RLHF
11 | policy_model_name=Qwen3-8B-Instruct
12 | reward_model_name=POLAR-7B
13 |
14 | # Model paths
15 | actor_path=Qwen/Qwen3-8B
16 | critic_path=Qwen/Qwen3-8B
17 |
18 | # Data paths
19 | train_data_path=$HOME/data/full_hh_rlhf/train.parquet
20 | test_data_path=$HOME/data/full_hh_rlhf/train.parquet # no use
21 |
22 | # Reward Configuration
23 | reward_func_path="../src/polar/reward_func.py"
24 |
25 | # Experiment name
26 | name="verl_ppo_policy_${policy_model_name}_reward_${reward_model_name}_data_${data_name}"
27 | output_dir="../outputs/${name}"
28 |
29 | # Create output directory if it doesn't exist
30 | mkdir -p $output_dir
31 |
32 | # Set wandb to offline mode to prevent online sync
33 | # export WANDB_MODE=offline
34 | export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:False
35 |
36 | TARGET_FILE="$output_dir/addr_${name}.txt"
37 | RANK=${RANK:-${NODE_RANK:-0}}
38 | MASTER_PORT=6379
39 | MASTER_ADDR=${MASTER_ADDR}
40 | echo "MASTER_ADDR: $MASTER_ADDR"
41 | echo "Rank $RANK is running on $MASTER_ADDR"
42 |
43 | if [ "$RANK" -eq 0 ]; then
44 | echo "Starting head node (RANK=${RANK}) on port $MASTER_PORT..."
45 |
46 | MASTER_ADDR=${MASTER_ADDR}
47 | echo "$MASTER_ADDR" > "$TARGET_FILE"
48 |
49 | ray start --head --num-gpus 8 --dashboard-host=0.0.0.0 --dashboard-port=8265 --disable-usage-stats --block &
50 | sleep 30
51 |
52 | echo "Executing main program on head node..."
53 |
54 | python3 -m verl.trainer.main_ppo \
55 | algorithm.adv_estimator=gae \
56 | algorithm.gamma=1.0 \
57 | algorithm.lam=1.0 \
58 | algorithm.use_kl_in_reward=False \
59 | algorithm.kl_ctrl.kl_coef=0 \
60 | algorithm.kl_ctrl.type='adaptive' \
61 | \
62 | data.train_files="$train_data_path" \
63 | data.val_files="$test_data_path" \
64 | data.train_batch_size=$train_batch_size \
65 | data.max_prompt_length=128 \
66 | data.max_response_length=16000 \
67 | data.filter_overlong_prompts=True \
68 | data.truncation='error' \
69 | data.prompt_key='prompt' \
70 | \
71 | actor_rollout_ref.model.path="$actor_path" \
72 | actor_rollout_ref.model.enable_gradient_checkpointing=True \
73 | actor_rollout_ref.model.use_remove_padding=True \
74 | actor_rollout_ref.model.use_shm=False \
75 | \
76 | actor_rollout_ref.actor.optim.lr=$actor_lr \
77 | actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.03 \
78 | actor_rollout_ref.actor.ppo_mini_batch_size=$train_batch_size \
79 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=1 \
80 | actor_rollout_ref.actor.clip_ratio=0.2 \
81 | actor_rollout_ref.actor.use_kl_loss=False \
82 | \
83 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
84 | actor_rollout_ref.rollout.n=1 \
85 | actor_rollout_ref.rollout.name=vllm \
86 | actor_rollout_ref.rollout.gpu_memory_utilization=0.8 \
87 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=32 \
88 | actor_rollout_ref.rollout.max_num_batched_tokens=16384 \
89 | \
90 | critic.model.path="$critic_path" \
91 | critic.model.enable_gradient_checkpointing=True \
92 | critic.model.use_remove_padding=True \
93 | critic.model.fsdp_config.param_offload=False \
94 | critic.model.fsdp_config.optimizer_offload=False \
95 | critic.optim.lr=$critic_lr \
96 | critic.optim.lr_warmup_steps_ratio=0 \
97 | critic.optim.warmup_style=cosine \
98 | critic.optim.min_lr_ratio=0.1 \
99 | critic.use_dynamic_bsz=False \
100 | critic.ppo_micro_batch_size_per_gpu=1 \
101 | \
102 | reward_model.enable=False \
103 | reward_model.reward_manager=batch \
104 | custom_reward_function.path=$reward_func_path \
105 | custom_reward_function.name=compute_score_batch \
106 | \
107 | trainer.n_gpus_per_node=8 \
108 | trainer.nnodes=$nodes \
109 | trainer.critic_warmup=0 \
110 | trainer.logger='["console","wandb"]' \
111 | trainer.project_name='verl_ppo_hh-rlhf' \
112 | trainer.val_before_train=False \
113 | trainer.experiment_name="$name" \
114 | trainer.save_freq=100 \
115 | trainer.total_epochs=5 \
116 | trainer.default_local_dir=$output_dir \
117 | \
118 | trainer.rollout_data_dir="${output_dir}/trajectory_data/rollout" \
119 | $@
120 |
121 | else
122 | sleep 10
123 | MASTER_ADDR=$(cat "$TARGET_FILE")
124 |
125 | echo "Starting worker node (RANK=${RANK}), connecting to ${MASTER_ADDR}:${MASTER_PORT}..."
126 | ray start --address ${MASTER_ADDR}:${MASTER_PORT} --num-gpus 8 --block &
127 |
128 | sleep 60
129 | while true; do
130 | status=$(ray status 2>&1)
131 |
132 | if echo "$status" | grep -q "Active:"; then
133 | echo "Active nodes found. Sleeping for 10 min..."
134 | sleep 600
135 | else
136 | echo "No active nodes found. Exiting..."
137 | exit 0
138 | fi
139 | done
140 |
141 | fi
--------------------------------------------------------------------------------
/examples/ppo/qwen3-8b_math.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Verl PPO training script for Qwen3-8B
3 | set -x
4 |
5 | # Parameters from original script
6 | nodes=4
7 | train_batch_size=1024
8 | actor_lr=1e-6
9 | critic_lr=1e-5
10 | data_name=MATH
11 | policy_model_name=Qwen3-8B-Instruct
12 | reward_model_name=POLAR-7B
13 |
14 | # Model paths
15 | actor_path=Qwen/Qwen3-8B
16 | critic_path=Qwen/Qwen3-8B
17 |
18 | # Data paths
19 | train_data_path=$HOME/data/math/train.parquet
20 | test_data_path=$HOME/data/math/test.parquet
21 |
22 | # Reward Configuration
23 | reward_func_path="../src/polar/reward_func.py"
24 |
25 | # Experiment name
26 | name="verl_ppo_policy_${policy_model_name}_reward_${reward_model_name}_data_${data_name}"
27 | output_dir="../outputs/${name}"
28 |
29 | # Create output directory if it doesn't exist
30 | mkdir -p $output_dir
31 |
32 | # Set wandb to offline mode to prevent online sync
33 | # export WANDB_MODE=offline
34 | export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:False
35 |
36 | TARGET_FILE="$output_dir/addr_${name}.txt"
37 | RANK=${RANK:-${NODE_RANK:-0}}
38 | MASTER_PORT=6379
39 | MASTER_ADDR=${MASTER_ADDR}
40 | echo "MASTER_ADDR: $MASTER_ADDR"
41 | echo "Rank $RANK is running on $MASTER_ADDR"
42 |
43 | if [ "$RANK" -eq 0 ]; then
44 | echo "Starting head node (RANK=${RANK}) on port $MASTER_PORT..."
45 |
46 | MASTER_ADDR=${MASTER_ADDR}
47 | echo "$MASTER_ADDR" > "$TARGET_FILE"
48 |
49 | ray start --head --num-gpus 8 --dashboard-host=0.0.0.0 --dashboard-port=8265 --disable-usage-stats --block &
50 | sleep 30
51 |
52 | echo "Executing main program on head node..."
53 |
54 | python3 -m verl.trainer.main_ppo \
55 | algorithm.adv_estimator=gae \
56 | algorithm.gamma=1.0 \
57 | algorithm.lam=1.0 \
58 | algorithm.use_kl_in_reward=False \
59 | algorithm.kl_ctrl.kl_coef=0 \
60 | algorithm.kl_ctrl.type='adaptive' \
61 | \
62 | data.train_files="$train_data_path" \
63 | data.val_files="$test_data_path" \
64 | data.train_batch_size=$train_batch_size \
65 | data.max_prompt_length=1024 \
66 | data.max_response_length=15000 \
67 | data.filter_overlong_prompts=True \
68 | data.truncation='error' \
69 | data.prompt_key='prompt' \
70 | \
71 | actor_rollout_ref.model.path="$actor_path" \
72 | actor_rollout_ref.model.enable_gradient_checkpointing=True \
73 | actor_rollout_ref.model.use_remove_padding=True \
74 | actor_rollout_ref.model.use_shm=False \
75 | \
76 | actor_rollout_ref.actor.optim.lr=$actor_lr \
77 | actor_rollout_ref.actor.optim.lr_warmup_steps_ratio=0.03 \
78 | actor_rollout_ref.actor.ppo_mini_batch_size=$train_batch_size \
79 | actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=1 \
80 | actor_rollout_ref.actor.clip_ratio=0.2 \
81 | actor_rollout_ref.actor.use_kl_loss=False \
82 | \
83 | actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
84 | actor_rollout_ref.rollout.n=1 \
85 | actor_rollout_ref.rollout.name=vllm \
86 | actor_rollout_ref.rollout.gpu_memory_utilization=0.8 \
87 | actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=32 \
88 | actor_rollout_ref.rollout.max_num_batched_tokens=16384 \
89 | \
90 | critic.model.path="$critic_path" \
91 | critic.model.enable_gradient_checkpointing=True \
92 | critic.model.use_remove_padding=True \
93 | critic.model.fsdp_config.param_offload=False \
94 | critic.model.fsdp_config.optimizer_offload=False \
95 | critic.optim.lr=$critic_lr \
96 | critic.optim.lr_warmup_steps_ratio=0 \
97 | critic.optim.warmup_style=cosine \
98 | critic.optim.min_lr_ratio=0.1 \
99 | critic.use_dynamic_bsz=False \
100 | critic.ppo_micro_batch_size_per_gpu=1 \
101 | \
102 | reward_model.enable=False \
103 | reward_model.reward_manager=batch \
104 | custom_reward_function.path=$reward_func_path \
105 | custom_reward_function.name=compute_score_batch \
106 | \
107 | trainer.n_gpus_per_node=8 \
108 | trainer.nnodes=$nodes \
109 | trainer.critic_warmup=0 \
110 | trainer.logger='["console","wandb"]' \
111 | trainer.project_name='verl_ppo_math' \
112 | trainer.val_before_train=True \
113 | trainer.experiment_name="$name" \
114 | trainer.save_freq=100 \
115 | trainer.test_freq=5 \
116 | trainer.total_epochs=100 \
117 | trainer.default_local_dir=$output_dir \
118 | \
119 | trainer.rollout_data_dir="${output_dir}/trajectory_data/rollout" \
120 |     "$@"
121 |
122 | else
123 | sleep 10
124 | MASTER_ADDR=$(cat "$TARGET_FILE")
125 |
126 | echo "Starting worker node (RANK=${RANK}), connecting to ${MASTER_ADDR}:${MASTER_PORT}..."
127 | ray start --address ${MASTER_ADDR}:${MASTER_PORT} --num-gpus 8 --block &
128 |
129 | sleep 60
130 | while true; do
131 | status=$(ray status 2>&1)
132 |
133 | if echo "$status" | grep -q "Active:"; then
134 | echo "Active nodes found. Sleeping for 10 min..."
135 | sleep 600
136 | else
137 | echo "No active nodes found. Exiting..."
138 | exit 0
139 | fi
140 | done
141 |
142 | fi
--------------------------------------------------------------------------------
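A quick way to sanity-check the parquet files the script above consumes is sketched below. This is a hypothetical check, not part of the repository: it assumes pandas and pyarrow are installed, and only the 'prompt' column name is taken from the script (data.prompt_key='prompt'); any other columns depend on how examples/data_preprocess/math.py built the files.

import os
import pandas as pd

# Path mirrors train_data_path=$HOME/data/math/train.parquet in the script above.
train_path = os.path.expanduser("~/data/math/train.parquet")
df = pd.read_parquet(train_path)

print(df.columns.tolist())    # should include at least the 'prompt' column
print(df.iloc[0]["prompt"])   # chat-style messages, e.g. [{"role": "user", "content": ...}]
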
/examples/xtuner_configs/POLAR_1_8B_full_varlenattn_custom_dataset.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from mmengine.dataset import DefaultSampler
3 | from mmengine.hooks import (
4 | CheckpointHook,
5 | DistSamplerSeedHook,
6 | IterTimerHook,
7 | LoggerHook,
8 | ParamSchedulerHook,
9 | )
10 | from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR
11 | from mmengine.visualization import Visualizer, TensorboardVisBackend
12 | from torch.optim import AdamW
13 | from transformers import AutoModel, AutoTokenizer
14 |
15 | from datasets import load_dataset
16 | from xtuner.dataset.collate_fns.preference_collate_fn import preference_collate_fn
17 | from xtuner.dataset.preference_dataset import build_preference_dataset
18 | from xtuner.engine.hooks import VarlenAttnArgsToMessageHubHook
19 | from xtuner.engine.runner import TrainLoop
20 | from xtuner.model.reward import RewardModel
21 | from xtuner.parallel.sequence import SequenceParallelSampler
22 |
23 | #######################################################################
24 | # PART 1 Settings #
25 | #######################################################################
26 | # Model
27 | pretrained_model_name_or_path = "internlm/POLAR-1_8B-Base"
28 | use_varlen_attn = True
29 | reward_token_id = 92527 # use [UNUSED_TOKEN_130] as reward token
30 | loss_type = "ranking"
31 | penalty_type = "none"
32 |
33 | # Data
34 | max_length = 16384
35 | max_response_length = 4096
36 | max_packed_length = max_length * 2
37 |
38 | # parallel
39 | sequence_parallel_size = 1
40 |
41 | # Scheduler & Optimizer
42 | batch_size = 1 # per_device
43 | accumulative_counts = 2
44 | accumulative_counts *= sequence_parallel_size
45 | dataloader_num_workers = 0
46 | max_epochs = 1 # reward model should not be trained for more than 1 epoch to avoid overfitting # noqa: E501
47 | optim_type = AdamW
48 | lr = 1e-5
49 | betas = (0.9, 0.95)
50 | weight_decay = 0
51 | max_norm = 1 # grad clip
52 | warmup_ratio = 0.03
53 |
54 | # Save
55 | save_steps = 500
56 | save_total_limit = 2 # Maximum checkpoints to keep (-1 means unlimited)
57 |
58 | # Evaluate the generation performance during the training
59 | # TODO: eval
60 | # evaluation_freq = 500
61 |
62 | #######################################################################
63 | # PART 2 Model & Tokenizer #
64 | #######################################################################
65 | tokenizer = dict(
66 | type=AutoTokenizer.from_pretrained,
67 | pretrained_model_name_or_path=pretrained_model_name_or_path,
68 | trust_remote_code=True,
69 | padding_side="left",
70 | )
71 |
72 | model = dict(
73 | type=RewardModel,
74 | use_varlen_attn=use_varlen_attn,
75 | loss_type=loss_type,
76 | penalty_type=penalty_type,
77 | llm=dict(
78 | type=AutoModel.from_pretrained,
79 | pretrained_model_name_or_path=pretrained_model_name_or_path,
80 | trust_remote_code=True,
81 | ),
82 | )
83 |
84 | #######################################################################
85 | # PART 3 Dataset & Dataloader #
86 | #######################################################################
87 | sampler = SequenceParallelSampler if sequence_parallel_size > 1 else DefaultSampler
88 |
89 | # preference data format example:
90 | # {
91 | # "prompt": [{"role": "user", "content": "What is the capital of France?"}],
92 | # "reference": [{"role": "assistant", "content": "The capital of France is Paris."}],
93 | # "chosen": [{"role": "assistant", "content": "Paris."}],
94 | # "rejected": [{"role": "assistant", "content": "I don't know."}],
95 | # }
96 |
97 | train_dataset = dict(
98 | type=build_preference_dataset,
99 | dataset=dict(
100 | type=load_dataset,
101 | # Replace with your custom dataset path
102 | # For example, if you have a local /path/to/file/train.jsonl, you can use:
103 | # path="/path/to/file",
104 | path="/your/custom/path/here",
105 | ),
106 | tokenizer=tokenizer,
107 | max_length=max_length,
108 | dataset_map_fn=None,
109 | is_dpo=False,
110 | is_reward=True,
111 | reward_token_id=reward_token_id,
112 | num_proc=32,
113 | use_varlen_attn=use_varlen_attn,
114 | max_packed_length=max_packed_length,
115 | shuffle_before_pack=True,
116 | max_response_length=max_response_length,
117 | is_reference=True
118 | )
119 |
120 | train_dataloader = dict(
121 | batch_size=batch_size,
122 | num_workers=dataloader_num_workers,
123 | dataset=train_dataset,
124 | sampler=dict(type=sampler, shuffle=True),
125 | collate_fn=dict(type=preference_collate_fn, use_varlen_attn=use_varlen_attn),
126 | )
127 |
128 | #######################################################################
129 | # PART 4 Scheduler & Optimizer #
130 | #######################################################################
131 | # optimizer
132 | optim_wrapper = dict(
133 | type=AmpOptimWrapper,
134 | optimizer=dict(type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay),
135 | clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False),
136 | accumulative_counts=accumulative_counts,
137 | loss_scale="dynamic",
138 | dtype="float16",
139 | )
140 |
141 | # learning policy
142 | # More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md # noqa: E501
143 | param_scheduler = [
144 | dict(
145 | type=LinearLR,
146 | start_factor=lr * 0.1,
147 | by_epoch=True,
148 | begin=0,
149 | end=warmup_ratio * max_epochs,
150 | convert_to_iter_based=True,
151 | ),
152 | dict(
153 | type=CosineAnnealingLR,
154 | eta_min=lr * 0.1,
155 | by_epoch=True,
156 | begin=warmup_ratio * max_epochs,
157 | end=max_epochs,
158 | convert_to_iter_based=True,
159 | ),
160 | ]
161 |
162 | # train, val, test setting
163 | train_cfg = dict(type=TrainLoop, max_epochs=max_epochs)
164 |
165 | #######################################################################
166 | # PART 5 Runtime #
167 | #######################################################################
168 | # Custom hooks (the varlen-attention args hook is appended below when enabled)
169 | custom_hooks = []
170 |
171 | if use_varlen_attn:
172 | custom_hooks += [dict(type=VarlenAttnArgsToMessageHubHook)]
173 |
174 | # configure default hooks
175 | default_hooks = dict(
176 | # record the time of every iteration.
177 | timer=dict(type=IterTimerHook),
178 | # print log every 10 iterations.
179 | logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10),
180 | # enable the parameter scheduler.
181 | param_scheduler=dict(type=ParamSchedulerHook),
182 | # save checkpoint per `save_steps`.
183 | checkpoint=dict(
184 | type=CheckpointHook,
185 | by_epoch=False,
186 | interval=save_steps,
187 | max_keep_ckpts=save_total_limit,
188 | ),
189 |     # set sampler seed in distributed environment.
190 | sampler_seed=dict(type=DistSamplerSeedHook),
191 | )
192 |
193 | # configure environment
194 | env_cfg = dict(
195 | # whether to enable cudnn benchmark
196 | cudnn_benchmark=False,
197 | # set multi process parameters
198 | mp_cfg=dict(mp_start_method="fork", opencv_num_threads=0),
199 | # set distributed parameters
200 | dist_cfg=dict(backend="nccl"),
201 | )
202 |
203 | # set visualizer
204 | visualizer = dict(
205 | type=Visualizer,
206 | vis_backends=[dict(type=TensorboardVisBackend)]
207 | )
208 |
209 | # set log level
210 | log_level = "INFO"
211 |
212 | # load from which checkpoint
213 | load_from = None
214 |
215 | # whether to resume training from the loaded checkpoint
216 | resume = False
217 |
218 | # Defaults to use random seed and disable `deterministic`
219 | randomness = dict(seed=None, deterministic=False)
220 |
221 | # set log processor
222 | log_processor = dict(by_epoch=False)
223 |
--------------------------------------------------------------------------------
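The config above reads a custom preference dataset through load_dataset(path=...), in the format shown in its PART 3 comment. A minimal, hypothetical way to produce such a file is sketched below; the /path/to/file location and the record contents are placeholders taken from that comment, and load_dataset would then be pointed at the containing directory (path="/path/to/file").

import json

# One preference record in the format expected by build_preference_dataset.
record = {
    "prompt": [{"role": "user", "content": "What is the capital of France?"}],
    "reference": [{"role": "assistant", "content": "The capital of France is Paris."}],
    "chosen": [{"role": "assistant", "content": "Paris."}],
    "rejected": [{"role": "assistant", "content": "I don't know."}],
}

# Write one JSON object per line, e.g. /path/to/file/train.jsonl.
with open("/path/to/file/train.jsonl", "w", encoding="utf-8") as f:
    f.write(json.dumps(record, ensure_ascii=False) + "\n")
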
/examples/xtuner_configs/POLAR_7B_full_varlenattn_custom_dataset.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from mmengine.dataset import DefaultSampler
3 | from mmengine.hooks import (
4 | CheckpointHook,
5 | DistSamplerSeedHook,
6 | IterTimerHook,
7 | LoggerHook,
8 | ParamSchedulerHook,
9 | )
10 | from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR
11 | from mmengine.visualization import Visualizer, TensorboardVisBackend
12 | from torch.optim import AdamW
13 | from transformers import AutoModel, AutoTokenizer
14 |
15 | from datasets import load_dataset
16 | from xtuner.dataset.collate_fns.preference_collate_fn import preference_collate_fn
17 | from xtuner.dataset.preference_dataset import build_preference_dataset
18 | from xtuner.engine.hooks import VarlenAttnArgsToMessageHubHook
19 | from xtuner.engine.runner import TrainLoop
20 | from xtuner.model.reward import RewardModel
21 | from xtuner.parallel.sequence import SequenceParallelSampler
22 |
23 | #######################################################################
24 | # PART 1 Settings #
25 | #######################################################################
26 | # Model
27 | pretrained_model_name_or_path = "internlm/POLAR-7B-Base"
28 | use_varlen_attn = True
29 | reward_token_id = 92527 # use [UNUSED_TOKEN_130] as reward token
30 | loss_type = "ranking"
31 | penalty_type = "none"
32 |
33 | # Data
34 | max_length = 16384
35 | max_response_length = 4096
36 | max_packed_length = max_length * 2
37 |
38 | # parallel
39 | sequence_parallel_size = 1
40 |
41 | # Scheduler & Optimizer
42 | batch_size = 1 # per_device
43 | accumulative_counts = 2
44 | accumulative_counts *= sequence_parallel_size
45 | dataloader_num_workers = 0
46 | max_epochs = 1 # reward model should not be trained for more than 1 epoch to avoid overfitting # noqa: E501
47 | optim_type = AdamW
48 | lr = 2e-5
49 | betas = (0.9, 0.95)
50 | weight_decay = 0
51 | max_norm = 1 # grad clip
52 | warmup_ratio = 0.03
53 |
54 | # Save
55 | save_steps = 500
56 | save_total_limit = 2 # Maximum checkpoints to keep (-1 means unlimited)
57 |
58 | # Evaluate the generation performance during the training
59 | # TODO: eval
60 | # evaluation_freq = 500
61 |
62 | #######################################################################
63 | # PART 2 Model & Tokenizer #
64 | #######################################################################
65 | tokenizer = dict(
66 | type=AutoTokenizer.from_pretrained,
67 | pretrained_model_name_or_path=pretrained_model_name_or_path,
68 | trust_remote_code=True,
69 | padding_side="left",
70 | )
71 |
72 | model = dict(
73 | type=RewardModel,
74 | use_varlen_attn=use_varlen_attn,
75 | loss_type=loss_type,
76 | penalty_type=penalty_type,
77 | llm=dict(
78 | type=AutoModel.from_pretrained,
79 | pretrained_model_name_or_path=pretrained_model_name_or_path,
80 | trust_remote_code=True,
81 | ),
82 | )
83 |
84 | #######################################################################
85 | # PART 3 Dataset & Dataloader #
86 | #######################################################################
87 | sampler = SequenceParallelSampler if sequence_parallel_size > 1 else DefaultSampler
88 |
89 | # preference data format example:
90 | # {
91 | # "prompt": [{"role": "user", "content": "What is the capital of France?"}],
92 | # "reference": [{"role": "assistant", "content": "The capital of France is Paris."}],
93 | # "chosen": [{"role": "assistant", "content": "Paris."}],
94 | # "rejected": [{"role": "assistant", "content": "I don't know."}],
95 | # }
96 |
97 | train_dataset = dict(
98 | type=build_preference_dataset,
99 | dataset=dict(
100 | type=load_dataset,
101 | # Replace with your custom dataset path
102 | # For example, if you have a local /path/to/file/train.jsonl, you can use:
103 | # path="/path/to/file",
104 | path="/your/custom/path/here",
105 | ),
106 | tokenizer=tokenizer,
107 | max_length=max_length,
108 | dataset_map_fn=None,
109 | is_dpo=False,
110 | is_reward=True,
111 | reward_token_id=reward_token_id,
112 | num_proc=32,
113 | use_varlen_attn=use_varlen_attn,
114 | max_packed_length=max_packed_length,
115 | shuffle_before_pack=True,
116 | max_response_length=max_response_length,
117 | is_reference=True
118 | )
119 |
120 | train_dataloader = dict(
121 | batch_size=batch_size,
122 | num_workers=dataloader_num_workers,
123 | dataset=train_dataset,
124 | sampler=dict(type=sampler, shuffle=True),
125 | collate_fn=dict(type=preference_collate_fn, use_varlen_attn=use_varlen_attn),
126 | )
127 |
128 | #######################################################################
129 | # PART 4 Scheduler & Optimizer #
130 | #######################################################################
131 | # optimizer
132 | optim_wrapper = dict(
133 | type=AmpOptimWrapper,
134 | optimizer=dict(type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay),
135 | clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False),
136 | accumulative_counts=accumulative_counts,
137 | loss_scale="dynamic",
138 | dtype="float16",
139 | )
140 |
141 | # learning policy
142 | # More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md # noqa: E501
143 | param_scheduler = [
144 | dict(
145 | type=LinearLR,
146 | start_factor=lr * 0.1,
147 | by_epoch=True,
148 | begin=0,
149 | end=warmup_ratio * max_epochs,
150 | convert_to_iter_based=True,
151 | ),
152 | dict(
153 | type=CosineAnnealingLR,
154 | eta_min=lr * 0.1,
155 | by_epoch=True,
156 | begin=warmup_ratio * max_epochs,
157 | end=max_epochs,
158 | convert_to_iter_based=True,
159 | ),
160 | ]
161 |
162 | # train, val, test setting
163 | train_cfg = dict(type=TrainLoop, max_epochs=max_epochs)
164 |
165 | #######################################################################
166 | # PART 5 Runtime #
167 | #######################################################################
168 | # Custom hooks (the varlen-attention args hook is appended below when enabled)
169 | custom_hooks = []
170 |
171 | if use_varlen_attn:
172 | custom_hooks += [dict(type=VarlenAttnArgsToMessageHubHook)]
173 |
174 | # configure default hooks
175 | default_hooks = dict(
176 | # record the time of every iteration.
177 | timer=dict(type=IterTimerHook),
178 | # print log every 10 iterations.
179 | logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10),
180 | # enable the parameter scheduler.
181 | param_scheduler=dict(type=ParamSchedulerHook),
182 | # save checkpoint per `save_steps`.
183 | checkpoint=dict(
184 | type=CheckpointHook,
185 | by_epoch=False,
186 | interval=save_steps,
187 | max_keep_ckpts=save_total_limit,
188 | ),
189 |     # set sampler seed in distributed environment.
190 | sampler_seed=dict(type=DistSamplerSeedHook),
191 | )
192 |
193 | # configure environment
194 | env_cfg = dict(
195 | # whether to enable cudnn benchmark
196 | cudnn_benchmark=False,
197 | # set multi process parameters
198 | mp_cfg=dict(mp_start_method="fork", opencv_num_threads=0),
199 | # set distributed parameters
200 | dist_cfg=dict(backend="nccl"),
201 | )
202 |
203 | # set visualizer
204 | visualizer = dict(
205 | type=Visualizer,
206 | vis_backends=[dict(type=TensorboardVisBackend)]
207 | )
208 |
209 | # set log level
210 | log_level = "INFO"
211 |
212 | # load from which checkpoint
213 | load_from = None
214 |
215 | # whether to resume training from the loaded checkpoint
216 | resume = False
217 |
218 | # Defaults to use random seed and disable `deterministic`
219 | randomness = dict(seed=None, deterministic=False)
220 |
221 | # set log processor
222 | log_processor = dict(by_epoch=False)
223 |
--------------------------------------------------------------------------------
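As an optional sanity check for the config above (not part of the repository), the snippet below confirms that reward_token_id = 92527 maps to the [UNUSED_TOKEN_130] token, as the inline comment states; it assumes transformers is installed and the POLAR-7B-Base tokenizer can be downloaded.

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("internlm/POLAR-7B-Base", trust_remote_code=True)

print(tok.convert_ids_to_tokens(92527))                  # expected: [UNUSED_TOKEN_130]
print(tok.convert_tokens_to_ids("[UNUSED_TOKEN_130]"))   # expected: 92527
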
/src/polar/__init__.py:
--------------------------------------------------------------------------------
1 | from .reward_func import RewardModelClient
2 |
--------------------------------------------------------------------------------
/src/polar/reward_func.py:
--------------------------------------------------------------------------------
1 | # Copyright 2025 POLAR Team and/or its affiliates
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from typing import List, Union
16 | from time import sleep
17 | import requests
18 | import re
19 | from transformers import AutoTokenizer
20 |
21 | # Config reward model server
22 | ADDRESS = "127.0.0.1:30000" # Modify according to your server address
23 | SERVER_TYPE = "sglang" # Options: "sglang", "vllm", "lmdeploy"
24 | MODEL_PATH = "internlm/POLAR-7B"
25 |
26 |
27 | class RewardModelClient:
28 | """This class is used to process the input sequences for the reward
29 | model."""
30 |
31 | def __init__(
32 | self,
33 | path,
34 | max_length=16384,
35 | max_response_length=4096,
36 | response_cut_side="right",
37 | server_type="sglang",
38 | server_address="127.0.0.1:30000",
39 | ):
40 | """
41 | Args:
42 | path: Path to the reward model.
43 | max_length: Maximum length of the input sequence.
44 | max_response_length: Maximum length of the response sequence.
45 | response_cut_side: Side to cut the response sequence if it exceeds the maximum length.
46 | server_type: Type of the server, can be "sglang", "vllm", or "lmdeploy".
47 |             server_address: Address of the reward model server.
48 | """
49 | self.rm_name = path.split("/")[-1]
50 | self.tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
51 |         # reserve 4 tokens: the final reward token, one <|reward|> token, and two '\n' tokens
52 | self.max_length = max_length - 4
53 | self.max_response_length = max_response_length
54 | self.response_cut_side = response_cut_side
55 | self.server_type = server_type
56 | self.server_address = server_address
57 |
58 | def _encode(self, prompt, reference, output, wrapper="sft") -> str:
59 | """Construct the input string for the reward model.
60 |
61 | Args:
62 | prompt: Prompt.
63 | reference: Reference trajectory.
64 | output: Candidate trajectory.
65 | wrapper: The wrapper type. Can be "sft" or "pretrain".
66 | Returns:
67 | The constructed input string for RM.
68 | """
69 | p = (
70 | "\n".join([e["content"] for e in prompt])
71 | if isinstance(prompt, list)
72 | else prompt
73 | )
74 | r1 = (
75 | "\n".join([e["content"] for e in reference])
76 | if isinstance(reference, list)
77 | else reference
78 | )
79 | r2 = (
80 | "\n".join([e["content"] for e in output])
81 | if isinstance(output, list)
82 | else output
83 | )
84 |
85 | p_ids = self.tokenizer.encode(p, add_special_tokens=True)
86 | r1_ids = self.tokenizer.encode(r1, add_special_tokens=True)
87 | r2_ids = self.tokenizer.encode(r2, add_special_tokens=True)
88 |
89 | if len(r1_ids) > self.max_response_length:
90 | print(
91 | f"Reference sequence length {len(r1_ids)} is "
92 | f"larger than max_response_length {self.max_response_length}",
93 | )
94 | if self.response_cut_side == "right":
95 | r1_ids = r1_ids[: self.max_response_length]
96 | else:
97 | r1_ids = r1_ids[-self.max_response_length:]
98 | if len(r2_ids) > self.max_response_length:
99 | print(
100 | f"Output sequence length {len(r2_ids)} is "
101 | f"larger than max_response_length {self.max_response_length}",
102 | )
103 | if self.response_cut_side == "right":
104 | r2_ids = r2_ids[: self.max_response_length]
105 | else:
106 | r2_ids = r2_ids[-self.max_response_length:]
107 |
108 | max_prompt_length = (self.max_length - len(r1_ids) - len(r2_ids)) // 2
109 |
110 | if len(p_ids) > max_prompt_length:
111 | print(
112 | f"Prompt sequence length {len(p_ids)} is "
113 | f"larger than max_prompt_length {max_prompt_length}",
114 | )
115 | p_ids = p_ids[-max_prompt_length:]
116 |
117 | p = self.tokenizer.decode(p_ids, skip_special_tokens=True)
118 | r1 = self.tokenizer.decode(r1_ids, skip_special_tokens=True)
119 | r2 = self.tokenizer.decode(r2_ids, skip_special_tokens=True)
120 |
121 | # Fit the template of RM
122 | _reference_cat = (
123 |             p + r1 if wrapper == "pretrain" or len(r1) == 0 else p + "\n" + r1
124 | )
125 | _output_cat = (
126 |             p + r2 if wrapper == "pretrain" or len(r2) == 0 else p + "\n" + r2
127 | )
128 |
129 | final_txt = _reference_cat + "<|reward|>" + _output_cat + "[UNUSED_TOKEN_130]"
130 |
131 | return final_txt
132 |
133 | def encode(self, data) -> Union[str, List[str]]:
134 | """Encode the input data into a format suitable for RM.
135 |
136 | Args:
137 |             data: A dictionary or a list of dictionaries containing the keys
138 | 'prompt', 'reference', 'output', and optionally 'wrapper'.
139 | Returns:
140 |             The encoded input string for RM, or a list of encoded strings.
141 | """
142 | if isinstance(data, dict):
143 | return self._encode(**data)
144 | elif isinstance(data, list):
145 | return [
146 | self._encode(**item) if isinstance(item, dict) else item
147 | for item in data
148 | ]
149 | else:
150 | raise ValueError(
151 | "Input data must be a dictionary or a list of dictionaries."
152 | )
153 |
154 | def sglang_request_reward(
155 | self, data, retry_delay=0.2, max_retries=8
156 | ) -> List[float]:
157 | # Disable proxy for internal cluster communication
158 | for i in range(max_retries):
159 | try:
160 | res = requests.post(
161 | f"http://{self.server_address}/classify",
162 | json={
163 | "model": self.rm_name,
164 | "text": data,
165 | },
166 | proxies={"http": None, "https": None}, # Explicitly disable proxy
167 | timeout=30, # Add timeout
168 | )
169 | rewards = [e["embedding"][0] for e in res.json()]
170 | return rewards
171 | except Exception as e:
172 | print(f"Error requesting reward: {e}")
173 | print(f"Raw response: {data}")
174 | sleep(retry_delay)
175 | continue
176 | print(f"Failed to request reward after {max_retries} retries")
177 | return None
178 |
179 | def vllm_request_reward(self, data, retry_delay=0.2, max_retries=8) -> List[float]:
180 | # Disable proxy for internal cluster communication
181 | for i in range(max_retries):
182 | try:
183 | res = requests.post(
184 | f"http://{self.server_address}/pooling",
185 | json={
186 | "input": data,
187 | },
188 | proxies={"http": None, "https": None}, # Explicitly disable proxy
189 | timeout=30, # Add timeout
190 | )
191 | rewards = [e["data"][-1][0] for e in res.json()["data"]]
192 | return rewards
193 | except Exception as e:
194 | print(f"Error requesting reward: {e}")
195 | print(f"Raw response: {data}")
196 | sleep(retry_delay)
197 | continue
198 | print(f"Failed to request reward after {max_retries} retries")
199 | return None
200 |
201 | def lmdeploy_request_reward(
202 | self, data, retry_delay=0.2, max_retries=8
203 | ) -> List[float]:
204 | # Disable proxy for internal cluster communication
205 | for i in range(max_retries):
206 | try:
207 | res = requests.post(
208 | f"http://{self.server_address}/pooling",
209 | json={
210 | "input": data,
211 | },
212 | proxies={"http": None, "https": None}, # Explicitly disable proxy
213 | timeout=30, # Add timeout
214 | )
215 | rewards = [e["data"] for e in res.json()["data"]]
216 | return rewards
217 | except Exception as e:
218 | print(f"Error requesting reward: {e}")
219 | print(f"Raw response: {data}")
220 | sleep(retry_delay)
221 | continue
222 | print(f"Failed to request reward after {max_retries} retries")
223 | return None
224 |
225 | def __call__(self, data) -> List[float]:
226 | """Call the input wrapper to construct the input string for RM.
227 |
228 | Args:
229 | data: A list of dictionaries containing the keys
230 | 'prompt', 'reference', 'output', and optionally 'wrapper'.
231 | retry_delay: Delay in seconds before retrying the request.
232 | max_retries: Maximum number of retries for the request.
233 | Returns:
234 | scores: The list of reward scores returned by the RM server.
235 | If the request fails, it returns None.
236 | """
237 | data = self.encode(data)
238 | if self.server_type == "sglang":
239 | scores = self.sglang_request_reward(data)
240 | elif self.server_type == "vllm":
241 | scores = self.vllm_request_reward(data)
242 | elif self.server_type == "lmdeploy":
243 | scores = self.lmdeploy_request_reward(data)
244 | else:
245 | raise ValueError(f"Unsupported server type: {self.server_type}")
246 |
247 | return scores
248 |
249 |
250 | # Global variable to hold the RewardModelClient instance
251 | _reward_client = None
252 |
253 |
254 | def get_reward_client():
255 | """Get or create a RewardModelClient instance."""
256 | global _reward_client
257 | if _reward_client is None:
258 |
259 | _reward_client = RewardModelClient(
260 | path=MODEL_PATH,
261 | server_type=SERVER_TYPE,
262 | server_address=ADDRESS,
263 | )
264 |
265 | return _reward_client
266 |
267 |
268 | def extract_thinking_content(text: str) -> tuple[str, str]:
269 |     pattern = r'<think>(.*?)</think>(.*)'
270 | match = re.search(pattern, text, re.DOTALL)
271 | if match:
272 | thinking_content = match.group(1).strip()
273 | remaining_content = match.group(2).strip()
274 | return thinking_content, remaining_content
275 | return "", text
276 |
277 |
278 | def compute_score_batch(data_sources, solution_strs, ground_truths, extra_infos, prompt_key="prompt"):
279 | """Compute scores for a batch of data using the POLAR reward model for VERL.
280 |
281 | Args:
282 | data_sources: List of data sources.
283 | solution_strs: List of solution strings.
284 | ground_truths: List of ground truth strings or {"role": xxx, "content": xxx} messages.
285 |         extra_infos: List of extra-information dicts; each holds prompt_key (the chat-message
286 |             prompt shared by the policy model and POLAR) plus 'ability' and 'split' fields.
287 |
288 | Returns:
289 | scores: A list of computed scores for each data source.
290 | """
291 |
292 | legacy_eval = False
293 |
294 | batch_data = []
295 | for data_source, solution_str, ground_truth, extra_info in zip(
296 | data_sources, solution_strs, ground_truths, extra_infos, strict=True
297 | ):
298 |
299 | _, solution_str = extract_thinking_content(solution_str)
300 |
301 | if extra_info["ability"] == "math" and extra_info["split"] == "test":
302 | legacy_eval = True
303 |
304 | data = {
305 | "prompt": extra_info[prompt_key],
306 | "reference": ground_truth,
307 | "output": solution_str,
308 | "wrapper": "sft"
309 | }
310 | batch_data.append(data)
311 |
312 | if legacy_eval:
313 | # If the task is math, use rule-based rewards as test evaluation.
314 | from verl.utils.reward_score.math_verify import compute_score
315 |
316 | # For enhanced accuracy, we utilize Math-Verify (https://github.com/huggingface/Math-Verify).
317 | # Note: Math-Verify needs to be manually installed via pip: `pip install math-verify`.
318 | return [
319 | compute_score(item["output"], item["reference"])
320 | for item in batch_data
321 | ]
322 |
323 | client = get_reward_client()
324 |
325 | scores = client(batch_data)
326 |
327 | return scores
328 |
329 |
330 | if __name__ == "__main__":
331 | client = get_reward_client()
332 | data = [
333 | {
334 | "prompt": [{"role": "user", "content": "What is the capital of China?"}],
335 | "reference": [{"role": "assistant", "content": "Beijing."}],
336 | "output": [{"role": "assistant", "content": "Beijing."}]
337 | },
338 | {
339 | "prompt": [{"role": "user", "content": "What is the capital of China?"}],
340 | "reference": [{"role": "assistant", "content": "Beijing."}],
341 | "output": [{"role": "assistant", "content": "Shanghai."}]
342 | }
343 | ]
344 | scores = client(data)
345 | print(scores)
346 |
--------------------------------------------------------------------------------
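compute_score_batch above is the function the PPO scripts register via custom_reward_function.name. The sketch below calls it directly on a tiny batch; it is illustrative only and assumes src/polar is importable (installed or on PYTHONPATH) and that a reward server is reachable at the ADDRESS configured at the top of reward_func.py. The "demo"/"general"/"train" field values are made up, but the keys mirror what the function reads.

from polar.reward_func import compute_score_batch

prompt = [{"role": "user", "content": "What is the capital of China?"}]
extra_info = {"prompt": prompt, "ability": "general", "split": "train"}

scores = compute_score_batch(
    data_sources=["demo"],
    solution_strs=["Beijing."],
    ground_truths=[[{"role": "assistant", "content": "Beijing."}]],
    extra_infos=[extra_info],
)
print(scores)  # one reward score per sample, or None if the server request fails
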