├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── configs ├── accelerate_config.yaml ├── accelerate_config_simple.yaml ├── config.yaml ├── config_rl.yaml ├── config_rm.yaml ├── deepspeed_rl.json └── ppo_config.yaml ├── models └── .gitignore ├── pyproject.toml ├── runs └── .gitignore └── src ├── __init__.py ├── bon ├── README.md ├── __init__.py ├── bon_sampling.py ├── ensemble_rm.py ├── run_bon_ensembles.py ├── run_bon_pipeline.py └── utils.py ├── data_utils ├── README.md ├── __init__.py ├── oa_custom_datasets │ ├── __init__.py │ ├── dataset_loader.py │ ├── get_dataset_patch.py │ └── rank_datasets.py └── rm_dataset_formatter.py ├── ppo ├── README.md ├── __init__.py ├── custom_helpers.py ├── custom_trlx_trainers │ ├── __init__.py │ ├── custom_accelerate_base_trainer.py │ └── custom_accelerate_ppo_trainer.py ├── run_ppo_gold_eval.py └── trainer_rl.py ├── reward_modeling ├── scoring │ ├── __init__.py │ ├── ppo_reward_functions.py │ └── score.py └── training │ ├── README.md │ ├── __init__.py │ └── trainer_rm.py ├── sft ├── __init__.py └── trainer_sft.py └── utils ├── merge_seeds_script.py └── merge_utils.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlc4418/llm_optimization/HEAD/.gitignore -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlc4418/llm_optimization/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlc4418/llm_optimization/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlc4418/llm_optimization/HEAD/README.md -------------------------------------------------------------------------------- /configs/accelerate_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlc4418/llm_optimization/HEAD/configs/accelerate_config.yaml -------------------------------------------------------------------------------- /configs/accelerate_config_simple.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlc4418/llm_optimization/HEAD/configs/accelerate_config_simple.yaml -------------------------------------------------------------------------------- /configs/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlc4418/llm_optimization/HEAD/configs/config.yaml -------------------------------------------------------------------------------- /configs/config_rl.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlc4418/llm_optimization/HEAD/configs/config_rl.yaml -------------------------------------------------------------------------------- /configs/config_rm.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlc4418/llm_optimization/HEAD/configs/config_rm.yaml -------------------------------------------------------------------------------- /configs/deepspeed_rl.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlc4418/llm_optimization/HEAD/configs/deepspeed_rl.json -------------------------------------------------------------------------------- /configs/ppo_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlc4418/llm_optimization/HEAD/configs/ppo_config.yaml -------------------------------------------------------------------------------- /models/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlc4418/llm_optimization/HEAD/pyproject.toml -------------------------------------------------------------------------------- /runs/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/bon/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlc4418/llm_optimization/HEAD/src/bon/README.md -------------------------------------------------------------------------------- /src/bon/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/bon/bon_sampling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlc4418/llm_optimization/HEAD/src/bon/bon_sampling.py -------------------------------------------------------------------------------- /src/bon/ensemble_rm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlc4418/llm_optimization/HEAD/src/bon/ensemble_rm.py -------------------------------------------------------------------------------- /src/bon/run_bon_ensembles.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlc4418/llm_optimization/HEAD/src/bon/run_bon_ensembles.py -------------------------------------------------------------------------------- /src/bon/run_bon_pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlc4418/llm_optimization/HEAD/src/bon/run_bon_pipeline.py -------------------------------------------------------------------------------- /src/bon/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlc4418/llm_optimization/HEAD/src/bon/utils.py -------------------------------------------------------------------------------- /src/data_utils/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlc4418/llm_optimization/HEAD/src/data_utils/README.md -------------------------------------------------------------------------------- /src/data_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/data_utils/oa_custom_datasets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/data_utils/oa_custom_datasets/dataset_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlc4418/llm_optimization/HEAD/src/data_utils/oa_custom_datasets/dataset_loader.py -------------------------------------------------------------------------------- /src/data_utils/oa_custom_datasets/get_dataset_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlc4418/llm_optimization/HEAD/src/data_utils/oa_custom_datasets/get_dataset_patch.py -------------------------------------------------------------------------------- /src/data_utils/oa_custom_datasets/rank_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlc4418/llm_optimization/HEAD/src/data_utils/oa_custom_datasets/rank_datasets.py -------------------------------------------------------------------------------- /src/data_utils/rm_dataset_formatter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlc4418/llm_optimization/HEAD/src/data_utils/rm_dataset_formatter.py -------------------------------------------------------------------------------- /src/ppo/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlc4418/llm_optimization/HEAD/src/ppo/README.md -------------------------------------------------------------------------------- /src/ppo/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ppo/custom_helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlc4418/llm_optimization/HEAD/src/ppo/custom_helpers.py -------------------------------------------------------------------------------- /src/ppo/custom_trlx_trainers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/ppo/custom_trlx_trainers/custom_accelerate_base_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlc4418/llm_optimization/HEAD/src/ppo/custom_trlx_trainers/custom_accelerate_base_trainer.py -------------------------------------------------------------------------------- /src/ppo/custom_trlx_trainers/custom_accelerate_ppo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlc4418/llm_optimization/HEAD/src/ppo/custom_trlx_trainers/custom_accelerate_ppo_trainer.py -------------------------------------------------------------------------------- /src/ppo/run_ppo_gold_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlc4418/llm_optimization/HEAD/src/ppo/run_ppo_gold_eval.py -------------------------------------------------------------------------------- /src/ppo/trainer_rl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlc4418/llm_optimization/HEAD/src/ppo/trainer_rl.py -------------------------------------------------------------------------------- /src/reward_modeling/scoring/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/reward_modeling/scoring/ppo_reward_functions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlc4418/llm_optimization/HEAD/src/reward_modeling/scoring/ppo_reward_functions.py -------------------------------------------------------------------------------- /src/reward_modeling/scoring/score.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlc4418/llm_optimization/HEAD/src/reward_modeling/scoring/score.py -------------------------------------------------------------------------------- /src/reward_modeling/training/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlc4418/llm_optimization/HEAD/src/reward_modeling/training/README.md -------------------------------------------------------------------------------- /src/reward_modeling/training/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/reward_modeling/training/trainer_rm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlc4418/llm_optimization/HEAD/src/reward_modeling/training/trainer_rm.py -------------------------------------------------------------------------------- /src/sft/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/sft/trainer_sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlc4418/llm_optimization/HEAD/src/sft/trainer_sft.py -------------------------------------------------------------------------------- /src/utils/merge_seeds_script.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlc4418/llm_optimization/HEAD/src/utils/merge_seeds_script.py -------------------------------------------------------------------------------- /src/utils/merge_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tlc4418/llm_optimization/HEAD/src/utils/merge_utils.py --------------------------------------------------------------------------------