├── .gitignore ├── LICENSE ├── README.md ├── cali ├── cali_plot.py ├── hh_cali_eval.py └── readme.md ├── config ├── config.yaml ├── loss │ ├── dpo.yaml │ └── sft.yaml └── model │ ├── blank_model.yaml │ ├── gpt2_large.yaml │ ├── gptj.yaml │ ├── llama7b.yaml │ ├── pythia28.yaml │ └── pythia69.yaml ├── generation.py ├── metrics └── imdb │ ├── imdb_eval_metrics.py │ ├── run_alpha.sh │ ├── run_forward.sh │ ├── run_jsd.sh │ └── run_reverse.sh ├── misc └── imdb_rlhf_pairs.csv ├── mt_bench ├── README.md ├── clean_judgment.py ├── common.py ├── compute_agreement.py ├── convert_dpo_trainer_file_to_huggingface.py ├── download_mt_bench_pregenerated.py ├── export_gpt4_eval_competition_samples.py ├── gen_api_answer.py ├── gen_judgment.py ├── gen_model_answer.py ├── generate_model_outputs_for_gpt4_eval.py ├── qa_browser.py ├── run_gen_gpt4_judge_pairwise.sh └── show_result.py ├── ppo ├── README.md ├── __init__.py ├── configs.py ├── configs │ └── hh_config.yaml ├── ppo_hh.py ├── ppo_sentiment.py ├── run_ppo_hh.sh ├── scripts │ ├── sweep_alpha.sh │ ├── sweep_fkl.sh │ ├── sweep_jsd.sh │ └── sweep_rkl.sh └── trainer.py ├── preference_datasets.py ├── requirements.txt ├── scripts ├── cp_files.sh ├── hh │ ├── run_alphad03.sh │ ├── run_alphad03_09.sh │ ├── run_alphad05.sh │ ├── run_alphad05_09.sh │ ├── run_alphad07.sh │ ├── run_alphad07_09.sh │ ├── run_forward_kl.sh │ ├── run_jsd.sh │ ├── run_reverse_kl.sh │ └── run_sft.sh └── imdb │ ├── run_alpha.sh │ ├── run_forward_kl.sh │ ├── run_jsd.sh │ ├── run_reverse_kl.sh │ └── run_sft.sh ├── train.py ├── trainers.py ├── utils.py └── utils ├── convert_ckpt.py ├── dataset_generation.py ├── dataset_generation_batch.py ├── gen.py ├── gen_data.sh ├── gen_dpo_reponses_hh.sh ├── gen_ppo_reponses_hh.sh ├── hh_response_generation.py └── hh_response_generation_ppo.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/README.md -------------------------------------------------------------------------------- /cali/cali_plot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/cali/cali_plot.py -------------------------------------------------------------------------------- /cali/hh_cali_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/cali/hh_cali_eval.py -------------------------------------------------------------------------------- /cali/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/cali/readme.md -------------------------------------------------------------------------------- /config/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/config/config.yaml -------------------------------------------------------------------------------- /config/loss/dpo.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/config/loss/dpo.yaml -------------------------------------------------------------------------------- /config/loss/sft.yaml: -------------------------------------------------------------------------------- 1 | name: sft -------------------------------------------------------------------------------- /config/model/blank_model.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/config/model/blank_model.yaml -------------------------------------------------------------------------------- /config/model/gpt2_large.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/config/model/gpt2_large.yaml -------------------------------------------------------------------------------- /config/model/gptj.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/config/model/gptj.yaml -------------------------------------------------------------------------------- /config/model/llama7b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/config/model/llama7b.yaml -------------------------------------------------------------------------------- /config/model/pythia28.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/config/model/pythia28.yaml -------------------------------------------------------------------------------- /config/model/pythia69.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/config/model/pythia69.yaml -------------------------------------------------------------------------------- /generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/generation.py -------------------------------------------------------------------------------- /metrics/imdb/imdb_eval_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/metrics/imdb/imdb_eval_metrics.py -------------------------------------------------------------------------------- /metrics/imdb/run_alpha.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/metrics/imdb/run_alpha.sh -------------------------------------------------------------------------------- /metrics/imdb/run_forward.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/metrics/imdb/run_forward.sh -------------------------------------------------------------------------------- /metrics/imdb/run_jsd.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/metrics/imdb/run_jsd.sh -------------------------------------------------------------------------------- /metrics/imdb/run_reverse.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/metrics/imdb/run_reverse.sh -------------------------------------------------------------------------------- /misc/imdb_rlhf_pairs.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/misc/imdb_rlhf_pairs.csv -------------------------------------------------------------------------------- /mt_bench/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/mt_bench/README.md -------------------------------------------------------------------------------- /mt_bench/clean_judgment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/mt_bench/clean_judgment.py -------------------------------------------------------------------------------- /mt_bench/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/mt_bench/common.py -------------------------------------------------------------------------------- /mt_bench/compute_agreement.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/mt_bench/compute_agreement.py -------------------------------------------------------------------------------- /mt_bench/convert_dpo_trainer_file_to_huggingface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/mt_bench/convert_dpo_trainer_file_to_huggingface.py -------------------------------------------------------------------------------- /mt_bench/download_mt_bench_pregenerated.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/mt_bench/download_mt_bench_pregenerated.py -------------------------------------------------------------------------------- /mt_bench/export_gpt4_eval_competition_samples.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/mt_bench/export_gpt4_eval_competition_samples.py -------------------------------------------------------------------------------- /mt_bench/gen_api_answer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/mt_bench/gen_api_answer.py -------------------------------------------------------------------------------- /mt_bench/gen_judgment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/mt_bench/gen_judgment.py -------------------------------------------------------------------------------- /mt_bench/gen_model_answer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/mt_bench/gen_model_answer.py -------------------------------------------------------------------------------- /mt_bench/generate_model_outputs_for_gpt4_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/mt_bench/generate_model_outputs_for_gpt4_eval.py -------------------------------------------------------------------------------- /mt_bench/qa_browser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/mt_bench/qa_browser.py -------------------------------------------------------------------------------- /mt_bench/run_gen_gpt4_judge_pairwise.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/mt_bench/run_gen_gpt4_judge_pairwise.sh -------------------------------------------------------------------------------- /mt_bench/show_result.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/mt_bench/show_result.py -------------------------------------------------------------------------------- /ppo/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/ppo/README.md -------------------------------------------------------------------------------- /ppo/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ppo/configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/ppo/configs.py -------------------------------------------------------------------------------- /ppo/configs/hh_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/ppo/configs/hh_config.yaml -------------------------------------------------------------------------------- /ppo/ppo_hh.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/ppo/ppo_hh.py -------------------------------------------------------------------------------- /ppo/ppo_sentiment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/ppo/ppo_sentiment.py -------------------------------------------------------------------------------- /ppo/run_ppo_hh.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/ppo/run_ppo_hh.sh -------------------------------------------------------------------------------- /ppo/scripts/sweep_alpha.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/ppo/scripts/sweep_alpha.sh -------------------------------------------------------------------------------- /ppo/scripts/sweep_fkl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/ppo/scripts/sweep_fkl.sh -------------------------------------------------------------------------------- /ppo/scripts/sweep_jsd.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/ppo/scripts/sweep_jsd.sh -------------------------------------------------------------------------------- /ppo/scripts/sweep_rkl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/ppo/scripts/sweep_rkl.sh -------------------------------------------------------------------------------- /ppo/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/ppo/trainer.py -------------------------------------------------------------------------------- /preference_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/preference_datasets.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/requirements.txt -------------------------------------------------------------------------------- /scripts/cp_files.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/scripts/cp_files.sh -------------------------------------------------------------------------------- /scripts/hh/run_alphad03.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/scripts/hh/run_alphad03.sh -------------------------------------------------------------------------------- /scripts/hh/run_alphad03_09.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/scripts/hh/run_alphad03_09.sh -------------------------------------------------------------------------------- /scripts/hh/run_alphad05.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/scripts/hh/run_alphad05.sh -------------------------------------------------------------------------------- /scripts/hh/run_alphad05_09.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/scripts/hh/run_alphad05_09.sh -------------------------------------------------------------------------------- /scripts/hh/run_alphad07.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/scripts/hh/run_alphad07.sh -------------------------------------------------------------------------------- /scripts/hh/run_alphad07_09.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/scripts/hh/run_alphad07_09.sh -------------------------------------------------------------------------------- /scripts/hh/run_forward_kl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/scripts/hh/run_forward_kl.sh -------------------------------------------------------------------------------- /scripts/hh/run_jsd.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/scripts/hh/run_jsd.sh -------------------------------------------------------------------------------- /scripts/hh/run_reverse_kl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/scripts/hh/run_reverse_kl.sh -------------------------------------------------------------------------------- /scripts/hh/run_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/scripts/hh/run_sft.sh -------------------------------------------------------------------------------- /scripts/imdb/run_alpha.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/scripts/imdb/run_alpha.sh -------------------------------------------------------------------------------- /scripts/imdb/run_forward_kl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/scripts/imdb/run_forward_kl.sh -------------------------------------------------------------------------------- /scripts/imdb/run_jsd.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/scripts/imdb/run_jsd.sh -------------------------------------------------------------------------------- /scripts/imdb/run_reverse_kl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/scripts/imdb/run_reverse_kl.sh -------------------------------------------------------------------------------- /scripts/imdb/run_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/scripts/imdb/run_sft.sh -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/train.py -------------------------------------------------------------------------------- /trainers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/trainers.py -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/utils.py -------------------------------------------------------------------------------- /utils/convert_ckpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/utils/convert_ckpt.py -------------------------------------------------------------------------------- /utils/dataset_generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/utils/dataset_generation.py -------------------------------------------------------------------------------- /utils/dataset_generation_batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/utils/dataset_generation_batch.py -------------------------------------------------------------------------------- /utils/gen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/utils/gen.py -------------------------------------------------------------------------------- /utils/gen_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/utils/gen_data.sh -------------------------------------------------------------------------------- /utils/gen_dpo_reponses_hh.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/utils/gen_dpo_reponses_hh.sh -------------------------------------------------------------------------------- /utils/gen_ppo_reponses_hh.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/utils/gen_ppo_reponses_hh.sh -------------------------------------------------------------------------------- /utils/hh_response_generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/utils/hh_response_generation.py -------------------------------------------------------------------------------- /utils/hh_response_generation_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/utils/hh_response_generation_ppo.py --------------------------------------------------------------------------------