├── .gitignore
├── LICENSE
├── README.md
├── cali
    ├── cali_plot.py
    ├── hh_cali_eval.py
    └── readme.md
├── config
    ├── config.yaml
    ├── loss
    │   ├── dpo.yaml
    │   └── sft.yaml
    └── model
    │   ├── blank_model.yaml
    │   ├── gpt2_large.yaml
    │   ├── gptj.yaml
    │   ├── llama7b.yaml
    │   ├── pythia28.yaml
    │   └── pythia69.yaml
├── generation.py
├── metrics
    └── imdb
    │   ├── imdb_eval_metrics.py
    │   ├── run_alpha.sh
    │   ├── run_forward.sh
    │   ├── run_jsd.sh
    │   └── run_reverse.sh
├── misc
    └── imdb_rlhf_pairs.csv
├── mt_bench
    ├── README.md
    ├── clean_judgment.py
    ├── common.py
    ├── compute_agreement.py
    ├── convert_dpo_trainer_file_to_huggingface.py
    ├── download_mt_bench_pregenerated.py
    ├── export_gpt4_eval_competition_samples.py
    ├── gen_api_answer.py
    ├── gen_judgment.py
    ├── gen_model_answer.py
    ├── generate_model_outputs_for_gpt4_eval.py
    ├── qa_browser.py
    ├── run_gen_gpt4_judge_pairwise.sh
    └── show_result.py
├── ppo
    ├── README.md
    ├── __init__.py
    ├── configs.py
    ├── configs
    │   └── hh_config.yaml
    ├── ppo_hh.py
    ├── ppo_sentiment.py
    ├── run_ppo_hh.sh
    ├── scripts
    │   ├── sweep_alpha.sh
    │   ├── sweep_fkl.sh
    │   ├── sweep_jsd.sh
    │   └── sweep_rkl.sh
    └── trainer.py
├── preference_datasets.py
├── requirements.txt
├── scripts
    ├── cp_files.sh
    ├── hh
    │   ├── run_alphad03.sh
    │   ├── run_alphad03_09.sh
    │   ├── run_alphad05.sh
    │   ├── run_alphad05_09.sh
    │   ├── run_alphad07.sh
    │   ├── run_alphad07_09.sh
    │   ├── run_forward_kl.sh
    │   ├── run_jsd.sh
    │   ├── run_reverse_kl.sh
    │   └── run_sft.sh
    └── imdb
    │   ├── run_alpha.sh
    │   ├── run_forward_kl.sh
    │   ├── run_jsd.sh
    │   ├── run_reverse_kl.sh
    │   └── run_sft.sh
├── train.py
├── trainers.py
├── utils.py
└── utils
    ├── convert_ckpt.py
    ├── dataset_generation.py
    ├── dataset_generation_batch.py
    ├── gen.py
    ├── gen_data.sh
    ├── gen_dpo_reponses_hh.sh
    ├── gen_ppo_reponses_hh.sh
    ├── hh_response_generation.py
    └── hh_response_generation_ppo.py


/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/.gitignore


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/LICENSE


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/README.md


--------------------------------------------------------------------------------
/cali/cali_plot.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/cali/cali_plot.py


--------------------------------------------------------------------------------
/cali/hh_cali_eval.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/cali/hh_cali_eval.py


--------------------------------------------------------------------------------
/cali/readme.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/cali/readme.md


--------------------------------------------------------------------------------
/config/config.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/config/config.yaml


--------------------------------------------------------------------------------
/config/loss/dpo.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/config/loss/dpo.yaml


--------------------------------------------------------------------------------
/config/loss/sft.yaml:
--------------------------------------------------------------------------------
1 | name: sft


--------------------------------------------------------------------------------
/config/model/blank_model.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/config/model/blank_model.yaml


--------------------------------------------------------------------------------
/config/model/gpt2_large.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/config/model/gpt2_large.yaml


--------------------------------------------------------------------------------
/config/model/gptj.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/config/model/gptj.yaml


--------------------------------------------------------------------------------
/config/model/llama7b.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/config/model/llama7b.yaml


--------------------------------------------------------------------------------
/config/model/pythia28.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/config/model/pythia28.yaml


--------------------------------------------------------------------------------
/config/model/pythia69.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/config/model/pythia69.yaml


--------------------------------------------------------------------------------
/generation.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/generation.py


--------------------------------------------------------------------------------
/metrics/imdb/imdb_eval_metrics.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/metrics/imdb/imdb_eval_metrics.py


--------------------------------------------------------------------------------
/metrics/imdb/run_alpha.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/metrics/imdb/run_alpha.sh


--------------------------------------------------------------------------------
/metrics/imdb/run_forward.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/metrics/imdb/run_forward.sh


--------------------------------------------------------------------------------
/metrics/imdb/run_jsd.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/metrics/imdb/run_jsd.sh


--------------------------------------------------------------------------------
/metrics/imdb/run_reverse.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/metrics/imdb/run_reverse.sh


--------------------------------------------------------------------------------
/misc/imdb_rlhf_pairs.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/misc/imdb_rlhf_pairs.csv


--------------------------------------------------------------------------------
/mt_bench/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/mt_bench/README.md


--------------------------------------------------------------------------------
/mt_bench/clean_judgment.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/mt_bench/clean_judgment.py


--------------------------------------------------------------------------------
/mt_bench/common.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/mt_bench/common.py


--------------------------------------------------------------------------------
/mt_bench/compute_agreement.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/mt_bench/compute_agreement.py


--------------------------------------------------------------------------------
/mt_bench/convert_dpo_trainer_file_to_huggingface.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/mt_bench/convert_dpo_trainer_file_to_huggingface.py


--------------------------------------------------------------------------------
/mt_bench/download_mt_bench_pregenerated.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/mt_bench/download_mt_bench_pregenerated.py


--------------------------------------------------------------------------------
/mt_bench/export_gpt4_eval_competition_samples.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/mt_bench/export_gpt4_eval_competition_samples.py


--------------------------------------------------------------------------------
/mt_bench/gen_api_answer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/mt_bench/gen_api_answer.py


--------------------------------------------------------------------------------
/mt_bench/gen_judgment.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/mt_bench/gen_judgment.py


--------------------------------------------------------------------------------
/mt_bench/gen_model_answer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/mt_bench/gen_model_answer.py


--------------------------------------------------------------------------------
/mt_bench/generate_model_outputs_for_gpt4_eval.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/mt_bench/generate_model_outputs_for_gpt4_eval.py


--------------------------------------------------------------------------------
/mt_bench/qa_browser.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/mt_bench/qa_browser.py


--------------------------------------------------------------------------------
/mt_bench/run_gen_gpt4_judge_pairwise.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/mt_bench/run_gen_gpt4_judge_pairwise.sh


--------------------------------------------------------------------------------
/mt_bench/show_result.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/mt_bench/show_result.py


--------------------------------------------------------------------------------
/ppo/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/ppo/README.md


--------------------------------------------------------------------------------
/ppo/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/ppo/configs.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/ppo/configs.py


--------------------------------------------------------------------------------
/ppo/configs/hh_config.yaml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/ppo/configs/hh_config.yaml


--------------------------------------------------------------------------------
/ppo/ppo_hh.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/ppo/ppo_hh.py


--------------------------------------------------------------------------------
/ppo/ppo_sentiment.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/ppo/ppo_sentiment.py


--------------------------------------------------------------------------------
/ppo/run_ppo_hh.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/ppo/run_ppo_hh.sh


--------------------------------------------------------------------------------
/ppo/scripts/sweep_alpha.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/ppo/scripts/sweep_alpha.sh


--------------------------------------------------------------------------------
/ppo/scripts/sweep_fkl.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/ppo/scripts/sweep_fkl.sh


--------------------------------------------------------------------------------
/ppo/scripts/sweep_jsd.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/ppo/scripts/sweep_jsd.sh


--------------------------------------------------------------------------------
/ppo/scripts/sweep_rkl.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/ppo/scripts/sweep_rkl.sh


--------------------------------------------------------------------------------
/ppo/trainer.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/ppo/trainer.py


--------------------------------------------------------------------------------
/preference_datasets.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/preference_datasets.py


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/requirements.txt


--------------------------------------------------------------------------------
/scripts/cp_files.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/scripts/cp_files.sh


--------------------------------------------------------------------------------
/scripts/hh/run_alphad03.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/scripts/hh/run_alphad03.sh


--------------------------------------------------------------------------------
/scripts/hh/run_alphad03_09.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/scripts/hh/run_alphad03_09.sh


--------------------------------------------------------------------------------
/scripts/hh/run_alphad05.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/scripts/hh/run_alphad05.sh


--------------------------------------------------------------------------------
/scripts/hh/run_alphad05_09.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/scripts/hh/run_alphad05_09.sh


--------------------------------------------------------------------------------
/scripts/hh/run_alphad07.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/scripts/hh/run_alphad07.sh


--------------------------------------------------------------------------------
/scripts/hh/run_alphad07_09.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/scripts/hh/run_alphad07_09.sh


--------------------------------------------------------------------------------
/scripts/hh/run_forward_kl.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/scripts/hh/run_forward_kl.sh


--------------------------------------------------------------------------------
/scripts/hh/run_jsd.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/scripts/hh/run_jsd.sh


--------------------------------------------------------------------------------
/scripts/hh/run_reverse_kl.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/scripts/hh/run_reverse_kl.sh


--------------------------------------------------------------------------------
/scripts/hh/run_sft.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/scripts/hh/run_sft.sh


--------------------------------------------------------------------------------
/scripts/imdb/run_alpha.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/scripts/imdb/run_alpha.sh


--------------------------------------------------------------------------------
/scripts/imdb/run_forward_kl.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/scripts/imdb/run_forward_kl.sh


--------------------------------------------------------------------------------
/scripts/imdb/run_jsd.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/scripts/imdb/run_jsd.sh


--------------------------------------------------------------------------------
/scripts/imdb/run_reverse_kl.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/scripts/imdb/run_reverse_kl.sh


--------------------------------------------------------------------------------
/scripts/imdb/run_sft.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/scripts/imdb/run_sft.sh


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/train.py


--------------------------------------------------------------------------------
/trainers.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/trainers.py


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/utils.py


--------------------------------------------------------------------------------
/utils/convert_ckpt.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/utils/convert_ckpt.py


--------------------------------------------------------------------------------
/utils/dataset_generation.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/utils/dataset_generation.py


--------------------------------------------------------------------------------
/utils/dataset_generation_batch.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/utils/dataset_generation_batch.py


--------------------------------------------------------------------------------
/utils/gen.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/utils/gen.py


--------------------------------------------------------------------------------
/utils/gen_data.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/utils/gen_data.sh


--------------------------------------------------------------------------------
/utils/gen_dpo_reponses_hh.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/utils/gen_dpo_reponses_hh.sh


--------------------------------------------------------------------------------
/utils/gen_ppo_reponses_hh.sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/utils/gen_ppo_reponses_hh.sh


--------------------------------------------------------------------------------
/utils/hh_response_generation.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/utils/hh_response_generation.py


--------------------------------------------------------------------------------
/utils/hh_response_generation_ppo.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alecwangcq/f-divergence-dpo/HEAD/utils/hh_response_generation_ppo.py


--------------------------------------------------------------------------------