├── .gitignore
├── .pylintrc
├── LICENSE
├── README.md
├── data
    ├── synthetic_hhrlhf_harmless_expert_gpt4.csv
    └── synthetic_hhrlhf_helpful_expert_gpt4.csv
├── img
    ├── demo.png
    └── paper_prev.png
├── networks.py
├── step1_sft.py
├── step2_gen_sample.py
├── step3.5_processing_data.py
├── step3_reward_annotation.py
├── step4_gen_embeddings.py
├── step5_train_rms.py
└── step6_eval_rms.py


/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holarissun/RewardModelingBeyondBradleyTerry/HEAD/.gitignore


--------------------------------------------------------------------------------
/.pylintrc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holarissun/RewardModelingBeyondBradleyTerry/HEAD/.pylintrc


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holarissun/RewardModelingBeyondBradleyTerry/HEAD/LICENSE


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holarissun/RewardModelingBeyondBradleyTerry/HEAD/README.md


--------------------------------------------------------------------------------
/data/synthetic_hhrlhf_harmless_expert_gpt4.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holarissun/RewardModelingBeyondBradleyTerry/HEAD/data/synthetic_hhrlhf_harmless_expert_gpt4.csv


--------------------------------------------------------------------------------
/data/synthetic_hhrlhf_helpful_expert_gpt4.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holarissun/RewardModelingBeyondBradleyTerry/HEAD/data/synthetic_hhrlhf_helpful_expert_gpt4.csv


--------------------------------------------------------------------------------
/img/demo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holarissun/RewardModelingBeyondBradleyTerry/HEAD/img/demo.png


--------------------------------------------------------------------------------
/img/paper_prev.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holarissun/RewardModelingBeyondBradleyTerry/HEAD/img/paper_prev.png


--------------------------------------------------------------------------------
/networks.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holarissun/RewardModelingBeyondBradleyTerry/HEAD/networks.py


--------------------------------------------------------------------------------
/step1_sft.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holarissun/RewardModelingBeyondBradleyTerry/HEAD/step1_sft.py


--------------------------------------------------------------------------------
/step2_gen_sample.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holarissun/RewardModelingBeyondBradleyTerry/HEAD/step2_gen_sample.py


--------------------------------------------------------------------------------
/step3.5_processing_data.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holarissun/RewardModelingBeyondBradleyTerry/HEAD/step3.5_processing_data.py


--------------------------------------------------------------------------------
/step3_reward_annotation.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holarissun/RewardModelingBeyondBradleyTerry/HEAD/step3_reward_annotation.py


--------------------------------------------------------------------------------
/step4_gen_embeddings.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holarissun/RewardModelingBeyondBradleyTerry/HEAD/step4_gen_embeddings.py


--------------------------------------------------------------------------------
/step5_train_rms.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holarissun/RewardModelingBeyondBradleyTerry/HEAD/step5_train_rms.py


--------------------------------------------------------------------------------
/step6_eval_rms.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/holarissun/RewardModelingBeyondBradleyTerry/HEAD/step6_eval_rms.py


--------------------------------------------------------------------------------