├── .gitignore ├── README.md ├── eval.py ├── generate_rollouts.py ├── gpt4_annotate ├── GPT4_b5 │ ├── pref1A.txt │ ├── pref1A.yaml │ ├── pref1A_wi.txt │ ├── pref1B.txt │ ├── pref1B.yaml │ ├── pref1B_wi.txt │ ├── pref2A.txt │ ├── pref2A.yaml │ ├── pref2A_wi.txt │ ├── pref2B.txt │ ├── pref2B.yaml │ ├── pref2B_wi.txt │ ├── pref3A.txt │ ├── pref3A.yaml │ ├── pref3A_wi.txt │ ├── pref3B.txt │ ├── pref3B.yaml │ ├── pref3B_wi.txt │ ├── pref3C.txt │ ├── pref3C.yaml │ ├── pref3C_wi.txt │ ├── pref3D.txt │ ├── pref3D.yaml │ └── pref3D_wi.txt └── run.py ├── gpt4_evaluate ├── alpaca_farm │ ├── __init__.py │ ├── accelerate_patch.py │ ├── auto_annotations │ │ ├── __init__.py │ │ ├── analysis.py │ │ ├── annotators │ │ │ ├── annotator_pool_v0 │ │ │ │ ├── chatml_b1_chat_v0_with_inputs.txt │ │ │ │ ├── chatml_b1_chat_v0_without_inputs.txt │ │ │ │ ├── chatml_b1_chat_with_inputs.txt │ │ │ │ ├── chatml_b1_chat_without_inputs.txt │ │ │ │ ├── chatml_b1_cot_json_with_inputs.txt │ │ │ │ ├── chatml_b1_cot_json_without_inputs.txt │ │ │ │ ├── chatml_b1_with_inputs.txt │ │ │ │ ├── chatml_b1_without_inputs.txt │ │ │ │ ├── chatml_b4_cot_json_with_inputs.txt │ │ │ │ ├── chatml_b4_cot_json_without_inputs.txt │ │ │ │ ├── chatml_b5_diana_with_inputs.txt │ │ │ │ ├── chatml_b5_diana_without_inputs.txt │ │ │ │ ├── chatml_b5_joe_with_inputs.txt │ │ │ │ ├── chatml_b5_joe_without_inputs.txt │ │ │ │ ├── chatml_b5_with_inputs.txt │ │ │ │ ├── chatml_b5_without_inputs.txt │ │ │ │ ├── configs.yaml │ │ │ │ ├── text_b1_v0_with_inputs.txt │ │ │ │ ├── text_b1_v0_without_inputs.txt │ │ │ │ ├── text_b1_with_inputs.txt │ │ │ │ ├── text_b1_without_inputs.txt │ │ │ │ ├── text_b4_reasoning_with_inputs.txt │ │ │ │ ├── text_b4_reasoning_without_inputs.txt │ │ │ │ ├── text_b5_with_inputs.txt │ │ │ │ └── text_b5_without_inputs.txt │ │ │ ├── criteria_wise_eval_gpt4 │ │ │ │ ├── demonstrations.txt │ │ │ │ ├── helpfulness.txt │ │ │ │ ├── helpfulness.yaml │ │ │ │ ├── p1a.txt │ │ │ │ ├── p1a.yaml │ │ │ │ ├── p1b.txt │ │ │ │ ├── p1b.yaml │ │ │ │ ├── p2a.txt │ │ │ │ ├── p2a.yaml │ │ │ │ ├── p2b.txt │ │ │ │ ├── p2b.yaml │ │ │ │ ├── p3a.txt │ │ │ │ ├── p3a.yaml │ │ │ │ ├── p3b.txt │ │ │ │ ├── p3b.yaml │ │ │ │ ├── p3c.txt │ │ │ │ ├── p3c.yaml │ │ │ │ ├── p3d.txt │ │ │ │ └── p3d.yaml │ │ │ ├── gpt-3.5-turbo │ │ │ │ └── configs.yaml │ │ │ ├── greedy_gpt4 │ │ │ │ ├── chatml_b5_with_inputs.txt │ │ │ │ ├── chatml_b5_without_inputs.txt │ │ │ │ └── configs.yaml │ │ │ ├── preferences │ │ │ │ └── pref1.txt │ │ │ └── text-davinci-003 │ │ │ │ ├── annotations.json │ │ │ │ └── configs.yaml │ │ ├── decoders.py │ │ ├── eval.py │ │ ├── pairwise_annotators.py │ │ ├── requirements.txt │ │ └── utils.py │ ├── common.py │ ├── constants.py │ ├── data_postprocessor.py │ ├── data_preprocessor.py │ ├── data_utils.py │ ├── distributed_utils.py │ ├── flash_models │ │ ├── README.md │ │ ├── __init__.py │ │ ├── apex_patch.py │ │ ├── flash_llama.py │ │ ├── flash_opt.py │ │ └── tensor_ops.py │ ├── inference │ │ ├── __init__.py │ │ ├── decode.py │ │ └── score.py │ ├── logging.py │ ├── models │ │ ├── __init__.py │ │ ├── reward_model.py │ │ └── rl_models.py │ ├── openai_utils.py │ ├── reward_modeling_trainer.py │ ├── rl │ │ ├── __init__.py │ │ ├── kl_controller.py │ │ ├── ppo_trainer.py │ │ ├── ppo_utils.py │ │ └── rl_trainer.py │ ├── torch_ops.py │ ├── trainer_utils.py │ ├── types.py │ └── utils.py └── run.py ├── requirements.txt ├── training ├── multitask_training.py ├── pmorl.py ├── psoups.py └── rlhf.py └── training_reward_model.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/.gitignore -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/README.md -------------------------------------------------------------------------------- /eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/eval.py -------------------------------------------------------------------------------- /generate_rollouts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/generate_rollouts.py -------------------------------------------------------------------------------- /gpt4_annotate/GPT4_b5/pref1A.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_annotate/GPT4_b5/pref1A.txt -------------------------------------------------------------------------------- /gpt4_annotate/GPT4_b5/pref1A.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_annotate/GPT4_b5/pref1A.yaml -------------------------------------------------------------------------------- /gpt4_annotate/GPT4_b5/pref1A_wi.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_annotate/GPT4_b5/pref1A_wi.txt -------------------------------------------------------------------------------- /gpt4_annotate/GPT4_b5/pref1B.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_annotate/GPT4_b5/pref1B.txt -------------------------------------------------------------------------------- /gpt4_annotate/GPT4_b5/pref1B.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_annotate/GPT4_b5/pref1B.yaml -------------------------------------------------------------------------------- /gpt4_annotate/GPT4_b5/pref1B_wi.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_annotate/GPT4_b5/pref1B_wi.txt -------------------------------------------------------------------------------- /gpt4_annotate/GPT4_b5/pref2A.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_annotate/GPT4_b5/pref2A.txt -------------------------------------------------------------------------------- /gpt4_annotate/GPT4_b5/pref2A.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_annotate/GPT4_b5/pref2A.yaml -------------------------------------------------------------------------------- /gpt4_annotate/GPT4_b5/pref2A_wi.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_annotate/GPT4_b5/pref2A_wi.txt -------------------------------------------------------------------------------- /gpt4_annotate/GPT4_b5/pref2B.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_annotate/GPT4_b5/pref2B.txt -------------------------------------------------------------------------------- /gpt4_annotate/GPT4_b5/pref2B.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_annotate/GPT4_b5/pref2B.yaml -------------------------------------------------------------------------------- /gpt4_annotate/GPT4_b5/pref2B_wi.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_annotate/GPT4_b5/pref2B_wi.txt -------------------------------------------------------------------------------- /gpt4_annotate/GPT4_b5/pref3A.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_annotate/GPT4_b5/pref3A.txt -------------------------------------------------------------------------------- /gpt4_annotate/GPT4_b5/pref3A.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_annotate/GPT4_b5/pref3A.yaml -------------------------------------------------------------------------------- /gpt4_annotate/GPT4_b5/pref3A_wi.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_annotate/GPT4_b5/pref3A_wi.txt -------------------------------------------------------------------------------- /gpt4_annotate/GPT4_b5/pref3B.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_annotate/GPT4_b5/pref3B.txt -------------------------------------------------------------------------------- /gpt4_annotate/GPT4_b5/pref3B.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_annotate/GPT4_b5/pref3B.yaml -------------------------------------------------------------------------------- /gpt4_annotate/GPT4_b5/pref3B_wi.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_annotate/GPT4_b5/pref3B_wi.txt -------------------------------------------------------------------------------- /gpt4_annotate/GPT4_b5/pref3C.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_annotate/GPT4_b5/pref3C.txt -------------------------------------------------------------------------------- /gpt4_annotate/GPT4_b5/pref3C.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_annotate/GPT4_b5/pref3C.yaml -------------------------------------------------------------------------------- /gpt4_annotate/GPT4_b5/pref3C_wi.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_annotate/GPT4_b5/pref3C_wi.txt -------------------------------------------------------------------------------- /gpt4_annotate/GPT4_b5/pref3D.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_annotate/GPT4_b5/pref3D.txt -------------------------------------------------------------------------------- /gpt4_annotate/GPT4_b5/pref3D.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_annotate/GPT4_b5/pref3D.yaml -------------------------------------------------------------------------------- /gpt4_annotate/GPT4_b5/pref3D_wi.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_annotate/GPT4_b5/pref3D_wi.txt -------------------------------------------------------------------------------- /gpt4_annotate/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_annotate/run.py -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/__init__.py -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/accelerate_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/accelerate_patch.py -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/__init__.py -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/analysis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/analysis.py -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/chatml_b1_chat_v0_with_inputs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/chatml_b1_chat_v0_with_inputs.txt -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/chatml_b1_chat_v0_without_inputs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/chatml_b1_chat_v0_without_inputs.txt -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/chatml_b1_chat_with_inputs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/chatml_b1_chat_with_inputs.txt -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/chatml_b1_chat_without_inputs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/chatml_b1_chat_without_inputs.txt -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/chatml_b1_cot_json_with_inputs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/chatml_b1_cot_json_with_inputs.txt -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/chatml_b1_cot_json_without_inputs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/chatml_b1_cot_json_without_inputs.txt -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/chatml_b1_with_inputs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/chatml_b1_with_inputs.txt -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/chatml_b1_without_inputs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/chatml_b1_without_inputs.txt -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/chatml_b4_cot_json_with_inputs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/chatml_b4_cot_json_with_inputs.txt -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/chatml_b4_cot_json_without_inputs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/chatml_b4_cot_json_without_inputs.txt -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/chatml_b5_diana_with_inputs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/chatml_b5_diana_with_inputs.txt -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/chatml_b5_diana_without_inputs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/chatml_b5_diana_without_inputs.txt -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/chatml_b5_joe_with_inputs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/chatml_b5_joe_with_inputs.txt -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/chatml_b5_joe_without_inputs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/chatml_b5_joe_without_inputs.txt -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/chatml_b5_with_inputs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/chatml_b5_with_inputs.txt -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/chatml_b5_without_inputs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/chatml_b5_without_inputs.txt -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/configs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/configs.yaml -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/text_b1_v0_with_inputs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/text_b1_v0_with_inputs.txt -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/text_b1_v0_without_inputs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/text_b1_v0_without_inputs.txt -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/text_b1_with_inputs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/text_b1_with_inputs.txt -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/text_b1_without_inputs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/text_b1_without_inputs.txt -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/text_b4_reasoning_with_inputs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/text_b4_reasoning_with_inputs.txt -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/text_b4_reasoning_without_inputs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/text_b4_reasoning_without_inputs.txt -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/text_b5_with_inputs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/text_b5_with_inputs.txt -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/text_b5_without_inputs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/annotator_pool_v0/text_b5_without_inputs.txt -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/criteria_wise_eval_gpt4/demonstrations.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/criteria_wise_eval_gpt4/demonstrations.txt -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/criteria_wise_eval_gpt4/helpfulness.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/criteria_wise_eval_gpt4/helpfulness.txt -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/criteria_wise_eval_gpt4/helpfulness.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/criteria_wise_eval_gpt4/helpfulness.yaml -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/criteria_wise_eval_gpt4/p1a.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/criteria_wise_eval_gpt4/p1a.txt -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/criteria_wise_eval_gpt4/p1a.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/criteria_wise_eval_gpt4/p1a.yaml -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/criteria_wise_eval_gpt4/p1b.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/criteria_wise_eval_gpt4/p1b.txt -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/criteria_wise_eval_gpt4/p1b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/criteria_wise_eval_gpt4/p1b.yaml -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/criteria_wise_eval_gpt4/p2a.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/criteria_wise_eval_gpt4/p2a.txt -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/criteria_wise_eval_gpt4/p2a.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/criteria_wise_eval_gpt4/p2a.yaml -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/criteria_wise_eval_gpt4/p2b.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/criteria_wise_eval_gpt4/p2b.txt -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/criteria_wise_eval_gpt4/p2b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/criteria_wise_eval_gpt4/p2b.yaml -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/criteria_wise_eval_gpt4/p3a.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/criteria_wise_eval_gpt4/p3a.txt -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/criteria_wise_eval_gpt4/p3a.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/criteria_wise_eval_gpt4/p3a.yaml -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/criteria_wise_eval_gpt4/p3b.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/criteria_wise_eval_gpt4/p3b.txt -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/criteria_wise_eval_gpt4/p3b.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/criteria_wise_eval_gpt4/p3b.yaml -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/criteria_wise_eval_gpt4/p3c.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/criteria_wise_eval_gpt4/p3c.txt -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/criteria_wise_eval_gpt4/p3c.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/criteria_wise_eval_gpt4/p3c.yaml -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/criteria_wise_eval_gpt4/p3d.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/criteria_wise_eval_gpt4/p3d.txt -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/criteria_wise_eval_gpt4/p3d.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/criteria_wise_eval_gpt4/p3d.yaml -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/gpt-3.5-turbo/configs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/gpt-3.5-turbo/configs.yaml -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/greedy_gpt4/chatml_b5_with_inputs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/greedy_gpt4/chatml_b5_with_inputs.txt -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/greedy_gpt4/chatml_b5_without_inputs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/greedy_gpt4/chatml_b5_without_inputs.txt -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/greedy_gpt4/configs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/greedy_gpt4/configs.yaml -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/preferences/pref1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/preferences/pref1.txt -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/text-davinci-003/annotations.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/text-davinci-003/annotations.json -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/annotators/text-davinci-003/configs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/annotators/text-davinci-003/configs.yaml -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/decoders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/decoders.py -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/eval.py -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/pairwise_annotators.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/pairwise_annotators.py -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets 2 | openai 3 | pandas 4 | tiktoken>=0.3.2 5 | fire 6 | -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/auto_annotations/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/auto_annotations/utils.py -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/common.py -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/constants.py -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/data_postprocessor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/data_postprocessor.py -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/data_preprocessor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/data_preprocessor.py -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/data_utils.py -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/distributed_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/distributed_utils.py -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/flash_models/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/flash_models/README.md -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/flash_models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/flash_models/__init__.py -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/flash_models/apex_patch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/flash_models/apex_patch.py -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/flash_models/flash_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/flash_models/flash_llama.py -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/flash_models/flash_opt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/flash_models/flash_opt.py -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/flash_models/tensor_ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/flash_models/tensor_ops.py -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/inference/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/inference/__init__.py -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/inference/decode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/inference/decode.py -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/inference/score.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/inference/score.py -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/logging.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/logging.py -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/models/__init__.py -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/models/reward_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/models/reward_model.py -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/models/rl_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/models/rl_models.py -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/openai_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/openai_utils.py -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/reward_modeling_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/reward_modeling_trainer.py -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/rl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/rl/__init__.py -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/rl/kl_controller.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/rl/kl_controller.py -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/rl/ppo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/rl/ppo_trainer.py -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/rl/ppo_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/rl/ppo_utils.py -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/rl/rl_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/rl/rl_trainer.py -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/torch_ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/torch_ops.py -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/trainer_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/trainer_utils.py -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/types.py -------------------------------------------------------------------------------- /gpt4_evaluate/alpaca_farm/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/alpaca_farm/utils.py -------------------------------------------------------------------------------- /gpt4_evaluate/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/gpt4_evaluate/run.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/requirements.txt -------------------------------------------------------------------------------- /training/multitask_training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/training/multitask_training.py -------------------------------------------------------------------------------- /training/pmorl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/training/pmorl.py -------------------------------------------------------------------------------- /training/psoups.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/training/psoups.py -------------------------------------------------------------------------------- /training/rlhf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/training/rlhf.py -------------------------------------------------------------------------------- /training_reward_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joeljang/RLPHF/HEAD/training_reward_model.py --------------------------------------------------------------------------------