├── .DS_Store ├── .github └── ISSUE_TEMPLATE │ ├── bug_report.yaml │ ├── config.yml │ └── feature_request.yml ├── .gitignore ├── FAQ.md ├── LICENSE ├── MODEL_LICENSE ├── PROJECT.md ├── README.md ├── __init__.py ├── api.py ├── cli_demo.py ├── examples ├── ad-writing-2.png ├── blog-outline.png ├── comments-writing.png ├── email-writing-1.png ├── email-writing-2.png ├── information-extraction.png ├── role-play.png ├── self-introduction.png ├── sport.png └── tour-guide.png ├── img.png ├── img_1.png ├── img_2.png ├── img_3.png ├── img_4.png ├── img_5.png ├── img_6.png ├── improve ├── README.md └── data_sample.jsonl ├── limitations ├── factual_error.png ├── math_error.png ├── self-confusion_google.jpg ├── self-confusion_openai.jpg └── self-confusion_tencent.jpg ├── ptuning ├── .DS_Store ├── __init__.py ├── arguments.py ├── data │ ├── .DS_Store │ ├── comparison_data.json │ ├── dataset_info.json │ ├── sft_data.json │ ├── title_cat_1000.csv │ ├── train.json │ └── train_news.json ├── deepspeed.json ├── dev.json ├── ds_train_finetune.sh ├── evaluate.sh ├── evaluate_finetune.sh ├── finetune_glm2.py ├── finetune_lora.py ├── finetune_lora_rm.py ├── finetune_lora_sft.py ├── finetune_ppo.py ├── img.png ├── main.py ├── output │ ├── .DS_Store │ ├── adapter_config.json │ ├── adapter_model.bin │ └── checkpoint-1500 │ │ ├── adapter_model.bin │ │ ├── optimizer.pt │ │ ├── rng_state.pth │ │ ├── scaler.pt │ │ ├── scheduler.pt │ │ ├── trainer_state.json │ │ └── training_args.bin ├── predict_glm.py ├── predict_lora.py ├── predict_lora_rm.py ├── predict_lora_sft.py ├── predict_ppo.py ├── runs │ ├── Jun14_10-56-34_QJZCYZHANG-MB2 │ │ └── events.out.tfevents.1686711394.QJZCYZHANG-MB2.20665.0 │ ├── Jun14_10-57-17_QJZCYZHANG-MB2 │ │ └── events.out.tfevents.1686711437.QJZCYZHANG-MB2.20761.0 │ ├── Jun14_11-12-18_QJZCYZHANG-MB2 │ │ └── events.out.tfevents.1686712338.QJZCYZHANG-MB2.22840.0 │ └── Jun14_11-14-08_QJZCYZHANG-MB2 │ │ └── events.out.tfevents.1686712448.QJZCYZHANG-MB2.23080.0 ├── train.sh ├── train_chat.sh ├── trainer.py ├── trainer_seq2seq.py ├── utils.zip ├── utils │ ├── __init__.py │ ├── common.py │ ├── config.py │ ├── data_collator.py │ ├── other.py │ ├── pairwise.py │ ├── peft_trainer.py │ ├── ppo.py │ ├── ppo_trainer.py │ └── seq2seq.py ├── web_demo.py └── web_demo.sh ├── requirements.txt ├── resources ├── WECHAT.md ├── cli-demo.png ├── web-demo.gif ├── web-demo.png └── wechat.jpg ├── title_predict.py ├── utils.py ├── web_demo.py ├── web_demo2.py └── web_demo_old.py /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/.DS_Store -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/.github/ISSUE_TEMPLATE/bug_report.yaml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/.github/ISSUE_TEMPLATE/feature_request.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/.gitignore -------------------------------------------------------------------------------- /FAQ.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/FAQ.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/LICENSE -------------------------------------------------------------------------------- /MODEL_LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/MODEL_LICENSE -------------------------------------------------------------------------------- /PROJECT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/PROJECT.md -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/README.md -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/api.py -------------------------------------------------------------------------------- /cli_demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/cli_demo.py -------------------------------------------------------------------------------- /examples/ad-writing-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/examples/ad-writing-2.png -------------------------------------------------------------------------------- /examples/blog-outline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/examples/blog-outline.png -------------------------------------------------------------------------------- /examples/comments-writing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/examples/comments-writing.png -------------------------------------------------------------------------------- /examples/email-writing-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/examples/email-writing-1.png -------------------------------------------------------------------------------- /examples/email-writing-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/examples/email-writing-2.png -------------------------------------------------------------------------------- /examples/information-extraction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/examples/information-extraction.png -------------------------------------------------------------------------------- /examples/role-play.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/examples/role-play.png -------------------------------------------------------------------------------- /examples/self-introduction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/examples/self-introduction.png -------------------------------------------------------------------------------- /examples/sport.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/examples/sport.png -------------------------------------------------------------------------------- /examples/tour-guide.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/examples/tour-guide.png -------------------------------------------------------------------------------- /img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/img.png -------------------------------------------------------------------------------- /img_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/img_1.png -------------------------------------------------------------------------------- /img_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/img_2.png -------------------------------------------------------------------------------- /img_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/img_3.png -------------------------------------------------------------------------------- /img_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/img_4.png -------------------------------------------------------------------------------- /img_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/img_5.png -------------------------------------------------------------------------------- /img_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/img_6.png -------------------------------------------------------------------------------- /improve/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/improve/README.md -------------------------------------------------------------------------------- /improve/data_sample.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/improve/data_sample.jsonl -------------------------------------------------------------------------------- /limitations/factual_error.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/limitations/factual_error.png -------------------------------------------------------------------------------- /limitations/math_error.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/limitations/math_error.png -------------------------------------------------------------------------------- /limitations/self-confusion_google.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/limitations/self-confusion_google.jpg -------------------------------------------------------------------------------- /limitations/self-confusion_openai.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/limitations/self-confusion_openai.jpg -------------------------------------------------------------------------------- /limitations/self-confusion_tencent.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/limitations/self-confusion_tencent.jpg -------------------------------------------------------------------------------- /ptuning/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/.DS_Store -------------------------------------------------------------------------------- /ptuning/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ptuning/arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/arguments.py -------------------------------------------------------------------------------- /ptuning/data/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/data/.DS_Store -------------------------------------------------------------------------------- /ptuning/data/comparison_data.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/data/comparison_data.json -------------------------------------------------------------------------------- /ptuning/data/dataset_info.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/data/dataset_info.json -------------------------------------------------------------------------------- /ptuning/data/sft_data.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/data/sft_data.json -------------------------------------------------------------------------------- /ptuning/data/title_cat_1000.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/data/title_cat_1000.csv -------------------------------------------------------------------------------- /ptuning/data/train.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/data/train.json -------------------------------------------------------------------------------- /ptuning/data/train_news.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/data/train_news.json -------------------------------------------------------------------------------- /ptuning/deepspeed.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/deepspeed.json -------------------------------------------------------------------------------- /ptuning/dev.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/dev.json -------------------------------------------------------------------------------- /ptuning/ds_train_finetune.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/ds_train_finetune.sh -------------------------------------------------------------------------------- /ptuning/evaluate.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/evaluate.sh -------------------------------------------------------------------------------- /ptuning/evaluate_finetune.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/evaluate_finetune.sh -------------------------------------------------------------------------------- /ptuning/finetune_glm2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/finetune_glm2.py -------------------------------------------------------------------------------- /ptuning/finetune_lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/finetune_lora.py -------------------------------------------------------------------------------- /ptuning/finetune_lora_rm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/finetune_lora_rm.py -------------------------------------------------------------------------------- /ptuning/finetune_lora_sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/finetune_lora_sft.py -------------------------------------------------------------------------------- /ptuning/finetune_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/finetune_ppo.py -------------------------------------------------------------------------------- /ptuning/img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/img.png -------------------------------------------------------------------------------- /ptuning/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/main.py -------------------------------------------------------------------------------- /ptuning/output/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/output/.DS_Store -------------------------------------------------------------------------------- /ptuning/output/adapter_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/output/adapter_config.json -------------------------------------------------------------------------------- /ptuning/output/adapter_model.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/output/adapter_model.bin -------------------------------------------------------------------------------- /ptuning/output/checkpoint-1500/adapter_model.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/output/checkpoint-1500/adapter_model.bin -------------------------------------------------------------------------------- /ptuning/output/checkpoint-1500/optimizer.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/output/checkpoint-1500/optimizer.pt -------------------------------------------------------------------------------- /ptuning/output/checkpoint-1500/rng_state.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/output/checkpoint-1500/rng_state.pth -------------------------------------------------------------------------------- /ptuning/output/checkpoint-1500/scaler.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/output/checkpoint-1500/scaler.pt -------------------------------------------------------------------------------- /ptuning/output/checkpoint-1500/scheduler.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/output/checkpoint-1500/scheduler.pt -------------------------------------------------------------------------------- /ptuning/output/checkpoint-1500/trainer_state.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/output/checkpoint-1500/trainer_state.json -------------------------------------------------------------------------------- /ptuning/output/checkpoint-1500/training_args.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/output/checkpoint-1500/training_args.bin -------------------------------------------------------------------------------- /ptuning/predict_glm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/predict_glm.py -------------------------------------------------------------------------------- /ptuning/predict_lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/predict_lora.py -------------------------------------------------------------------------------- /ptuning/predict_lora_rm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/predict_lora_rm.py -------------------------------------------------------------------------------- /ptuning/predict_lora_sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/predict_lora_sft.py -------------------------------------------------------------------------------- /ptuning/predict_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/predict_ppo.py -------------------------------------------------------------------------------- /ptuning/runs/Jun14_10-56-34_QJZCYZHANG-MB2/events.out.tfevents.1686711394.QJZCYZHANG-MB2.20665.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/runs/Jun14_10-56-34_QJZCYZHANG-MB2/events.out.tfevents.1686711394.QJZCYZHANG-MB2.20665.0 -------------------------------------------------------------------------------- /ptuning/runs/Jun14_10-57-17_QJZCYZHANG-MB2/events.out.tfevents.1686711437.QJZCYZHANG-MB2.20761.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/runs/Jun14_10-57-17_QJZCYZHANG-MB2/events.out.tfevents.1686711437.QJZCYZHANG-MB2.20761.0 -------------------------------------------------------------------------------- /ptuning/runs/Jun14_11-12-18_QJZCYZHANG-MB2/events.out.tfevents.1686712338.QJZCYZHANG-MB2.22840.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/runs/Jun14_11-12-18_QJZCYZHANG-MB2/events.out.tfevents.1686712338.QJZCYZHANG-MB2.22840.0 -------------------------------------------------------------------------------- /ptuning/runs/Jun14_11-14-08_QJZCYZHANG-MB2/events.out.tfevents.1686712448.QJZCYZHANG-MB2.23080.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/runs/Jun14_11-14-08_QJZCYZHANG-MB2/events.out.tfevents.1686712448.QJZCYZHANG-MB2.23080.0 -------------------------------------------------------------------------------- /ptuning/train.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/train.sh -------------------------------------------------------------------------------- /ptuning/train_chat.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/train_chat.sh -------------------------------------------------------------------------------- /ptuning/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/trainer.py -------------------------------------------------------------------------------- /ptuning/trainer_seq2seq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/trainer_seq2seq.py -------------------------------------------------------------------------------- /ptuning/utils.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/utils.zip -------------------------------------------------------------------------------- /ptuning/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/utils/__init__.py -------------------------------------------------------------------------------- /ptuning/utils/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/utils/common.py -------------------------------------------------------------------------------- /ptuning/utils/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/utils/config.py -------------------------------------------------------------------------------- /ptuning/utils/data_collator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/utils/data_collator.py -------------------------------------------------------------------------------- /ptuning/utils/other.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/utils/other.py -------------------------------------------------------------------------------- /ptuning/utils/pairwise.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/utils/pairwise.py -------------------------------------------------------------------------------- /ptuning/utils/peft_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/utils/peft_trainer.py -------------------------------------------------------------------------------- /ptuning/utils/ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/utils/ppo.py -------------------------------------------------------------------------------- /ptuning/utils/ppo_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/utils/ppo_trainer.py -------------------------------------------------------------------------------- /ptuning/utils/seq2seq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/utils/seq2seq.py -------------------------------------------------------------------------------- /ptuning/web_demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/web_demo.py -------------------------------------------------------------------------------- /ptuning/web_demo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/ptuning/web_demo.sh -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/requirements.txt -------------------------------------------------------------------------------- /resources/WECHAT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/resources/WECHAT.md -------------------------------------------------------------------------------- /resources/cli-demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/resources/cli-demo.png -------------------------------------------------------------------------------- /resources/web-demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/resources/web-demo.gif -------------------------------------------------------------------------------- /resources/web-demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/resources/web-demo.png -------------------------------------------------------------------------------- /resources/wechat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/resources/wechat.jpg -------------------------------------------------------------------------------- /title_predict.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/title_predict.py -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/utils.py -------------------------------------------------------------------------------- /web_demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/web_demo.py -------------------------------------------------------------------------------- /web_demo2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/web_demo2.py -------------------------------------------------------------------------------- /web_demo_old.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pillars-Creation/ChatGLM-RLHF-LoRA-RM-PPO/HEAD/web_demo_old.py --------------------------------------------------------------------------------