├── imgs ├── test ├── qrcode.png ├── wechat2.png └── wechat3.png ├── src ├── llmtuner │ ├── extras │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── misc.cpython-38.pyc │ │ │ ├── logging.cpython-38.pyc │ │ │ ├── ploting.cpython-38.pyc │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── callbacks.cpython-38.pyc │ │ │ ├── constants.cpython-38.pyc │ │ │ ├── template.cpython-38.pyc │ │ │ └── save_and_load.cpython-38.pyc │ │ ├── logging.py │ │ ├── ploting.py │ │ ├── save_and_load.py │ │ ├── constants.py │ │ ├── misc.py │ │ └── callbacks.py │ ├── api │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── app.cpython-38.pyc │ │ │ └── __init__.cpython-38.pyc │ │ ├── protocol.py │ │ └── app.py │ ├── chat │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ └── stream_chat.cpython-38.pyc │ │ └── stream_chat.py │ ├── tuner │ │ ├── pt │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ └── workflow.cpython-38.pyc │ │ │ └── workflow.py │ │ ├── rm │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ ├── collator.cpython-38.pyc │ │ │ │ ├── metric.cpython-38.pyc │ │ │ │ ├── trainer.cpython-38.pyc │ │ │ │ └── workflow.cpython-38.pyc │ │ │ ├── metric.py │ │ │ ├── collator.py │ │ │ ├── trainer.py │ │ │ └── workflow.py │ │ ├── __init__.py │ │ ├── dpo │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── trainer.cpython-38.pyc │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ ├── collator.cpython-38.pyc │ │ │ │ └── workflow.cpython-38.pyc │ │ │ ├── collator.py │ │ │ ├── workflow.py │ │ │ └── trainer.py │ │ ├── ppo │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── trainer.cpython-38.pyc │ │ │ │ ├── utils.cpython-38.pyc │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ └── workflow.cpython-38.pyc │ │ │ ├── utils.py │ │ │ └── workflow.py │ │ ├── sft │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── metric.cpython-38.pyc │ │ │ │ ├── trainer.cpython-38.pyc │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ └── workflow.cpython-38.pyc │ │ │ ├── metric.py │ │ │ ├── workflow.py │ │ │ └── trainer.py │ │ ├── __pycache__ │ │ │ ├── tune.cpython-38.pyc │ │ │ └── __init__.cpython-38.pyc │ │ ├── core │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── loader.cpython-38.pyc │ │ │ │ ├── parser.cpython-38.pyc │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ ├── adapter.cpython-38.pyc │ │ │ │ └── trainer.cpython-38.pyc │ │ │ ├── adapter.py │ │ │ ├── trainer.py │ │ │ ├── loader.py │ │ │ └── parser.py │ │ └── tune.py │ ├── webui │ │ ├── __init__.py │ │ ├── components │ │ │ ├── __init__.py │ │ │ ├── data.py │ │ │ ├── export.py │ │ │ ├── infer.py │ │ │ ├── chatbot.py │ │ │ ├── top.py │ │ │ ├── eval.py │ │ │ └── train.py │ │ ├── css.py │ │ ├── manager.py │ │ ├── interface.py │ │ ├── common.py │ │ ├── chat.py │ │ └── utils.py │ ├── __pycache__ │ │ └── __init__.cpython-38.pyc │ ├── dsets │ │ ├── __pycache__ │ │ │ ├── loader.cpython-38.pyc │ │ │ ├── utils.cpython-38.pyc │ │ │ ├── __init__.cpython-38.pyc │ │ │ └── preprocess.cpython-38.pyc │ │ ├── __init__.py │ │ ├── utils.py │ │ └── loader.py │ ├── hparams │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── data_args.cpython-38.pyc │ │ │ ├── model_args.cpython-38.pyc │ │ │ ├── general_args.cpython-38.pyc │ │ │ ├── finetuning_args.cpython-38.pyc │ │ │ └── generating_args.cpython-38.pyc │ │ ├── __init__.py │ │ ├── general_args.py │ │ ├── generating_args.py │ │ ├── model_args.py │ │ ├── finetuning_args.py │ │ └── data_args.py │ └── __init__.py ├── export_model.py ├── train_bash.py ├── train_web.py 
├── web_demo.py ├── api_demo.py └── cli_demo.py ├── .DS_Store ├── data ├── .DS_Store ├── example_dataset │ ├── examples.json │ └── example_dataset.py ├── README_zh.md ├── README.md └── dataset_info.json ├── scripts ├── .DS_Store ├── devops-model-pt.sh └── devops-model-sft.sh ├── LEGAL.md ├── requirements.txt ├── conf └── deepspeed_config.json ├── README.md └── README_EN.md /imgs/test: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/llmtuner/extras/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/llmtuner/api/__init__.py: -------------------------------------------------------------------------------- 1 | from llmtuner.api.app import create_app 2 | -------------------------------------------------------------------------------- /src/llmtuner/chat/__init__.py: -------------------------------------------------------------------------------- 1 | from llmtuner.chat.stream_chat import ChatModel 2 | -------------------------------------------------------------------------------- /src/llmtuner/tuner/pt/__init__.py: -------------------------------------------------------------------------------- 1 | from llmtuner.tuner.pt.workflow import run_pt 2 | -------------------------------------------------------------------------------- /src/llmtuner/tuner/rm/__init__.py: -------------------------------------------------------------------------------- 1 | from llmtuner.tuner.rm.workflow import run_rm 2 | -------------------------------------------------------------------------------- /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/.DS_Store -------------------------------------------------------------------------------- /src/llmtuner/tuner/__init__.py: -------------------------------------------------------------------------------- 1 | from llmtuner.tuner.tune import export_model, run_exp 2 | -------------------------------------------------------------------------------- /src/llmtuner/tuner/dpo/__init__.py: -------------------------------------------------------------------------------- 1 | from llmtuner.tuner.dpo.workflow import run_dpo 2 | -------------------------------------------------------------------------------- /src/llmtuner/tuner/ppo/__init__.py: -------------------------------------------------------------------------------- 1 | from llmtuner.tuner.ppo.workflow import run_ppo 2 | -------------------------------------------------------------------------------- /src/llmtuner/tuner/sft/__init__.py: -------------------------------------------------------------------------------- 1 | from llmtuner.tuner.sft.workflow import run_sft 2 | -------------------------------------------------------------------------------- /data/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/data/.DS_Store -------------------------------------------------------------------------------- /src/llmtuner/webui/__init__.py: -------------------------------------------------------------------------------- 1 | from llmtuner.webui.interface import create_ui, create_web_demo 2 | 
-------------------------------------------------------------------------------- /imgs/qrcode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/imgs/qrcode.png -------------------------------------------------------------------------------- /imgs/wechat2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/imgs/wechat2.png -------------------------------------------------------------------------------- /imgs/wechat3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/imgs/wechat3.png -------------------------------------------------------------------------------- /scripts/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/scripts/.DS_Store -------------------------------------------------------------------------------- /src/llmtuner/api/__pycache__/app.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/api/__pycache__/app.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/export_model.py: -------------------------------------------------------------------------------- 1 | from llmtuner import export_model 2 | 3 | 4 | def main(): 5 | export_model() 6 | 7 | 8 | if __name__ == "__main__": 9 | main() 10 | -------------------------------------------------------------------------------- /src/llmtuner/api/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/api/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/dsets/__pycache__/loader.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/dsets/__pycache__/loader.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/dsets/__pycache__/utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/dsets/__pycache__/utils.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/extras/__pycache__/misc.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/extras/__pycache__/misc.cpython-38.pyc -------------------------------------------------------------------------------- 
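Note: `src/export_model.py` above simply calls `llmtuner.export_model()`, which reads its options from the command line through the argument parsers under `src/llmtuner/hparams` and `src/llmtuner/tuner/core/parser.py`. A minimal invocation sketch follows; the flag names are assumptions inferred from the training scripts in `scripts/` and the webui export tab, not confirmed by the files shown here.

```bash
# Hypothetical example: merge a fine-tuned checkpoint and save it as a standalone model.
# Flag names are assumptions; see src/llmtuner/hparams and src/llmtuner/tuner/core/parser.py for the authoritative list.
python src/export_model.py \
    --model_name_or_path path_to_base_model \
    --checkpoint_dir path_to_finetuned_checkpoint \
    --template chatml \
    --finetuning_type full \
    --output_dir path_to_exported_model
```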
/src/llmtuner/tuner/__pycache__/tune.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/tuner/__pycache__/tune.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/chat/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/chat/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/dsets/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/dsets/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/extras/__pycache__/logging.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/extras/__pycache__/logging.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/extras/__pycache__/ploting.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/extras/__pycache__/ploting.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/tuner/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/tuner/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/tuner/core/__init__.py: -------------------------------------------------------------------------------- 1 | from llmtuner.tuner.core.parser import get_train_args, get_infer_args 2 | from llmtuner.tuner.core.loader import load_model_and_tokenizer 3 | -------------------------------------------------------------------------------- /src/llmtuner/chat/__pycache__/stream_chat.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/chat/__pycache__/stream_chat.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/dsets/__pycache__/preprocess.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/dsets/__pycache__/preprocess.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/extras/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/extras/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/extras/__pycache__/callbacks.cpython-38.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/extras/__pycache__/callbacks.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/extras/__pycache__/constants.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/extras/__pycache__/constants.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/extras/__pycache__/template.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/extras/__pycache__/template.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/hparams/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/hparams/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/hparams/__pycache__/data_args.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/hparams/__pycache__/data_args.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/tuner/core/__pycache__/loader.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/tuner/core/__pycache__/loader.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/tuner/core/__pycache__/parser.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/tuner/core/__pycache__/parser.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/tuner/dpo/__pycache__/trainer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/tuner/dpo/__pycache__/trainer.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/tuner/ppo/__pycache__/trainer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/tuner/ppo/__pycache__/trainer.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/tuner/ppo/__pycache__/utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/tuner/ppo/__pycache__/utils.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/tuner/pt/__pycache__/__init__.cpython-38.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/tuner/pt/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/tuner/pt/__pycache__/workflow.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/tuner/pt/__pycache__/workflow.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/tuner/rm/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/tuner/rm/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/tuner/rm/__pycache__/collator.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/tuner/rm/__pycache__/collator.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/tuner/rm/__pycache__/metric.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/tuner/rm/__pycache__/metric.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/tuner/rm/__pycache__/trainer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/tuner/rm/__pycache__/trainer.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/tuner/rm/__pycache__/workflow.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/tuner/rm/__pycache__/workflow.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/tuner/sft/__pycache__/metric.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/tuner/sft/__pycache__/metric.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/tuner/sft/__pycache__/trainer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/tuner/sft/__pycache__/trainer.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/hparams/__pycache__/model_args.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/hparams/__pycache__/model_args.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/tuner/core/__pycache__/__init__.cpython-38.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/tuner/core/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/tuner/core/__pycache__/adapter.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/tuner/core/__pycache__/adapter.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/tuner/core/__pycache__/trainer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/tuner/core/__pycache__/trainer.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/tuner/dpo/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/tuner/dpo/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/tuner/dpo/__pycache__/collator.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/tuner/dpo/__pycache__/collator.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/tuner/dpo/__pycache__/workflow.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/tuner/dpo/__pycache__/workflow.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/tuner/ppo/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/tuner/ppo/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/tuner/ppo/__pycache__/workflow.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/tuner/ppo/__pycache__/workflow.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/tuner/sft/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/tuner/sft/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/tuner/sft/__pycache__/workflow.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/tuner/sft/__pycache__/workflow.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/dsets/__init__.py: 
-------------------------------------------------------------------------------- 1 | from llmtuner.dsets.loader import get_dataset 2 | from llmtuner.dsets.preprocess import preprocess_dataset 3 | from llmtuner.dsets.utils import split_dataset 4 | -------------------------------------------------------------------------------- /src/llmtuner/extras/__pycache__/save_and_load.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/extras/__pycache__/save_and_load.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/hparams/__pycache__/general_args.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/hparams/__pycache__/general_args.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/hparams/__pycache__/finetuning_args.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/hparams/__pycache__/finetuning_args.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/hparams/__pycache__/generating_args.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/CodeFuse-DevOps-Model/main/src/llmtuner/hparams/__pycache__/generating_args.cpython-38.pyc -------------------------------------------------------------------------------- /src/llmtuner/hparams/__init__.py: -------------------------------------------------------------------------------- 1 | from .data_args import DataArguments 2 | from .finetuning_args import FinetuningArguments 3 | from .general_args import GeneralArguments 4 | from .generating_args import GeneratingArguments 5 | from .model_args import ModelArguments 6 | -------------------------------------------------------------------------------- /src/train_bash.py: -------------------------------------------------------------------------------- 1 | from llmtuner import run_exp 2 | # import sys 3 | 4 | def main(): 5 | # print(sys.argv) 6 | run_exp() 7 | 8 | 9 | def _mp_fn(index): 10 | # For xla_spawn (TPUs) 11 | main() 12 | 13 | 14 | if __name__ == "__main__": 15 | main() 16 | -------------------------------------------------------------------------------- /src/train_web.py: -------------------------------------------------------------------------------- 1 | from llmtuner import create_ui 2 | 3 | 4 | def main(): 5 | demo = create_ui() 6 | demo.queue() 7 | demo.launch(server_name="0.0.0.0", server_port=7860, share=False, inbrowser=True) 8 | 9 | 10 | if __name__ == "__main__": 11 | main() 12 | -------------------------------------------------------------------------------- /src/web_demo.py: -------------------------------------------------------------------------------- 1 | from llmtuner import create_web_demo 2 | 3 | 4 | def main(): 5 | demo = create_web_demo() 6 | demo.queue() 7 | demo.launch(server_name="0.0.0.0", server_port=7860, share=False, inbrowser=True) 8 | 9 | 10 | if __name__ == "__main__": 11 | main() 12 | -------------------------------------------------------------------------------- /src/llmtuner/tuner/rm/metric.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | from typing import Dict, Sequence, Tuple, Union 3 | 4 | 5 | def compute_accuracy(eval_preds: Sequence[Union[np.ndarray, Tuple[np.ndarray]]]) -> Dict[str, float]: 6 | preds, _ = eval_preds 7 | return {"accuracy": (preds[0] > preds[1]).sum() / len(preds[0])} 8 | -------------------------------------------------------------------------------- /src/llmtuner/__init__.py: -------------------------------------------------------------------------------- 1 | # Level: api, webui > chat > tuner > dsets > extras, hparams 2 | 3 | # from llmtuner.api import create_app 4 | # from llmtuner.chat import ChatModel 5 | from llmtuner.tuner import export_model, run_exp 6 | # from llmtuner.webui import create_ui, create_web_demo 7 | 8 | 9 | __version__ = "0.1.7" 10 | -------------------------------------------------------------------------------- /src/api_demo.py: -------------------------------------------------------------------------------- 1 | import uvicorn 2 | 3 | from llmtuner import ChatModel, create_app 4 | 5 | 6 | def main(): 7 | chat_model = ChatModel() 8 | app = create_app(chat_model) 9 | print("Visit http://localhost:8000/docs for the API documentation.") 10 | uvicorn.run(app, host="0.0.0.0", port=8000, workers=1) 11 | 12 | 13 | if __name__ == "__main__": 14 | main() 15 | -------------------------------------------------------------------------------- /src/llmtuner/webui/components/__init__.py: -------------------------------------------------------------------------------- 1 | from llmtuner.webui.components.top import create_top 2 | from llmtuner.webui.components.train import create_train_tab 3 | from llmtuner.webui.components.eval import create_eval_tab 4 | from llmtuner.webui.components.infer import create_infer_tab 5 | from llmtuner.webui.components.export import create_export_tab 6 | from llmtuner.webui.components.chatbot import create_chat_box 7 | -------------------------------------------------------------------------------- /LEGAL.md: -------------------------------------------------------------------------------- 1 | Legal Disclaimer 2 | 3 | Within this source code, the comments in Chinese shall be the original, governing version. Any comments in other languages are for reference only. In the event of any conflict between the Chinese language version comments and other language version comments, the Chinese language version shall prevail.
4 | 5 | 法律免责声明 6 | 7 | 关于代码注释部分,中文注释为官方版本,其它语言注释仅做参考。中文注释可能与其它语言注释存在不一致,当中文注释与其它语言注释存在不一致时,请以中文注释为准。 -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=2.0.1 2 | transformers==4.32.0 3 | datasets>=2.12.0 4 | accelerate>=0.21.0 5 | peft>=0.4.0 6 | trl>=0.5.0 7 | scipy 8 | sentencepiece 9 | tiktoken 10 | jieba 11 | rouge-chinese 12 | nltk 13 | gradio>=3.36.0 14 | uvicorn 15 | pydantic==1.10.11 16 | fastapi==0.95.1 17 | sse-starlette 18 | matplotlib 19 | loguru 20 | jsonlines 21 | transformers_stream_generator==0.0.4 22 | deepspeed>=0.9.3 23 | einops 24 | -------------------------------------------------------------------------------- /src/llmtuner/hparams/general_args.py: -------------------------------------------------------------------------------- 1 | from typing import Literal, Optional 2 | from dataclasses import dataclass, field 3 | 4 | 5 | @dataclass 6 | class GeneralArguments: 7 | r""" 8 | Arguments pertaining to which stage we are going to perform. 9 | """ 10 | stage: Optional[Literal["pt", "sft", "rm", "ppo", "dpo"]] = field( 11 | default="sft", 12 | metadata={"help": "Which stage will be performed in training."} 13 | ) 14 | -------------------------------------------------------------------------------- /src/llmtuner/webui/css.py: -------------------------------------------------------------------------------- 1 | CSS = r""" 2 | .modal-box { 3 | position: fixed !important; 4 | top: 50%; 5 | left: 50%; 6 | transform: translate(-50%, -50%); /* center horizontally */ 7 | max-width: 1000px; 8 | max-height: 750px; 9 | overflow-y: scroll !important; 10 | background-color: var(--input-background-fill); 11 | border: 2px solid black !important; 12 | z-index: 1000; 13 | } 14 | 15 | .dark .modal-box { 16 | border: 2px solid white !important; 17 | } 18 | """ 19 | -------------------------------------------------------------------------------- /conf/deepspeed_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "train_micro_batch_size_per_gpu": "auto", 3 | "gradient_accumulation_steps": "auto", 4 | "gradient_clipping": "auto", 5 | "zero_allow_untested_optimizer": true, 6 | "bf16": { 7 | "enabled": true 8 | }, 9 | "zero_optimization": { 10 | "stage": 2, 11 | "offload_optimizer": { 12 | "device": "cpu", 13 | "pin_memory": true 14 | }, 15 | "allgather_partitions": true, 16 | "allgather_bucket_size": 5e8, 17 | "reduce_scatter": true, 18 | "reduce_bucket_size": 5e8, 19 | "overlap_comm": false, 20 | "contiguous_gradients": true 21 | } 22 | } -------------------------------------------------------------------------------- /data/example_dataset/examples.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "instruction": "听起来很不错。人工智能可能在哪些方面面临挑战呢?", 4 | "input": "", 5 | "output": "人工智能面临的挑战包括数据隐私、安全和道德方面的问题,以及影响就业机会的自动化等问题。", 6 | "history": [ 7 | ["你好,你能帮我解答一个问题吗?", "当然,请问有什么问题?"], 8 | ["我想了解人工智能的未来发展方向,你有什么想法吗?", "人工智能在未来的发展方向可能包括更强大的机器学习算法,更先进的自然语言处理技术,以及更加智能的机器人。"] 9 | ] 10 | }, 11 | { 12 | "instruction": "好的,谢谢你!", 13 | "input": "", 14 | "output": "不客气,有其他需要帮忙的地方可以继续问我。", 15 | "history": [ 16 | ["你好,能告诉我今天天气怎么样吗?", "当然可以,请问您所在的城市是哪里?"], 17 | ["我在纽约。", "纽约今天晴间多云,气温最高约26摄氏度,最低约18摄氏度,记得注意保暖喔。"] 18 | ] 19 | } 20 | ] 21 | -------------------------------------------------------------------------------- /src/llmtuner/webui/components/data.py: 
-------------------------------------------------------------------------------- 1 | import gradio as gr 2 | from typing import TYPE_CHECKING, Tuple 3 | 4 | if TYPE_CHECKING: 5 | from gradio.blocks import Block 6 | from gradio.components import Component 7 | 8 | 9 | def create_preview_box() -> Tuple["Block", "Component", "Component", "Component"]: 10 | with gr.Box(visible=False, elem_classes="modal-box") as preview_box: 11 | with gr.Row(): 12 | preview_count = gr.Number(interactive=False) 13 | 14 | with gr.Row(): 15 | preview_samples = gr.JSON(interactive=False) 16 | 17 | close_btn = gr.Button() 18 | 19 | close_btn.click(lambda: gr.update(visible=False), outputs=[preview_box], queue=False) 20 | 21 | return preview_box, preview_count, preview_samples, close_btn 22 | -------------------------------------------------------------------------------- /data/README_zh.md: -------------------------------------------------------------------------------- 1 | 如果您使用自定义数据集,请务必在 `dataset_info.json` 文件中以如下格式提供您的数据集定义。 2 | 3 | ```json 4 | "数据集名称": { 5 | "hf_hub_url": "HuggingFace上的项目地址(若指定,则忽略下列三个参数)", 6 | "script_url": "包含数据加载脚本的本地文件夹名称(若指定,则忽略下列两个参数)", 7 | "file_name": "该目录下数据集文件的名称(若上述参数未指定,则此项必需)", 8 | "file_sha1": "数据集文件的SHA-1哈希值(可选)", 9 | "columns": { 10 | "prompt": "数据集代表提示词的表头名称(默认:instruction)", 11 | "query": "数据集代表请求的表头名称(默认:input)", 12 | "response": "数据集代表回答的表头名称(默认:output)", 13 | "history": "数据集代表历史对话的表头名称(默认:None)" 14 | }, 15 | "stage": "数据所应用的训练阶段,可选值有 pt, sft, rm 三个,对应预训练,指令监督微调(PPO),奖励模型(DPO)训练, 默认为None,表示不限制" 16 | } 17 | ``` 18 | 19 | 其中 `prompt` 和 `response` 列应当是非空的字符串。`query` 列的内容将会和 `prompt` 列拼接作为模型输入。`history` 列应当是一个列表,其中每个元素是一个字符串二元组,分别代表用户请求和模型答复。 20 | 21 | 对于奖励模型或 DPO 训练的数据集,`response` 列应当是一个字符串列表,排在前面的代表更优的答案,例如: 22 | 23 | ```json 24 | { 25 | "instruction": "Question", 26 | "input": "", 27 | "output": [ 28 | "Chosen answer", 29 | "Rejected answer" 30 | ], 31 | "stage": "rm" 32 | } 33 | ``` 34 | -------------------------------------------------------------------------------- /src/llmtuner/tuner/rm/collator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from dataclasses import dataclass 3 | from typing import Any, Dict, Sequence 4 | from transformers import DataCollatorWithPadding 5 | 6 | 7 | @dataclass 8 | class PairwiseDataCollatorWithPadding(DataCollatorWithPadding): 9 | r""" 10 | Data collator for pairwise data. 11 | """ 12 | 13 | def __call__(self, features: Sequence[Dict[str, Any]]) -> Dict[str, torch.Tensor]: 14 | r""" 15 | Pads batched data to the longest sequence in the batch. 16 | 17 | We generate 2 * n examples where the first n examples represent chosen examples and 18 | the last n examples represent rejected examples. 
19 | """ 20 | features = [ 21 | { 22 | "input_ids": feature["prompt_ids"] + feature[key], 23 | "attention_mask": [1] * (len(feature["prompt_ids"]) + len(feature[key])) 24 | } 25 | for key in ("chosen_ids", "rejected_ids") for feature in features 26 | ] 27 | return super().__call__(features) 28 | -------------------------------------------------------------------------------- /scripts/devops-model-pt.sh: -------------------------------------------------------------------------------- 1 | set -v 2 | 3 | nvidia-smi 4 | 5 | torchrun --nproc_per_node=8 --nnodes=$WORLD_SIZE --master_port=$MASTER_PORT --master_addr=$MASTER_ADDR --node_rank=$RANK src/train_bash.py \ 6 | --deepspeed conf/deepspeed_config.json \ 7 | --stage pt \ 8 | --model_name_or_path path_to_model \ 9 | --do_train \ 10 | --report_to 'tensorboard' \ 11 | --dataset your_corpus \ 12 | --template default \ 13 | --finetuning_type full \ 14 | --output_dir path_to_output_checkpoint_path \ 15 | --overwrite_cache \ 16 | --per_device_train_batch_size 8 \ 17 | --per_device_eval_batch_size 8 \ 18 | --gradient_accumulation_steps 1 \ 19 | --lr_scheduler_type cosine \ 20 | --warmup_ratio 0.05 \ 21 | --evaluation_strategy steps \ 22 | --logging_steps 10 \ 23 | --max_steps 1000 \ 24 | --save_steps 1000 \ 25 | --eval_steps 1000 \ 26 | --learning_rate 5e-6 \ 27 | --plot_loss \ 28 | --max_source_length=2048 \ 29 | --dataloader_num_workers 8 \ 30 | --val_size 0.01 \ 31 | --bf16 \ 32 | --overwrite_output_dir -------------------------------------------------------------------------------- /scripts/devops-model-sft.sh: -------------------------------------------------------------------------------- 1 | set -v 2 | 3 | torchrun --nproc_per_node=8 --nnodes=$WORLD_SIZE --master_port=$MASTER_PORT --master_addr=$MASTER_ADDR --node_rank=$RANK src/train_bash.py \ 4 | --deepspeed path_to_deepspeed_config \ 5 | --stage sft \ 6 | --model_name_or_path path_to_model_path \ 7 | --do_train \ 8 | --report_to 'tensorboard' \ 9 | --dataset your_corpus \ 10 | --template chatml \ 11 | --finetuning_type full \ 12 | --output_dir path_to_save_model_path \ 13 | --overwrite_cache \ 14 | --per_device_train_batch_size 8 \ 15 | --per_device_eval_batch_size 8 \ 16 | --gradient_accumulation_steps 4 \ 17 | --lr_scheduler_type cosine \ 18 | --warmup_ratio 0.20 \ 19 | --save_strategy epoch \ 20 | --evaluation_strategy epoch \ 21 | --num_train_epochs 3 \ 22 | --logging_steps 10 \ 23 | --learning_rate 5e-6 \ 24 | --plot_loss \ 25 | --max_source_length=2048 \ 26 | --max_target_length=2048 \ 27 | --dataloader_num_workers 8 \ 28 | --val_size 0.01 \ 29 | --bf16 \ 30 | --overwrite_output_dir \ 31 | --max_grad_norm 1.0 32 | -------------------------------------------------------------------------------- /src/llmtuner/webui/components/export.py: -------------------------------------------------------------------------------- 1 | from typing import TYPE_CHECKING, Dict 2 | import gradio as gr 3 | 4 | from llmtuner.webui.utils import save_model 5 | 6 | if TYPE_CHECKING: 7 | from gradio.components import Component 8 | 9 | 10 | def create_export_tab(top_elems: Dict[str, "Component"]) -> Dict[str, "Component"]: 11 | with gr.Row(): 12 | save_dir = gr.Textbox() 13 | max_shard_size = gr.Slider(value=10, minimum=1, maximum=100) 14 | 15 | export_btn = gr.Button() 16 | info_box = gr.Textbox(show_label=False, interactive=False) 17 | 18 | export_btn.click( 19 | save_model, 20 | [ 21 | top_elems["lang"], 22 | top_elems["model_name"], 23 | top_elems["checkpoints"], 24 | top_elems["finetuning_type"], 25 | 
top_elems["template"], 26 | max_shard_size, 27 | save_dir 28 | ], 29 | [info_box] 30 | ) 31 | 32 | return dict( 33 | save_dir=save_dir, 34 | max_shard_size=max_shard_size, 35 | export_btn=export_btn, 36 | info_box=info_box 37 | ) 38 | -------------------------------------------------------------------------------- /src/cli_demo.py: -------------------------------------------------------------------------------- 1 | from llmtuner import ChatModel 2 | 3 | 4 | def main(): 5 | chat_model = ChatModel() 6 | history = [] 7 | print("Welcome to the CLI application, use `clear` to remove the history, use `exit` to exit the application.") 8 | 9 | while True: 10 | try: 11 | query = input("\nUser: ") 12 | except UnicodeDecodeError: 13 | print("Detected decoding error at the inputs, please set the terminal encoding to utf-8.") 14 | continue 15 | except Exception: 16 | raise 17 | 18 | if query.strip() == "exit": 19 | break 20 | 21 | if query.strip() == "clear": 22 | history = [] 23 | print("History has been removed.") 24 | continue 25 | 26 | print("Assistant: ", end="", flush=True) 27 | 28 | response = "" 29 | for new_text in chat_model.stream_chat(query, history): 30 | print(new_text, end="", flush=True) 31 | response += new_text 32 | print() 33 | 34 | history = history + [(query, response)] 35 | 36 | 37 | if __name__ == "__main__": 38 | main() 39 | -------------------------------------------------------------------------------- /src/llmtuner/extras/logging.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import logging 3 | from transformers.utils import logging as t_logging 4 | 5 | from loguru import logger 6 | 7 | class LoggerHandler(logging.Handler): 8 | 9 | def __init__(self): 10 | super().__init__() 11 | self.log = "" 12 | 13 | def reset(self): 14 | self.log = "" 15 | 16 | def emit(self, record): 17 | if record.name == "httpx": 18 | return 19 | log_entry = self.format(record) 20 | self.log += log_entry 21 | self.log += "\n\n" 22 | 23 | 24 | def reset_logging(): 25 | r""" 26 | Removes basic config of root logger 27 | """ 28 | root = logging.getLogger() 29 | list(map(root.removeHandler, root.handlers)) 30 | list(map(root.removeFilter, root.filters)) 31 | 32 | 33 | # def get_logger(name: str) -> logging.Logger: 34 | # formatter = logging.Formatter( 35 | # fmt="%(asctime)s - %(levelname)s - %(name)s - %(message)s", 36 | # datefmt="%m/%d/%Y %H:%M:%S" 37 | # ) 38 | # handler = logging.StreamHandler(sys.stdout) 39 | # handler.setFormatter(formatter) 40 | 41 | # logger = logging.getLogger(name) 42 | # logger.setLevel(logging.INFO) 43 | # logger.addHandler(handler) 44 | 45 | # return logger 46 | 47 | # def get_logger(name: str): 48 | # return logger 49 | 50 | def get_logger(name: str): 51 | logger = t_logging.get_logger('transformers') 52 | return logger -------------------------------------------------------------------------------- /data/example_dataset/example_dataset.py: -------------------------------------------------------------------------------- 1 | import json 2 | import datasets 3 | from typing import Any, Dict, List 4 | 5 | 6 | _DESCRIPTION = "An example of dataset for LLaMA." 
7 | _CITATION = "" 8 | _HOMEPAGE = "" 9 | _LICENSE = "" 10 | _URL = "examples.json" 11 | 12 | 13 | class ExampleDataset(datasets.GeneratorBasedBuilder): 14 | 15 | VERSION = datasets.Version("0.0.0") 16 | 17 | def _info(self) -> datasets.DatasetInfo: 18 | features = datasets.Features({ 19 | "instruction": datasets.Value("string"), 20 | "input": datasets.Value("string"), 21 | "output": datasets.Value("string"), 22 | "history": datasets.Sequence(datasets.Sequence(datasets.Value("string"))) 23 | }) 24 | return datasets.DatasetInfo( 25 | description=_DESCRIPTION, 26 | features=features, 27 | homepage=_HOMEPAGE, 28 | license=_LICENSE, 29 | citation=_CITATION 30 | ) 31 | 32 | def _split_generators(self, dl_manager: datasets.DownloadManager) -> List[datasets.SplitGenerator]: 33 | file_path = dl_manager.download(_URL) 34 | return [ 35 | datasets.SplitGenerator( 36 | name=datasets.Split.TRAIN, 37 | gen_kwargs={ 38 | "filepath": file_path 39 | } 40 | ) 41 | ] 42 | 43 | def _generate_examples(self, filepath: str) -> Dict[int, Dict[str, Any]]: 44 | example_dataset = json.load(open(filepath, "r", encoding="utf-8")) 45 | for key, example in enumerate(example_dataset): 46 | yield key, example 47 | -------------------------------------------------------------------------------- /src/llmtuner/webui/components/infer.py: -------------------------------------------------------------------------------- 1 | from typing import TYPE_CHECKING, Dict 2 | 3 | import gradio as gr 4 | 5 | from llmtuner.webui.chat import WebChatModel 6 | from llmtuner.webui.components.chatbot import create_chat_box 7 | 8 | if TYPE_CHECKING: 9 | from gradio.components import Component 10 | 11 | 12 | def create_infer_tab(top_elems: Dict[str, "Component"]) -> Dict[str, "Component"]: 13 | with gr.Row(): 14 | load_btn = gr.Button() 15 | unload_btn = gr.Button() 16 | 17 | info_box = gr.Textbox(show_label=False, interactive=False) 18 | 19 | chat_model = WebChatModel() 20 | chat_box, chatbot, history, chat_elems = create_chat_box(chat_model) 21 | 22 | load_btn.click( 23 | chat_model.load_model, 24 | [ 25 | top_elems["lang"], 26 | top_elems["model_name"], 27 | top_elems["checkpoints"], 28 | top_elems["finetuning_type"], 29 | top_elems["quantization_bit"], 30 | top_elems["template"], 31 | top_elems["system_prompt"] 32 | ], 33 | [info_box] 34 | ).then( 35 | lambda: gr.update(visible=(chat_model.model is not None)), outputs=[chat_box] 36 | ) 37 | 38 | unload_btn.click( 39 | chat_model.unload_model, [top_elems["lang"]], [info_box] 40 | ).then( 41 | lambda: ([], []), outputs=[chatbot, history] 42 | ).then( 43 | lambda: gr.update(visible=(chat_model.model is not None)), outputs=[chat_box] 44 | ) 45 | 46 | return dict( 47 | info_box=info_box, 48 | load_btn=load_btn, 49 | unload_btn=unload_btn, 50 | **chat_elems 51 | ) 52 | -------------------------------------------------------------------------------- /src/llmtuner/webui/manager.py: -------------------------------------------------------------------------------- 1 | import gradio as gr 2 | from gradio.components import Component 3 | from typing import Any, Dict, List 4 | 5 | from llmtuner.webui.common import get_model_path, list_dataset, load_config 6 | from llmtuner.webui.locales import LOCALES 7 | from llmtuner.webui.utils import get_time 8 | 9 | 10 | class Manager: 11 | 12 | def __init__(self, elem_list: List[Dict[str, Component]]): 13 | self.elem_list = elem_list 14 | 15 | def gen_refresh(self, lang: str) -> Dict[str, Any]: 16 | refresh_dict = { 17 | "dataset": {"choices": 
list_dataset()["choices"]}, 18 | "output_dir": {"value": get_time()} 19 | } 20 | 21 | user_config = load_config() 22 | if not lang: 23 | if user_config.get("lang", None): 24 | lang = user_config["lang"] 25 | else: 26 | lang = "en" 27 | 28 | refresh_dict["lang"] = {"value": lang} 29 | 30 | if user_config.get("last_model", None): 31 | refresh_dict["model_name"] = {"value": user_config["last_model"]} 32 | refresh_dict["model_path"] = {"value": get_model_path(user_config["last_model"])} 33 | 34 | return refresh_dict 35 | 36 | def gen_label(self, lang: str) -> Dict[Component, Dict[str, Any]]: # cannot use TYPE_CHECKING 37 | update_dict = {} 38 | refresh_dict = self.gen_refresh(lang) 39 | 40 | for elems in self.elem_list: 41 | for name, component in elems.items(): 42 | update_dict[component] = gr.update( 43 | **LOCALES[name][refresh_dict["lang"]["value"]], **refresh_dict.get(name, {}) 44 | ) 45 | 46 | return update_dict 47 | -------------------------------------------------------------------------------- /src/llmtuner/tuner/ppo/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from typing import TYPE_CHECKING, Dict, List, Literal, Optional, Tuple 3 | 4 | from llmtuner.extras.constants import LAYERNORM_NAMES 5 | 6 | if TYPE_CHECKING: 7 | from trl import AutoModelForCausalLMWithValueHead 8 | 9 | 10 | def replace_model(model: "AutoModelForCausalLMWithValueHead", target: Literal["default", "reward"]) -> None: 11 | if target == "reward": # save default head temporarily 12 | valuehead_state_dict = model.v_head.state_dict() 13 | setattr(model, "default_head_weight", valuehead_state_dict["summary.weight"]) 14 | setattr(model, "default_head_bias", valuehead_state_dict["summary.bias"]) 15 | 16 | model.pretrained_model.set_adapter(target) # set the LoRA adapter to be active 17 | model.v_head.load_state_dict({ 18 | "summary.weight": getattr(model, "{}_head_weight".format(target)), 19 | "summary.bias": getattr(model, "{}_head_bias".format(target)) 20 | }) 21 | 22 | 23 | def cast_layernorm_dtype( 24 | model: "AutoModelForCausalLMWithValueHead", 25 | compute_dtype: torch.dtype, 26 | layer_norm_params: Optional[Dict[str, torch.Tensor]] = None, 27 | layer_norm_names: Optional[List[str]] = LAYERNORM_NAMES 28 | ) -> Tuple["AutoModelForCausalLMWithValueHead", Dict[str, torch.Tensor]]: 29 | 30 | layer_norm_state_dict = {} 31 | 32 | for name, param in model.named_parameters(): 33 | if param.ndim == 1 and any(layer_norm_name in name for layer_norm_name in layer_norm_names): 34 | if layer_norm_params is None: 35 | layer_norm_state_dict[name] = param.data.detach().clone() # store float32 weights for stability 36 | param.data = param.data.to(compute_dtype) 37 | else: 38 | param.data = layer_norm_params[name] # restore float32 weights 39 | 40 | return model, layer_norm_state_dict 41 | -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- 1 | If you are using a custom dataset, please provide your dataset definition in the following format in `dataset_info.json`. 2 | 3 | ```json 4 | "dataset_name": { 5 | "hf_hub_url": "the name of the dataset repository on the HuggingFace hub. (if specified, ignore below 3 arguments)", 6 | "script_url": "the name of the directory containing a dataset loading script. (if specified, ignore below 2 arguments)", 7 | "file_name": "the name of the dataset file in the this directory. 
(required if above are not specified)", 8 | "file_sha1": "the SHA-1 hash value of the dataset file. (optional)", 9 | "columns": { 10 | "prompt": "the name of the column in the datasets containing the prompts. (default: instruction)", 11 | "query": "the name of the column in the datasets containing the queries. (default: input)", 12 | "response": "the name of the column in the datasets containing the responses. (default: output)", 13 | "history": "the name of the column in the datasets containing the history of chat. (default: None)" 14 | }, 15 | "stage": "The stage at which the data is being used: pt, sft, and rm, which correspond to pre-training, supervised fine-tuning(PPO), and reward model (DPO) training, respectively.(default: None)" 16 | } 17 | ``` 18 | 19 | where the `prompt` and `response` columns should contain non-empty values. The `query` column will be concatenated with the `prompt` column and used as input for the model. The `history` column should contain a list where each element is a string tuple representing a query-response pair. 20 | 21 | For datasets used in reward modeling or DPO training, the `response` column should be a string list, with the preferred answers appearing first, for example: 22 | 23 | ```json 24 | { 25 | "instruction": "Question", 26 | "input": "", 27 | "output": [ 28 | "Chosen answer", 29 | "Rejected answer" 30 | ], 31 | "stage": "rm" 32 | } 33 | ``` 34 | -------------------------------------------------------------------------------- /src/llmtuner/webui/components/chatbot.py: -------------------------------------------------------------------------------- 1 | from typing import TYPE_CHECKING, Dict, Optional, Tuple 2 | 3 | import gradio as gr 4 | 5 | if TYPE_CHECKING: 6 | from gradio.blocks import Block 7 | from gradio.components import Component 8 | from llmtuner.webui.chat import WebChatModel 9 | 10 | 11 | def create_chat_box( 12 | chat_model: "WebChatModel", 13 | visible: Optional[bool] = False 14 | ) -> Tuple["Block", "Component", "Component", Dict[str, "Component"]]: 15 | with gr.Box(visible=visible) as chat_box: 16 | chatbot = gr.Chatbot() 17 | 18 | with gr.Row(): 19 | with gr.Column(scale=4): 20 | system = gr.Textbox(show_label=False) 21 | query = gr.Textbox(show_label=False, lines=8) 22 | submit_btn = gr.Button(variant="primary") 23 | 24 | with gr.Column(scale=1): 25 | clear_btn = gr.Button() 26 | max_new_tokens = gr.Slider(10, 2048, value=chat_model.generating_args.max_new_tokens, step=1) 27 | top_p = gr.Slider(0.01, 1, value=chat_model.generating_args.top_p, step=0.01) 28 | temperature = gr.Slider(0.01, 1.5, value=chat_model.generating_args.temperature, step=0.01) 29 | 30 | history = gr.State([]) 31 | 32 | submit_btn.click( 33 | chat_model.predict, 34 | [chatbot, query, history, system, max_new_tokens, top_p, temperature], 35 | [chatbot, history], 36 | show_progress=True 37 | ).then( 38 | lambda: gr.update(value=""), outputs=[query] 39 | ) 40 | 41 | clear_btn.click(lambda: ([], []), outputs=[chatbot, history], show_progress=True) 42 | 43 | return chat_box, chatbot, history, dict( 44 | system=system, 45 | query=query, 46 | submit_btn=submit_btn, 47 | clear_btn=clear_btn, 48 | max_new_tokens=max_new_tokens, 49 | top_p=top_p, 50 | temperature=temperature 51 | ) 52 | -------------------------------------------------------------------------------- /src/llmtuner/extras/ploting.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | import json 4 | import matplotlib.pyplot as plt 5 | 
from typing import List, Optional 6 | from transformers.trainer import TRAINER_STATE_NAME 7 | 8 | from llmtuner.extras.logging import get_logger 9 | 10 | 11 | logger = get_logger(__name__) 12 | 13 | 14 | def smooth(scalars: List[float]) -> List[float]: 15 | r""" 16 | EMA implementation according to TensorBoard. 17 | """ 18 | last = scalars[0] 19 | smoothed = list() 20 | weight = 1.8 * (1 / (1 + math.exp(-0.05 * len(scalars))) - 0.5) # a sigmoid function 21 | for next_val in scalars: 22 | smoothed_val = last * weight + (1 - weight) * next_val 23 | smoothed.append(smoothed_val) 24 | last = smoothed_val 25 | return smoothed 26 | 27 | 28 | def plot_loss(save_dictionary: os.PathLike, keys: Optional[List[str]] = ["loss"]) -> None: 29 | 30 | with open(os.path.join(save_dictionary, TRAINER_STATE_NAME), "r", encoding="utf-8") as f: 31 | data = json.load(f) 32 | 33 | for key in keys: 34 | steps, metrics = [], [] 35 | for i in range(len(data["log_history"])): 36 | if key in data["log_history"][i]: 37 | steps.append(data["log_history"][i]["step"]) 38 | metrics.append(data["log_history"][i][key]) 39 | 40 | if len(metrics) == 0: 41 | logger.warning(f"No metric {key} to plot.") 42 | continue 43 | 44 | plt.figure() 45 | plt.plot(steps, metrics, alpha=0.4, label="original") 46 | plt.plot(steps, smooth(metrics), label="smoothed") 47 | plt.title("training {} of {}".format(key, save_dictionary)) 48 | plt.xlabel("step") 49 | plt.ylabel(key) 50 | plt.legend() 51 | plt.savefig(os.path.join(save_dictionary, "training_{}.png".format(key)), format="png", dpi=100) 52 | print("Figure saved:", os.path.join(save_dictionary, "training_{}.png".format(key))) 53 | -------------------------------------------------------------------------------- /src/llmtuner/hparams/generating_args.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional 2 | from dataclasses import asdict, dataclass, field 3 | 4 | 5 | @dataclass 6 | class GeneratingArguments: 7 | r""" 8 | Arguments pertaining to specify the decoding parameters. 9 | """ 10 | do_sample: Optional[bool] = field( 11 | default=True, 12 | metadata={"help": "Whether or not to use sampling, use greedy decoding otherwise."} 13 | ) 14 | temperature: Optional[float] = field( 15 | default=0.95, 16 | metadata={"help": "The value used to modulate the next token probabilities."} 17 | ) 18 | top_p: Optional[float] = field( 19 | default=0.7, 20 | metadata={"help": "The smallest set of most probable tokens with probabilities that add up to top_p or higher are kept."} 21 | ) 22 | top_k: Optional[int] = field( 23 | default=50, 24 | metadata={"help": "The number of highest probability vocabulary tokens to keep for top-k filtering."} 25 | ) 26 | num_beams: Optional[int] = field( 27 | default=1, 28 | metadata={"help": "Number of beams for beam search. 1 means no beam search."} 29 | ) 30 | max_length: Optional[int] = field( 31 | default=None, 32 | metadata={"help": "The maximum length the generated tokens can have. It can be overridden by max_new_tokens."} 33 | ) 34 | max_new_tokens: Optional[int] = field( 35 | default=512, 36 | metadata={"help": "The maximum numbers of tokens to generate, ignoring the number of tokens in the prompt."} 37 | ) 38 | repetition_penalty: Optional[float] = field( 39 | default=1.0, 40 | metadata={"help": "The parameter for repetition penalty. 
1.0 means no penalty."} 41 | ) 42 | length_penalty: Optional[float] = field( 43 | default=1.0, 44 | metadata={"help": "Exponential penalty to the length that is used with beam-based generation."} 45 | ) 46 | 47 | def to_dict(self) -> Dict[str, Any]: 48 | args = asdict(self) 49 | if args.get("max_new_tokens", None): 50 | args.pop("max_length", None) 51 | return args 52 | -------------------------------------------------------------------------------- /src/llmtuner/tuner/sft/metric.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from dataclasses import dataclass 3 | from typing import TYPE_CHECKING, Dict, Sequence, Tuple, Union 4 | 5 | import jieba 6 | from rouge_chinese import Rouge 7 | from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction 8 | 9 | from llmtuner.extras.constants import IGNORE_INDEX 10 | 11 | if TYPE_CHECKING: 12 | from transformers.tokenization_utils import PreTrainedTokenizer 13 | 14 | 15 | @dataclass 16 | class ComputeMetrics: 17 | r""" 18 | Wraps the tokenizer into metric functions, used in Seq2SeqPeftTrainer. 19 | """ 20 | 21 | tokenizer: "PreTrainedTokenizer" 22 | 23 | def __call__(self, eval_preds: Sequence[Union[np.ndarray, Tuple[np.ndarray]]]) -> Dict[str, float]: 24 | r""" 25 | Uses the model predictions to compute metrics. 26 | """ 27 | preds, labels = eval_preds 28 | score_dict = {"rouge-1": [], "rouge-2": [], "rouge-l": [], "bleu-4": []} 29 | 30 | preds = np.where(preds != IGNORE_INDEX, preds, self.tokenizer.pad_token_id) 31 | labels = np.where(labels != IGNORE_INDEX, labels, self.tokenizer.pad_token_id) 32 | 33 | decoded_preds = self.tokenizer.batch_decode(preds, skip_special_tokens=True) 34 | decoded_labels = self.tokenizer.batch_decode(labels, skip_special_tokens=True) 35 | 36 | for pred, label in zip(decoded_preds, decoded_labels): 37 | hypothesis = list(jieba.cut(pred)) 38 | reference = list(jieba.cut(label)) 39 | 40 | if len(" ".join(hypothesis).split()) == 0 or len(" ".join(reference).split()) == 0: 41 | result = {"rouge-1": {"f": 0.0}, "rouge-2": {"f": 0.0}, "rouge-l": {"f": 0.0}} 42 | else: 43 | rouge = Rouge() 44 | scores = rouge.get_scores(" ".join(hypothesis), " ".join(reference)) 45 | result = scores[0] 46 | 47 | for k, v in result.items(): 48 | score_dict[k].append(round(v["f"] * 100, 4)) 49 | 50 | bleu_score = sentence_bleu([list(label)], list(pred), smoothing_function=SmoothingFunction().method3) 51 | score_dict["bleu-4"].append(round(bleu_score * 100, 4)) 52 | 53 | return {k: float(np.mean(v)) for k, v in score_dict.items()} 54 | -------------------------------------------------------------------------------- /src/llmtuner/extras/save_and_load.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from typing import Dict 4 | 5 | from transformers.trainer import WEIGHTS_NAME, WEIGHTS_INDEX_NAME 6 | from transformers.modeling_utils import load_sharded_checkpoint 7 | 8 | from llmtuner.extras.constants import VALUE_HEAD_FILE_NAME 9 | from llmtuner.extras.logging import get_logger 10 | 11 | 12 | logger = get_logger(__name__) 13 | 14 | 15 | def get_state_dict(model: torch.nn.Module) -> Dict[str, torch.Tensor]: 16 | state_dict: Dict[str, torch.Tensor] = model.state_dict() 17 | filtered_state_dict = {} 18 | 19 | for k, v in model.named_parameters(): 20 | if v.requires_grad: 21 | filtered_state_dict[k] = state_dict[k].cpu().clone().detach() 22 | 23 | return filtered_state_dict 24 | 25 | 26 | def 
load_trainable_params(model: torch.nn.Module, checkpoint_dir: os.PathLike) -> bool: 27 | weights_file = os.path.join(checkpoint_dir, WEIGHTS_NAME) 28 | if os.path.exists(weights_file): 29 | model_state_dict = torch.load(weights_file, map_location="cpu") 30 | model.load_state_dict(model_state_dict, strict=False) # skip missing keys 31 | elif os.path.exists(os.path.join(checkpoint_dir, WEIGHTS_INDEX_NAME)): 32 | load_sharded_checkpoint(model, checkpoint_dir, strict=False) 33 | else: 34 | logger.warning("Provided path ({}) does not contain pre-trained weights.".format(checkpoint_dir)) 35 | return False 36 | return True 37 | 38 | 39 | def load_valuehead_params(model: torch.nn.Module, checkpoint_dir: os.PathLike) -> bool: 40 | valuehead_file = os.path.join(checkpoint_dir, VALUE_HEAD_FILE_NAME) 41 | if not os.path.exists(valuehead_file): 42 | logger.warning("Provided path ({}) does not contain valuehead weights.".format(checkpoint_dir)) 43 | return False 44 | valuehead_state_dict = torch.load(valuehead_file, map_location="cpu") 45 | model.register_buffer("reward_head_weight", valuehead_state_dict["summary.weight"]) 46 | model.register_buffer("reward_head_bias", valuehead_state_dict["summary.bias"]) 47 | model.register_buffer("default_head_weight", torch.zeros_like(valuehead_state_dict["summary.weight"])) 48 | model.register_buffer("default_head_bias", torch.zeros_like(valuehead_state_dict["summary.bias"])) 49 | return True 50 | -------------------------------------------------------------------------------- /src/llmtuner/tuner/dpo/collator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from dataclasses import dataclass 3 | from typing import Any, Dict, List, Sequence, Tuple 4 | from transformers import DataCollatorForSeq2Seq 5 | 6 | 7 | @dataclass 8 | class DPODataCollatorWithPadding(DataCollatorForSeq2Seq): 9 | r""" 10 | Data collator for pairwise data. 11 | """ 12 | 13 | def _pad_labels(self, batch: torch.Tensor, positions: List[Tuple[int, int]]) -> torch.Tensor: 14 | padded_labels = [] 15 | for feature, (prompt_len, answer_len) in zip(batch, positions): 16 | if self.tokenizer.padding_side == "left": 17 | start, end = feature.size(0) - answer_len, feature.size(0) 18 | else: 19 | start, end = prompt_len, prompt_len + answer_len # the answer spans [prompt_len, prompt_len + answer_len) 20 | padded_tensor = self.label_pad_token_id * torch.ones_like(feature) 21 | padded_tensor[start:end] = feature[start:end] 22 | padded_labels.append(padded_tensor) 23 | return torch.stack(padded_labels, dim=0).contiguous() # in contiguous memory 24 | 25 | def __call__(self, features: Sequence[Dict[str, Any]]) -> Dict[str, torch.Tensor]: 26 | r""" 27 | Pads batched data to the longest sequence in the batch. 28 | 29 | We generate 2 * n examples where the first n examples represent chosen examples and 30 | the last n examples represent rejected examples.
31 | """ 32 | concatenated_features = [] 33 | label_positions = [] 34 | for key in ("chosen_ids", "rejected_ids"): 35 | for feature in features: 36 | prompt_len, answer_len = len(feature["prompt_ids"]), len(feature[key]) 37 | concatenated_features.append({ 38 | "input_ids": feature["prompt_ids"] + feature[key], 39 | "attention_mask": [1] * (prompt_len + answer_len) 40 | }) 41 | label_positions.append((prompt_len, answer_len)) 42 | 43 | batch = self.tokenizer.pad( 44 | concatenated_features, 45 | padding=self.padding, 46 | max_length=self.max_length, 47 | pad_to_multiple_of=self.pad_to_multiple_of, 48 | return_tensors=self.return_tensors, 49 | ) 50 | batch["labels"] = self._pad_labels(batch["input_ids"], label_positions) 51 | return batch 52 | -------------------------------------------------------------------------------- /src/llmtuner/api/protocol.py: -------------------------------------------------------------------------------- 1 | import time 2 | from enum import Enum 3 | from pydantic import BaseModel, Field 4 | from typing import List, Optional 5 | 6 | 7 | class Role(str, Enum): 8 | USER = "user" 9 | ASSISTANT = "assistant" 10 | SYSTEM = "system" 11 | 12 | 13 | class Finish(str, Enum): 14 | STOP = "stop" 15 | LENGTH = "length" 16 | 17 | 18 | class ModelCard(BaseModel): 19 | id: str 20 | object: Optional[str] = "model" 21 | created: Optional[int] = Field(default_factory=lambda: int(time.time())) 22 | owned_by: Optional[str] = "owner" 23 | root: Optional[str] = None 24 | parent: Optional[str] = None 25 | permission: Optional[list] = [] 26 | 27 | 28 | class ModelList(BaseModel): 29 | object: Optional[str] = "list" 30 | data: Optional[List[ModelCard]] = [] 31 | 32 | 33 | class ChatMessage(BaseModel): 34 | role: Role 35 | content: str 36 | 37 | 38 | class DeltaMessage(BaseModel): 39 | role: Optional[Role] = None 40 | content: Optional[str] = None 41 | 42 | 43 | class ChatCompletionRequest(BaseModel): 44 | model: str 45 | messages: List[ChatMessage] 46 | temperature: Optional[float] = None 47 | top_p: Optional[float] = None 48 | n: Optional[int] = 1 49 | max_tokens: Optional[int] = None 50 | stream: Optional[bool] = False 51 | 52 | 53 | class ChatCompletionResponseChoice(BaseModel): 54 | index: int 55 | message: ChatMessage 56 | finish_reason: Finish 57 | 58 | 59 | class ChatCompletionResponseStreamChoice(BaseModel): 60 | index: int 61 | delta: DeltaMessage 62 | finish_reason: Optional[Finish] = None 63 | 64 | 65 | class ChatCompletionResponseUsage(BaseModel): 66 | prompt_tokens: int 67 | completion_tokens: int 68 | total_tokens: int 69 | 70 | 71 | class ChatCompletionResponse(BaseModel): 72 | id: Optional[str] = "chatcmpl-default" 73 | object: Optional[str] = "chat.completion" 74 | created: Optional[int] = Field(default_factory=lambda: int(time.time())) 75 | model: str 76 | choices: List[ChatCompletionResponseChoice] 77 | usage: ChatCompletionResponseUsage 78 | 79 | 80 | class ChatCompletionStreamResponse(BaseModel): 81 | id: Optional[str] = "chatcmpl-default" 82 | object: Optional[str] = "chat.completion.chunk" 83 | created: Optional[int] = Field(default_factory=lambda: int(time.time())) 84 | model: str 85 | choices: List[ChatCompletionResponseStreamChoice] 86 | -------------------------------------------------------------------------------- /src/llmtuner/webui/interface.py: -------------------------------------------------------------------------------- 1 | import gradio as gr 2 | from transformers.utils.versions import require_version 3 | 4 | from llmtuner.webui.components import ( 5 
| create_top, 6 | create_train_tab, 7 | create_eval_tab, 8 | create_infer_tab, 9 | create_export_tab, 10 | create_chat_box 11 | ) 12 | from llmtuner.webui.chat import WebChatModel 13 | from llmtuner.webui.css import CSS 14 | from llmtuner.webui.manager import Manager 15 | from llmtuner.webui.runner import Runner 16 | 17 | 18 | require_version("gradio>=3.36.0", "To fix: pip install gradio>=3.36.0") 19 | 20 | 21 | def create_ui() -> gr.Blocks: 22 | runner = Runner() 23 | 24 | with gr.Blocks(title="Web Tuner", css=CSS) as demo: 25 | top_elems = create_top() 26 | 27 | with gr.Tab("Train"): 28 | train_elems = create_train_tab(top_elems, runner) 29 | 30 | with gr.Tab("Evaluate"): 31 | eval_elems = create_eval_tab(top_elems, runner) 32 | 33 | with gr.Tab("Chat"): 34 | infer_elems = create_infer_tab(top_elems) 35 | 36 | with gr.Tab("Export"): 37 | export_elems = create_export_tab(top_elems) 38 | 39 | elem_list = [top_elems, train_elems, eval_elems, infer_elems, export_elems] 40 | manager = Manager(elem_list) 41 | 42 | demo.load( 43 | manager.gen_label, 44 | [top_elems["lang"]], 45 | [elem for elems in elem_list for elem in elems.values()], 46 | ) 47 | 48 | top_elems["lang"].change( 49 | manager.gen_label, 50 | [top_elems["lang"]], 51 | [elem for elems in elem_list for elem in elems.values()], 52 | queue=False 53 | ) 54 | 55 | return demo 56 | 57 | 58 | def create_web_demo() -> gr.Blocks: 59 | chat_model = WebChatModel(lazy_init=False) 60 | 61 | with gr.Blocks(title="Web Demo", css=CSS) as demo: 62 | lang = gr.Dropdown(choices=["en", "zh"], value="en") 63 | 64 | _, _, _, chat_elems = create_chat_box(chat_model, visible=True) 65 | 66 | manager = Manager([{"lang": lang}, chat_elems]) 67 | 68 | demo.load(manager.gen_label, [lang], [lang] + list(chat_elems.values())) 69 | 70 | lang.select(manager.gen_label, [lang], [lang] + list(chat_elems.values()), queue=False) 71 | 72 | return demo 73 | 74 | 75 | if __name__ == "__main__": 76 | demo = create_ui() 77 | demo.queue() 78 | demo.launch(server_name="0.0.0.0", server_port=7860, share=False, inbrowser=True) 79 | -------------------------------------------------------------------------------- /src/llmtuner/webui/components/top.py: -------------------------------------------------------------------------------- 1 | from typing import TYPE_CHECKING, Dict 2 | 3 | import gradio as gr 4 | 5 | from llmtuner.extras.constants import METHODS, SUPPORTED_MODELS 6 | from llmtuner.extras.template import templates 7 | from llmtuner.webui.common import list_checkpoint, get_model_path, get_template, save_config 8 | from llmtuner.webui.utils import can_quantize 9 | 10 | if TYPE_CHECKING: 11 | from gradio.components import Component 12 | 13 | 14 | def create_top() -> Dict[str, "Component"]: 15 | available_models = list(SUPPORTED_MODELS.keys()) + ["Custom"] 16 | 17 | with gr.Row(): 18 | lang = gr.Dropdown(choices=["en", "zh"], scale=1) 19 | model_name = gr.Dropdown(choices=available_models, scale=3) 20 | model_path = gr.Textbox(scale=3) 21 | 22 | with gr.Row(): 23 | finetuning_type = gr.Dropdown(choices=METHODS, value="lora", scale=1) 24 | checkpoints = gr.Dropdown(multiselect=True, scale=5) 25 | refresh_btn = gr.Button(scale=1) 26 | 27 | with gr.Accordion(label="Advanced config", open=False) as advanced_tab: 28 | with gr.Row(): 29 | quantization_bit = gr.Dropdown(choices=["None", "8", "4"], value="None", scale=1) 30 | template = gr.Dropdown(choices=list(templates.keys()), value="default", scale=1) 31 | system_prompt = gr.Textbox(scale=2) 32 | 33 | lang.change(save_config, 
[lang, model_name, model_path]) 34 | 35 | model_name.change( 36 | list_checkpoint, [model_name, finetuning_type], [checkpoints] 37 | ).then( 38 | get_model_path, [model_name], [model_path] 39 | ).then( 40 | get_template, [model_name], [template] 41 | ) # do not save config since the below line will save 42 | 43 | model_path.change(save_config, [lang, model_name, model_path]) 44 | 45 | finetuning_type.change( 46 | list_checkpoint, [model_name, finetuning_type], [checkpoints] 47 | ).then( 48 | can_quantize, [finetuning_type], [quantization_bit] 49 | ) 50 | 51 | refresh_btn.click( 52 | list_checkpoint, [model_name, finetuning_type], [checkpoints], queue=False 53 | ) 54 | 55 | return dict( 56 | lang=lang, 57 | model_name=model_name, 58 | model_path=model_path, 59 | finetuning_type=finetuning_type, 60 | checkpoints=checkpoints, 61 | refresh_btn=refresh_btn, 62 | advanced_tab=advanced_tab, 63 | quantization_bit=quantization_bit, 64 | template=template, 65 | system_prompt=system_prompt 66 | ) 67 | -------------------------------------------------------------------------------- /src/llmtuner/dsets/utils.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | from typing import TYPE_CHECKING, Dict, List, Optional, Union 3 | 4 | from llmtuner.extras.logging import get_logger 5 | 6 | if TYPE_CHECKING: 7 | from datasets import Dataset, IterableDataset 8 | from transformers import TrainingArguments 9 | from llmtuner.hparams import DataArguments 10 | 11 | 12 | logger = get_logger(__name__) 13 | 14 | 15 | EXT2TYPE = { 16 | "csv": "csv", 17 | "json": "json", 18 | "jsonl": "json", 19 | "txt": "text" 20 | } 21 | 22 | 23 | def checksum(data_files: List[str], file_sha1: Optional[str] = None) -> None: 24 | if file_sha1 is None: 25 | logger.warning("Checksum failed: missing SHA-1 hash value in dataset_info.json.") 26 | return 27 | 28 | if len(data_files) != 1: 29 | logger.warning("Checksum failed: too many files.") 30 | return 31 | 32 | with open(data_files[0], "rb") as f: 33 | sha1 = hashlib.sha1(f.read()).hexdigest() 34 | if sha1 != file_sha1: 35 | logger.warning("Checksum failed: mismatched SHA-1 hash value at {}.".format(data_files[0])) 36 | 37 | 38 | def split_dataset( 39 | dataset: Union["Dataset", "IterableDataset"], 40 | data_args: "DataArguments", 41 | training_args: "TrainingArguments" 42 | ) -> Dict[str, "Dataset"]: 43 | if training_args.do_train: 44 | if data_args.val_size > 1e-6: # Split the dataset 45 | if data_args.streaming: 46 | val_set = dataset.take(int(data_args.val_size)) 47 | train_set = dataset.skip(int(data_args.val_size)) 48 | dataset = dataset.shuffle(buffer_size=data_args.buffer_size, seed=training_args.seed) 49 | logger.info('train_dataset size={}, eval_dataset size={}'.format(len(train_set), len(val_set))) 50 | return {"train_dataset": train_set, "eval_dataset": val_set} 51 | else: 52 | val_size = int(data_args.val_size) if data_args.val_size > 1 else data_args.val_size 53 | dataset = dataset.train_test_split(test_size=val_size, seed=training_args.seed) 54 | logger.info('train_dataset size={}, eval_dataset size={}'.format(len(dataset["train"]), len(dataset["test"]))) 55 | return {"train_dataset": dataset["train"], "eval_dataset": dataset["test"]} 56 | else: 57 | if data_args.streaming: 58 | dataset = dataset.shuffle(buffer_size=data_args.buffer_size, seed=training_args.seed) 59 | return {"train_dataset": dataset} 60 | else: # do_eval or do_predict 61 | return {"eval_dataset": dataset} 62 | 
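A quick way to check how `split_dataset` above interprets `val_size` is to call `train_test_split` directly on a toy dataset. The following is a minimal sketch with made-up values; it is not part of the repository and only assumes the `datasets` library is installed:

```python
from datasets import Dataset

# Hypothetical toy dataset with 10 rows, used only to illustrate the val_size logic above.
toy = Dataset.from_dict({"text": ["example {}".format(i) for i in range(10)]})

# val_size <= 1 is treated as a fraction of the dataset (here 20% -> 2 eval rows).
frac_split = toy.train_test_split(test_size=0.2, seed=42)
print(len(frac_split["train"]), len(frac_split["test"]))  # 8 2

# val_size > 1 is cast to int and treated as an absolute number of eval rows.
abs_split = toy.train_test_split(test_size=3, seed=42)
print(len(abs_split["train"]), len(abs_split["test"]))  # 7 3

# In streaming mode, split_dataset instead takes the first int(val_size) rows with
# dataset.take() as the eval set and skips them with dataset.skip() for training.
```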
-------------------------------------------------------------------------------- /src/llmtuner/tuner/tune.py: -------------------------------------------------------------------------------- 1 | from typing import TYPE_CHECKING, Any, Dict, List, Optional 2 | 3 | from llmtuner.extras.callbacks import LogCallback 4 | from llmtuner.extras.logging import get_logger 5 | from llmtuner.tuner.core import get_train_args, load_model_and_tokenizer 6 | from llmtuner.tuner.pt import run_pt 7 | from llmtuner.tuner.sft import run_sft 8 | from llmtuner.tuner.rm import run_rm 9 | from llmtuner.tuner.ppo import run_ppo 10 | from llmtuner.tuner.dpo import run_dpo 11 | 12 | if TYPE_CHECKING: 13 | from transformers import TrainerCallback 14 | 15 | 16 | logger = get_logger(__name__) 17 | 18 | 19 | def run_exp(args: Optional[Dict[str, Any]] = None, callbacks: Optional[List["TrainerCallback"]] = None): 20 | logger.info(args) 21 | model_args, data_args, training_args, finetuning_args, generating_args, general_args = get_train_args(args) 22 | 23 | if training_args.local_rank == 0: 24 | logger.info('model_args={}'.format(model_args)) 25 | logger.info('data_args={}'.format(data_args)) 26 | logger.info('training_args={}'.format(training_args)) 27 | logger.info('finetuning_args={}'.format(finetuning_args)) 28 | logger.info('generating_args={}'.format(generating_args)) 29 | logger.info('general_args={}'.format(general_args)) 30 | 31 | callbacks = [LogCallback()] if callbacks is None else callbacks 32 | 33 | if general_args.stage == "pt": 34 | run_pt(model_args, data_args, training_args, finetuning_args, callbacks) 35 | elif general_args.stage == "sft": 36 | run_sft(model_args, data_args, training_args, finetuning_args, generating_args, callbacks) 37 | elif general_args.stage == "rm": 38 | run_rm(model_args, data_args, training_args, finetuning_args, callbacks) 39 | elif general_args.stage == "ppo": 40 | run_ppo(model_args, data_args, training_args, finetuning_args, generating_args, callbacks) 41 | elif general_args.stage == "dpo": 42 | run_dpo(model_args, data_args, training_args, finetuning_args, callbacks) 43 | else: 44 | raise ValueError("Unknown task.") 45 | 46 | 47 | def export_model(args: Optional[Dict[str, Any]] = None, max_shard_size: Optional[str] = "10GB"): 48 | model_args, _, training_args, finetuning_args, _, _ = get_train_args(args) 49 | model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args) 50 | model.save_pretrained(training_args.output_dir, max_shard_size=max_shard_size) 51 | try: 52 | tokenizer.save_pretrained(training_args.output_dir) 53 | except: 54 | logger.warning("Cannot save tokenizer, please copy the files manually.") 55 | 56 | 57 | if __name__ == "__main__": 58 | run_exp() 59 | -------------------------------------------------------------------------------- /src/llmtuner/tuner/dpo/workflow.py: -------------------------------------------------------------------------------- 1 | # Inspired by: https://github.com/huggingface/trl/blob/main/examples/research_projects/stack_llama_2/scripts/dpo_llama2.py 2 | 3 | from copy import deepcopy 4 | from peft import PeftModel 5 | from typing import TYPE_CHECKING, Optional, List 6 | from transformers import Seq2SeqTrainingArguments 7 | 8 | from llmtuner.dsets import get_dataset, preprocess_dataset, split_dataset 9 | from llmtuner.extras.constants import IGNORE_INDEX 10 | from llmtuner.extras.ploting import plot_loss 11 | from llmtuner.tuner.core import load_model_and_tokenizer 12 | from llmtuner.tuner.dpo.collator import 
DPODataCollatorWithPadding 13 | from llmtuner.tuner.dpo.trainer import DPOPeftTrainer 14 | 15 | if TYPE_CHECKING: 16 | from transformers import TrainerCallback 17 | from llmtuner.hparams import ModelArguments, DataArguments, FinetuningArguments 18 | 19 | 20 | def run_dpo( 21 | model_args: "ModelArguments", 22 | data_args: "DataArguments", 23 | training_args: "Seq2SeqTrainingArguments", 24 | finetuning_args: "FinetuningArguments", 25 | callbacks: Optional[List["TrainerCallback"]] = None 26 | ): 27 | dataset = get_dataset(model_args, data_args) 28 | model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, training_args.do_train, stage="sft") 29 | dataset = preprocess_dataset(dataset, tokenizer, data_args, training_args, stage="rm") 30 | data_collator = DPODataCollatorWithPadding( 31 | tokenizer=tokenizer, 32 | label_pad_token_id=IGNORE_INDEX if data_args.ignore_pad_token_for_loss else tokenizer.pad_token_id 33 | ) 34 | 35 | training_args_dict = training_args.to_dict() 36 | training_args_dict.update(dict(remove_unused_columns=False)) # important for pairwise dataset 37 | training_args = Seq2SeqTrainingArguments(**training_args_dict) 38 | 39 | # Initialize our Trainer 40 | trainer = DPOPeftTrainer( 41 | finetuning_args=finetuning_args, 42 | ref_model=deepcopy(model) if not isinstance(model, PeftModel) else None, 43 | model=model, 44 | args=training_args, 45 | tokenizer=tokenizer, 46 | data_collator=data_collator, 47 | callbacks=callbacks, 48 | **split_dataset(dataset, data_args, training_args) 49 | ) 50 | 51 | # Training 52 | if training_args.do_train: 53 | train_result = trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint) 54 | trainer.log_metrics("train", train_result.metrics) 55 | trainer.save_metrics("train", train_result.metrics) 56 | trainer.save_state() 57 | trainer.save_model() 58 | if trainer.is_world_process_zero() and model_args.plot_loss: 59 | plot_loss(training_args.output_dir, keys=["loss", "eval_loss"]) 60 | -------------------------------------------------------------------------------- /src/llmtuner/tuner/pt/workflow.py: -------------------------------------------------------------------------------- 1 | # Inspired by: https://github.com/huggingface/transformers/blob/v4.29.2/examples/pytorch/language-modeling/run_clm.py 2 | 3 | import math 4 | from typing import TYPE_CHECKING, Optional, List 5 | from transformers import DataCollatorForLanguageModeling 6 | 7 | from llmtuner.dsets import get_dataset, preprocess_dataset, split_dataset 8 | from llmtuner.extras.ploting import plot_loss 9 | from llmtuner.tuner.core import load_model_and_tokenizer 10 | from llmtuner.tuner.core.trainer import PeftTrainer 11 | from llmtuner.extras.logging import get_logger 12 | 13 | if TYPE_CHECKING: 14 | from transformers import Seq2SeqTrainingArguments, TrainerCallback 15 | from llmtuner.hparams import ModelArguments, DataArguments, FinetuningArguments 16 | 17 | logger = get_logger(__name__) 18 | 19 | 20 | def run_pt( 21 | model_args: "ModelArguments", 22 | data_args: "DataArguments", 23 | training_args: "Seq2SeqTrainingArguments", 24 | finetuning_args: "FinetuningArguments", 25 | callbacks: Optional[List["TrainerCallback"]] = None 26 | ): 27 | dataset = get_dataset(model_args, data_args) 28 | model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, training_args.do_train, stage="pt") 29 | dataset = preprocess_dataset(dataset, tokenizer, data_args, training_args, stage="pt") 30 | data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, 
mlm=False) 31 | 32 | # Initialize our Trainer 33 | trainer = PeftTrainer( 34 | finetuning_args=finetuning_args, 35 | model=model, 36 | args=training_args, 37 | tokenizer=tokenizer, 38 | data_collator=data_collator, 39 | callbacks=callbacks, 40 | **split_dataset(dataset, data_args, training_args) 41 | ) 42 | 43 | # Training 44 | if training_args.do_train: 45 | train_result = trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint) 46 | trainer.log_metrics("train", train_result.metrics) 47 | trainer.save_metrics("train", train_result.metrics) 48 | 49 | if finetuning_args.save_at_end: 50 | trainer.save_state() 51 | trainer.save_model() 52 | 53 | if trainer.is_world_process_zero() and model_args.plot_loss: 54 | plot_loss(training_args.output_dir, keys=["loss", "eval_loss"]) 55 | 56 | # Evaluation 57 | if training_args.do_eval: 58 | metrics = trainer.evaluate(metric_key_prefix="eval") 59 | try: 60 | perplexity = math.exp(metrics["eval_loss"]) 61 | except OverflowError: 62 | perplexity = float("inf") 63 | 64 | metrics["perplexity"] = perplexity 65 | trainer.log_metrics("eval", metrics) 66 | trainer.save_metrics("eval", metrics) 67 | -------------------------------------------------------------------------------- /src/llmtuner/extras/constants.py: -------------------------------------------------------------------------------- 1 | IGNORE_INDEX = -100 2 | 3 | LOG_FILE_NAME = "trainer_log.jsonl" 4 | 5 | VALUE_HEAD_FILE_NAME = "value_head.bin" 6 | 7 | FINETUNING_ARGS_NAME = "finetuning_args.json" 8 | 9 | LAYERNORM_NAMES = ["norm", "ln_f", "ln_attn", "ln_mlp"] 10 | 11 | METHODS = ["full", "freeze", "lora"] 12 | 13 | STAGES = [ 14 | "SFT", 15 | "Reward Modeling", 16 | "PPO", 17 | "DPO", 18 | "Pre-Training" 19 | ] 20 | 21 | DATASET_STAGE_MAP = { 22 | "SFT": "sft", 23 | "Pre-Training": "pt", 24 | "Reward Modeling": "rm", 25 | "PPO": "sft", 26 | "DPO": "rm" 27 | } 28 | 29 | SUPPORTED_MODELS = { 30 | "LLaMA-7B": "huggyllama/llama-7b", 31 | "LLaMA-13B": "huggyllama/llama-13b", 32 | "LLaMA-30B": "huggyllama/llama-30b", 33 | "LLaMA-65B": "huggyllama/llama-65b", 34 | "LLaMA2-7B": "meta-llama/Llama-2-7b-hf", 35 | "LLaMA2-13B": "meta-llama/Llama-2-13b-hf", 36 | "LLaMA2-70B": "meta-llama/Llama-2-70b-hf", 37 | "LLaMA2-7B-Chat": "meta-llama/Llama-2-7b-chat-hf", 38 | "LLaMA2-13B-Chat": "meta-llama/Llama-2-13b-chat-hf", 39 | "LLaMA2-70B-Chat": "meta-llama/Llama-2-70b-chat-hf", 40 | "ChineseLLaMA2-7B": "ziqingyang/chinese-llama-2-7b", 41 | "ChineseLLaMA2-13B": "ziqingyang/chinese-llama-2-13b", 42 | "ChineseLLaMA2-7B-Chat": "ziqingyang/chinese-alpaca-2-7b", 43 | "ChineseLLaMA2-13B-Chat": "ziqingyang/chinese-alpaca-2-13b", 44 | "BLOOM-560M": "bigscience/bloom-560m", 45 | "BLOOM-3B": "bigscience/bloom-3b", 46 | "BLOOM-7B1": "bigscience/bloom-7b1", 47 | "BLOOMZ-560M": "bigscience/bloomz-560m", 48 | "BLOOMZ-3B": "bigscience/bloomz-3b", 49 | "BLOOMZ-7B1-mt": "bigscience/bloomz-7b1-mt", 50 | "Falcon-7B": "tiiuae/falcon-7b", 51 | "Falcon-7B-Chat": "tiiuae/falcon-7b-instruct", 52 | "Falcon-40B": "tiiuae/falcon-40b", 53 | "Falcon-40B-Chat": "tiiuae/falcon-40b-instruct", 54 | "Baichuan-7B": "baichuan-inc/Baichuan-7B", 55 | "Baichuan-13B": "baichuan-inc/Baichuan-13B-Base", 56 | "Baichuan-13B-Chat": "baichuan-inc/Baichuan-13B-Chat", 57 | "InternLM-7B": "internlm/internlm-7b", 58 | "InternLM-7B-Chat": "internlm/internlm-chat-7b", 59 | "Qwen-7B": "Qwen/Qwen-7B", 60 | "Qwen-7B-Chat": "Qwen/Qwen-7B-Chat", 61 | "XVERSE-13B": "xverse/XVERSE-13B", 62 | "ChatGLM2-6B-Chat": "THUDM/chatglm2-6b" 63 | } 64 | 65 | 
DEFAULT_MODULE = { 66 | "LLaMA": "q_proj,v_proj", 67 | "LLaMA2": "q_proj,v_proj", 68 | "ChineseLLaMA2": "q_proj,v_proj", 69 | "BLOOM": "query_key_value", 70 | "BLOOMZ": "query_key_value", 71 | "Falcon": "query_key_value", 72 | "Baichuan": "W_pack", 73 | "InternLM": "q_proj,v_proj", 74 | "Qwen": "c_attn", 75 | "XVERSE": "q_proj,v_proj", 76 | "ChatGLM2": "query_key_value" 77 | } 78 | 79 | DEFAULT_TEMPLATE = { 80 | "LLaMA2": "llama2", 81 | "ChineseLLaMA2": "llama2_zh", 82 | "Baichuan": "baichuan", 83 | "InternLM": "intern", 84 | "Qwen": "chatml", 85 | "ChatGLM2": "chatglm2" 86 | } 87 | -------------------------------------------------------------------------------- /src/llmtuner/tuner/rm/trainer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import torch 4 | from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union 5 | 6 | from llmtuner.extras.logging import get_logger 7 | from llmtuner.tuner.core.trainer import PeftTrainer 8 | 9 | if TYPE_CHECKING: 10 | from transformers.trainer import PredictionOutput 11 | from transformers.modeling_utils import PreTrainedModel 12 | 13 | 14 | logger = get_logger(__name__) 15 | 16 | 17 | class PairwisePeftTrainer(PeftTrainer): 18 | r""" 19 | Inherits PeftTrainer to compute pairwise loss. 20 | """ 21 | 22 | def __init__(self, *args, **kwargs): 23 | super().__init__(*args, **kwargs) 24 | self.can_return_loss = True # override property to return eval_loss 25 | 26 | def compute_loss( 27 | self, 28 | model: "PreTrainedModel", 29 | inputs: Dict[str, torch.Tensor], 30 | return_outputs: Optional[bool] = False 31 | ) -> Union[torch.Tensor, Tuple[torch.Tensor, List[torch.Tensor]]]: 32 | r""" 33 | Computes pairwise loss. The first n examples are chosen and the last n examples are rejected. 34 | 35 | We use score on the EOS token to represent reward of the whole sentence. 36 | 37 | Subclass and override to inject custom behavior. It should not be directly used by external scripts. 38 | 39 | Note that the first element will be removed from the output tuple. 40 | 41 | See: https://github.com/huggingface/transformers/blob/v4.30.2/src/transformers/trainer.py#L3509 42 | """ 43 | batch_size = inputs["input_ids"].size(0) // 2 44 | _, _, values = model(**inputs, output_hidden_states=True, return_dict=True) 45 | if values.size(0) != inputs["input_ids"].size(0): # adapt to chatglm2 46 | values = torch.transpose(values, 0, 1) 47 | r_accept, r_reject = values[:, -1].split(batch_size, dim=0) 48 | loss = -torch.log(torch.sigmoid(r_accept - r_reject)).mean() 49 | return (loss, [loss, r_accept, r_reject]) if return_outputs else loss 50 | 51 | def save_predictions( 52 | self, 53 | predict_results: "PredictionOutput" 54 | ) -> None: 55 | r""" 56 | Saves model predictions to `output_dir`. 57 | 58 | A custom behavior that not contained in Seq2SeqTrainer. 
59 | """ 60 | if not self.is_world_process_zero(): 61 | return 62 | 63 | output_prediction_file = os.path.join(self.args.output_dir, "generated_predictions.jsonl") 64 | logger.info(f"Saving prediction results to {output_prediction_file}") 65 | 66 | acc_scores, rej_scores = predict_results.predictions 67 | 68 | with open(output_prediction_file, "w", encoding="utf-8") as writer: 69 | res: List[str] = [] 70 | for acc_score, rej_score in zip(acc_scores, rej_scores): 71 | res.append(json.dumps({"accept": round(float(acc_score), 2), "reject": round(float(rej_score), 2)})) 72 | writer.write("\n".join(res)) 73 | -------------------------------------------------------------------------------- /src/llmtuner/tuner/rm/workflow.py: -------------------------------------------------------------------------------- 1 | # Inspired by: 2 | # https://github.com/lvwerra/trl/blob/main/examples/summarization/scripts/reward_summarization.py 3 | # https://github.com/CarperAI/trlx/blob/main/examples/summarize_rlhf/reward_model/train_reward_model_gptj.py 4 | 5 | from typing import TYPE_CHECKING, Optional, List 6 | from transformers import Seq2SeqTrainingArguments 7 | 8 | from llmtuner.dsets import get_dataset, preprocess_dataset, split_dataset 9 | from llmtuner.extras.ploting import plot_loss 10 | from llmtuner.tuner.core import load_model_and_tokenizer 11 | from llmtuner.tuner.rm.metric import compute_accuracy 12 | from llmtuner.tuner.rm.collator import PairwiseDataCollatorWithPadding 13 | from llmtuner.tuner.rm.trainer import PairwisePeftTrainer 14 | 15 | if TYPE_CHECKING: 16 | from transformers import TrainerCallback 17 | from llmtuner.hparams import ModelArguments, DataArguments, FinetuningArguments 18 | 19 | 20 | def run_rm( 21 | model_args: "ModelArguments", 22 | data_args: "DataArguments", 23 | training_args: "Seq2SeqTrainingArguments", 24 | finetuning_args: "FinetuningArguments", 25 | callbacks: Optional[List["TrainerCallback"]] = None 26 | ): 27 | dataset = get_dataset(model_args, data_args) 28 | model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, training_args.do_train, stage="rm") 29 | dataset = preprocess_dataset(dataset, tokenizer, data_args, training_args, stage="rm") 30 | data_collator = PairwiseDataCollatorWithPadding(tokenizer) 31 | 32 | training_args_dict = training_args.to_dict() 33 | training_args_dict.update(dict(remove_unused_columns=False)) # important for pairwise dataset 34 | training_args = Seq2SeqTrainingArguments(**training_args_dict) 35 | 36 | # Initialize our Trainer 37 | trainer = PairwisePeftTrainer( 38 | finetuning_args=finetuning_args, 39 | model=model, 40 | args=training_args, 41 | tokenizer=tokenizer, 42 | data_collator=data_collator, 43 | callbacks=callbacks, 44 | compute_metrics=compute_accuracy, 45 | **split_dataset(dataset, data_args, training_args) 46 | ) 47 | 48 | # Training 49 | if training_args.do_train: 50 | train_result = trainer.train() 51 | trainer.log_metrics("train", train_result.metrics) 52 | trainer.save_metrics("train", train_result.metrics) 53 | trainer.save_state() 54 | trainer.save_model() 55 | if trainer.is_world_process_zero() and model_args.plot_loss: 56 | plot_loss(training_args.output_dir, keys=["loss", "eval_loss"]) 57 | 58 | # Evaluation 59 | if training_args.do_eval: 60 | metrics = trainer.evaluate(metric_key_prefix="eval") 61 | trainer.log_metrics("eval", metrics) 62 | trainer.save_metrics("eval", metrics) 63 | 64 | # Predict 65 | if training_args.do_predict: 66 | predict_results = trainer.predict(dataset, 
metric_key_prefix="predict") 67 | trainer.log_metrics("predict", predict_results.metrics) 68 | trainer.save_metrics("predict", predict_results.metrics) 69 | trainer.save_predictions(predict_results) 70 | -------------------------------------------------------------------------------- /src/llmtuner/webui/components/eval.py: -------------------------------------------------------------------------------- 1 | from typing import TYPE_CHECKING, Dict 2 | import gradio as gr 3 | 4 | from llmtuner.webui.common import list_dataset, DEFAULT_DATA_DIR 5 | from llmtuner.webui.components.data import create_preview_box 6 | from llmtuner.webui.utils import can_preview, get_preview 7 | 8 | if TYPE_CHECKING: 9 | from gradio.components import Component 10 | from llmtuner.webui.runner import Runner 11 | 12 | 13 | def create_eval_tab(top_elems: Dict[str, "Component"], runner: "Runner") -> Dict[str, "Component"]: 14 | with gr.Row(): 15 | dataset_dir = gr.Textbox(value=DEFAULT_DATA_DIR, scale=2) 16 | dataset = gr.Dropdown(multiselect=True, scale=4) 17 | data_preview_btn = gr.Button(interactive=False, scale=1) 18 | 19 | preview_box, preview_count, preview_samples, close_btn = create_preview_box() 20 | 21 | dataset_dir.change(list_dataset, [dataset_dir], [dataset]) 22 | dataset.change(can_preview, [dataset_dir, dataset], [data_preview_btn]) 23 | data_preview_btn.click( 24 | get_preview, 25 | [dataset_dir, dataset], 26 | [preview_count, preview_samples, preview_box], 27 | queue=False 28 | ) 29 | 30 | with gr.Row(): 31 | max_source_length = gr.Slider(value=512, minimum=4, maximum=4096, step=1) 32 | max_target_length = gr.Slider(value=512, minimum=4, maximum=4096, step=1) 33 | max_samples = gr.Textbox(value="100000") 34 | batch_size = gr.Slider(value=8, minimum=1, maximum=512, step=1) 35 | predict = gr.Checkbox(value=True) 36 | 37 | with gr.Row(): 38 | cmd_preview_btn = gr.Button() 39 | start_btn = gr.Button() 40 | stop_btn = gr.Button() 41 | 42 | with gr.Row(): 43 | process_bar = gr.Slider(visible=False, interactive=False) 44 | 45 | with gr.Box(): 46 | output_box = gr.Markdown() 47 | 48 | input_components = [ 49 | top_elems["lang"], 50 | top_elems["model_name"], 51 | top_elems["checkpoints"], 52 | top_elems["finetuning_type"], 53 | top_elems["quantization_bit"], 54 | top_elems["template"], 55 | top_elems["system_prompt"], 56 | dataset_dir, 57 | dataset, 58 | max_source_length, 59 | max_target_length, 60 | max_samples, 61 | batch_size, 62 | predict 63 | ] 64 | 65 | output_components = [ 66 | output_box, 67 | process_bar 68 | ] 69 | 70 | cmd_preview_btn.click(runner.preview_eval, input_components, output_components) 71 | start_btn.click(runner.run_eval, input_components, output_components) 72 | stop_btn.click(runner.set_abort, queue=False) 73 | 74 | return dict( 75 | dataset_dir=dataset_dir, 76 | dataset=dataset, 77 | data_preview_btn=data_preview_btn, 78 | preview_count=preview_count, 79 | preview_samples=preview_samples, 80 | close_btn=close_btn, 81 | max_source_length=max_source_length, 82 | max_target_length=max_target_length, 83 | max_samples=max_samples, 84 | batch_size=batch_size, 85 | predict=predict, 86 | cmd_preview_btn=cmd_preview_btn, 87 | start_btn=start_btn, 88 | stop_btn=stop_btn, 89 | output_box=output_box 90 | ) 91 | -------------------------------------------------------------------------------- /src/llmtuner/tuner/dpo/trainer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from collections import defaultdict 3 | from peft import PeftModel 4 | 
from typing import TYPE_CHECKING, Dict, Optional, Tuple, Union 5 | from transformers import BatchEncoding, Trainer 6 | from trl import DPOTrainer 7 | 8 | from llmtuner.extras.constants import IGNORE_INDEX 9 | from llmtuner.tuner.core.trainer import PeftModelMixin 10 | 11 | if TYPE_CHECKING: 12 | from transformers import PreTrainedModel 13 | from llmtuner.hparams import FinetuningArguments 14 | 15 | 16 | class DPOPeftTrainer(PeftModelMixin, DPOTrainer): 17 | 18 | def __init__( 19 | self, 20 | finetuning_args: "FinetuningArguments", 21 | ref_model: Optional[Union["PreTrainedModel", torch.nn.Module]] = None, 22 | **kwargs 23 | ): 24 | self.finetuning_args = finetuning_args 25 | self.ref_model = ref_model 26 | self.use_dpo_data_collator = True # hack to avoid warning 27 | self.label_pad_token_id = IGNORE_INDEX 28 | self.padding_value = 0 29 | self.beta = finetuning_args.dpo_beta 30 | self._stored_metrics = defaultdict(lambda: defaultdict(list)) 31 | 32 | Trainer.__init__(self, **kwargs) 33 | if not hasattr(self, "accelerator"): 34 | raise AttributeError("Please update `transformers`.") 35 | 36 | if ref_model is not None: 37 | self.ref_model = self.accelerator.prepare_model(self.ref_model, evaluation_mode=True) 38 | 39 | def concatenated_forward( 40 | self, 41 | model: Optional[torch.nn.Module] = None, 42 | batch: Optional[Dict[str, torch.Tensor]] = None 43 | ) -> Tuple[torch.FloatTensor, torch.FloatTensor, torch.FloatTensor, torch.FloatTensor]: 44 | batch_copied = BatchEncoding({k: v.detach().clone() for k, v in batch.items()}) # avoid error 45 | unwrapped_model: "PreTrainedModel" = self.accelerator.unwrap_model(self.model) 46 | 47 | if not torch.is_grad_enabled(): 48 | unwrapped_model.gradient_checkpointing_disable() 49 | 50 | if model is None and isinstance(unwrapped_model, PeftModel): # peft model has no ref_model 51 | with unwrapped_model.disable_adapter(): 52 | all_logits = self.model( 53 | input_ids=batch_copied["input_ids"], 54 | attention_mask=batch_copied["attention_mask"], 55 | return_dict=True 56 | ).logits.to(torch.float32) 57 | else: 58 | all_logits = model( 59 | input_ids=batch_copied["input_ids"], 60 | attention_mask=batch_copied["attention_mask"], 61 | return_dict=True 62 | ).logits.to(torch.float32) 63 | 64 | if not torch.is_grad_enabled(): 65 | unwrapped_model.gradient_checkpointing_enable() 66 | 67 | all_logps = self._get_batch_logps( 68 | all_logits, 69 | batch["labels"], 70 | average_log_prob=False 71 | ) 72 | batch_size = batch["input_ids"].size(0) // 2 73 | chosen_logps, rejected_logps = all_logps.split(batch_size, dim=0) 74 | chosen_logits, rejected_logits = all_logits.split(batch_size, dim=0) 75 | return chosen_logps, rejected_logps, chosen_logits, rejected_logits 76 | -------------------------------------------------------------------------------- /src/llmtuner/webui/common.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from typing import Any, Dict, Optional 4 | 5 | import gradio as gr 6 | from peft.utils import WEIGHTS_NAME as PEFT_WEIGHTS_NAME 7 | from transformers.trainer import WEIGHTS_NAME, WEIGHTS_INDEX_NAME 8 | 9 | from llmtuner.extras.constants import DEFAULT_TEMPLATE, SUPPORTED_MODELS, DATASET_STAGE_MAP 10 | 11 | 12 | DEFAULT_CACHE_DIR = "cache" 13 | DEFAULT_DATA_DIR = "data" 14 | DEFAULT_SAVE_DIR = "saves" 15 | USER_CONFIG = "user.config" 16 | DATA_CONFIG = "dataset_info.json" 17 | 18 | 19 | def get_save_dir(model_name: str) -> str: 20 | return os.path.join(DEFAULT_SAVE_DIR, 
os.path.split(model_name)[-1]) 21 | 22 | 23 | def get_config_path() -> os.PathLike: 24 | return os.path.join(DEFAULT_CACHE_DIR, USER_CONFIG) 25 | 26 | 27 | def load_config() -> Dict[str, Any]: 28 | try: 29 | with open(get_config_path(), "r", encoding="utf-8") as f: 30 | return json.load(f) 31 | except: 32 | return {"lang": "", "last_model": "", "path_dict": {}} 33 | 34 | 35 | def save_config(lang: str, model_name: str, model_path: str) -> None: 36 | os.makedirs(DEFAULT_CACHE_DIR, exist_ok=True) 37 | user_config = load_config() 38 | user_config["lang"] = lang or user_config["lang"] 39 | if model_name: 40 | user_config["last_model"] = model_name 41 | user_config["path_dict"][model_name] = model_path 42 | with open(get_config_path(), "w", encoding="utf-8") as f: 43 | json.dump(user_config, f, indent=2, ensure_ascii=False) 44 | 45 | 46 | def get_model_path(model_name: str) -> str: 47 | user_config = load_config() 48 | return user_config["path_dict"].get(model_name, SUPPORTED_MODELS.get(model_name, "")) 49 | 50 | 51 | def get_template(model_name: str) -> str: 52 | if model_name.endswith("Chat") and model_name.split("-")[0] in DEFAULT_TEMPLATE: 53 | return DEFAULT_TEMPLATE[model_name.split("-")[0]] 54 | return "default" 55 | 56 | 57 | def list_checkpoint(model_name: str, finetuning_type: str) -> Dict[str, Any]: 58 | checkpoints = [] 59 | save_dir = os.path.join(get_save_dir(model_name), finetuning_type) 60 | if save_dir and os.path.isdir(save_dir): 61 | for checkpoint in os.listdir(save_dir): 62 | if ( 63 | os.path.isdir(os.path.join(save_dir, checkpoint)) 64 | and any([ 65 | os.path.isfile(os.path.join(save_dir, checkpoint, name)) 66 | for name in (WEIGHTS_NAME, WEIGHTS_INDEX_NAME, PEFT_WEIGHTS_NAME) 67 | ]) 68 | ): 69 | checkpoints.append(checkpoint) 70 | return gr.update(value=[], choices=checkpoints) 71 | 72 | 73 | def load_dataset_info(dataset_dir: str) -> Dict[str, Any]: 74 | try: 75 | with open(os.path.join(dataset_dir, DATA_CONFIG), "r", encoding="utf-8") as f: 76 | return json.load(f) 77 | except: 78 | return {} 79 | 80 | 81 | def list_dataset(dataset_dir: Optional[str] = None, stage: Optional[str] = None) -> Dict[str, Any]: 82 | dataset_info = load_dataset_info(dataset_dir if dataset_dir is not None else DEFAULT_DATA_DIR) 83 | if stage: 84 | dataset_stage = DATASET_STAGE_MAP[stage] 85 | dataset_info = {key: value for key, value in dataset_info.items() 86 | if ("stage" not in value) or value["stage"] == dataset_stage} 87 | 88 | return gr.update(value=[], choices=list(dataset_info.keys())) -------------------------------------------------------------------------------- /src/llmtuner/webui/chat.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Any, Dict, List, Optional, Tuple 3 | 4 | from llmtuner.chat.stream_chat import ChatModel 5 | from llmtuner.extras.misc import torch_gc 6 | from llmtuner.hparams import GeneratingArguments 7 | from llmtuner.webui.common import get_model_path, get_save_dir 8 | from llmtuner.webui.locales import ALERTS 9 | 10 | 11 | class WebChatModel(ChatModel): 12 | 13 | def __init__(self, args: Optional[Dict[str, Any]] = None, lazy_init: Optional[bool] = True) -> None: 14 | if lazy_init: 15 | self.model = None 16 | self.tokenizer = None 17 | self.generating_args = GeneratingArguments() 18 | else: 19 | super().__init__(args) 20 | 21 | def load_model( 22 | self, 23 | lang: str, 24 | model_name: str, 25 | checkpoints: List[str], 26 | finetuning_type: str, 27 | quantization_bit: str, 28 | template: 
str, 29 | system_prompt: str 30 | ): 31 | if self.model is not None: 32 | yield ALERTS["err_exists"][lang] 33 | return 34 | 35 | if not model_name: 36 | yield ALERTS["err_no_model"][lang] 37 | return 38 | 39 | model_name_or_path = get_model_path(model_name) 40 | if not model_name_or_path: 41 | yield ALERTS["err_no_path"][lang] 42 | return 43 | 44 | if checkpoints: 45 | checkpoint_dir = ",".join( 46 | [os.path.join(get_save_dir(model_name), finetuning_type, checkpoint) for checkpoint in checkpoints] 47 | ) 48 | else: 49 | checkpoint_dir = None 50 | 51 | yield ALERTS["info_loading"][lang] 52 | args = dict( 53 | model_name_or_path=model_name_or_path, 54 | checkpoint_dir=checkpoint_dir, 55 | finetuning_type=finetuning_type, 56 | quantization_bit=int(quantization_bit) if quantization_bit and quantization_bit != "None" else None, 57 | template=template, 58 | system_prompt=system_prompt 59 | ) 60 | super().__init__(args) 61 | 62 | yield ALERTS["info_loaded"][lang] 63 | 64 | def unload_model(self, lang: str): 65 | yield ALERTS["info_unloading"][lang] 66 | self.model = None 67 | self.tokenizer = None 68 | torch_gc() 69 | yield ALERTS["info_unloaded"][lang] 70 | 71 | def predict( 72 | self, 73 | chatbot: List[Tuple[str, str]], 74 | query: str, 75 | history: List[Tuple[str, str]], 76 | system: str, 77 | max_new_tokens: int, 78 | top_p: float, 79 | temperature: float 80 | ): 81 | chatbot.append([query, ""]) 82 | response = "" 83 | for new_text in self.stream_chat( 84 | query, history, system, max_new_tokens=max_new_tokens, top_p=top_p, temperature=temperature 85 | ): 86 | response += new_text 87 | response = self.postprocess(response) 88 | new_history = history + [(query, response)] 89 | chatbot[-1] = [query, response] 90 | yield chatbot, new_history 91 | 92 | def postprocess(self, response: str) -> str: 93 | blocks = response.split("```") 94 | for i, block in enumerate(blocks): 95 | if i % 2 == 0: 96 | blocks[i] = block.replace("<", "<").replace(">", ">") 97 | return "```".join(blocks) 98 | -------------------------------------------------------------------------------- /src/llmtuner/hparams/model_args.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from typing import Literal, Optional 3 | from dataclasses import dataclass, field 4 | 5 | 6 | @dataclass 7 | class ModelArguments: 8 | r""" 9 | Arguments pertaining to which model/config/tokenizer we are going to fine-tune. 
10 | """ 11 | model_name_or_path: str = field( 12 | metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models."} 13 | ) 14 | cache_dir: Optional[str] = field( 15 | default=None, 16 | metadata={"help": "Where to store the pretrained models downloaded from huggingface.co."} 17 | ) 18 | use_fast_tokenizer: Optional[bool] = field( 19 | default=False, 20 | metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."} 21 | ) 22 | use_auth_token: Optional[bool] = field( 23 | default=False, 24 | metadata={"help": "Will use the token generated when running `huggingface-cli login`."} 25 | ) 26 | model_revision: Optional[str] = field( 27 | default="main", 28 | metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."} 29 | ) 30 | padding_side: Optional[Literal["left", "right"]] = field( 31 | default="left", 32 | metadata={"help": "The side on which the model should have padding applied."} 33 | ) 34 | quantization_bit: Optional[int] = field( 35 | default=None, 36 | metadata={"help": "The number of bits to quantize the model."} 37 | ) 38 | quantization_type: Optional[Literal["fp4", "nf4"]] = field( 39 | default="nf4", 40 | metadata={"help": "Quantization data type to use in int4 training."} 41 | ) 42 | double_quantization: Optional[bool] = field( 43 | default=True, 44 | metadata={"help": "Whether to use double quantization in int4 training or not."} 45 | ) 46 | rope_scaling: Optional[Literal["linear", "dynamic"]] = field( 47 | default=None, 48 | metadata={"help": "Adopt scaled rotary positional embeddings."} 49 | ) 50 | checkpoint_dir: Optional[str] = field( 51 | default=None, 52 | metadata={"help": "Path to the directory(s) containing the delta model checkpoints as well as the configurations."} 53 | ) 54 | reward_model: Optional[str] = field( 55 | default=None, 56 | metadata={"help": "Path to the directory containing the checkpoints of the reward model."} 57 | ) 58 | plot_loss: Optional[bool] = field( 59 | default=False, 60 | metadata={"help": "Whether to plot the training loss after fine-tuning or not."} 61 | ) 62 | hf_auth_token: Optional[str] = field( 63 | default=None, 64 | metadata={"help": "Auth token to log in with Hugging Face Hub."} 65 | ) 66 | compute_dtype: Optional[torch.dtype] = field( 67 | default=None, 68 | metadata={"help": "Used in quantization configs. Do not specify this argument manually."} 69 | ) 70 | model_max_length: Optional[int] = field( 71 | default=None, 72 | metadata={"help": "Used in rope scaling. Do not specify this argument manually."} 73 | ) 74 | 75 | def __post_init__(self): 76 | if self.compute_dtype is not None or self.model_max_length is not None: 77 | raise ValueError("These arguments cannot be specified.") 78 | 79 | if self.checkpoint_dir is not None: # support merging multiple lora weights 80 | self.checkpoint_dir = [cd.strip() for cd in self.checkpoint_dir.split(",")] 81 | 82 | if self.quantization_bit is not None: 83 | assert self.quantization_bit in [4, 8], "We only accept 4-bit or 8-bit quantization." 
84 | 85 | if self.use_auth_token == True and self.hf_auth_token is not None: 86 | from huggingface_hub.hf_api import HfFolder # lazy load 87 | HfFolder.save_token(self.hf_auth_token) 88 | -------------------------------------------------------------------------------- /src/llmtuner/tuner/ppo/workflow.py: -------------------------------------------------------------------------------- 1 | # Inspired by: https://github.com/lvwerra/trl/blob/main/examples/research_projects/stack_llama/scripts/rl_training.py 2 | 3 | import math 4 | from trl import PPOConfig 5 | from torch.optim import AdamW 6 | from typing import TYPE_CHECKING, Optional, List 7 | from transformers import DataCollatorForSeq2Seq 8 | from transformers.optimization import get_scheduler 9 | from transformers.utils.versions import require_version 10 | 11 | from llmtuner.dsets import get_dataset, preprocess_dataset 12 | from llmtuner.extras.ploting import plot_loss 13 | from llmtuner.tuner.core import load_model_and_tokenizer 14 | from llmtuner.tuner.ppo.trainer import PPOPeftTrainer 15 | 16 | if TYPE_CHECKING: 17 | from transformers import Seq2SeqTrainingArguments, TrainerCallback 18 | from llmtuner.hparams import ModelArguments, DataArguments, FinetuningArguments, GeneratingArguments 19 | 20 | 21 | def run_ppo( 22 | model_args: "ModelArguments", 23 | data_args: "DataArguments", 24 | training_args: "Seq2SeqTrainingArguments", 25 | finetuning_args: "FinetuningArguments", 26 | generating_args: "GeneratingArguments", 27 | callbacks: Optional[List["TrainerCallback"]] = None 28 | ): 29 | dataset = get_dataset(model_args, data_args) 30 | model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, training_args.do_train, stage="ppo") 31 | dataset = preprocess_dataset(dataset, tokenizer, data_args, training_args, stage="ppo") 32 | data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, label_pad_token_id=tokenizer.pad_token_id) 33 | 34 | ppo_config = PPOConfig( 35 | model_name=model_args.model_name_or_path, 36 | learning_rate=training_args.learning_rate, 37 | mini_batch_size=training_args.per_device_train_batch_size, 38 | batch_size=training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps, 39 | gradient_accumulation_steps=training_args.gradient_accumulation_steps, 40 | ppo_epochs=1, 41 | max_grad_norm=training_args.max_grad_norm, 42 | seed=training_args.seed, 43 | optimize_cuda_cache=True 44 | ) 45 | 46 | if finetuning_args.ppo_score_norm: 47 | require_version("trl>=0.5.1.dev0", "To fix: pip install git+https://github.com/huggingface/trl.git") 48 | ppo_config.use_score_scaling = True 49 | ppo_config.use_score_norm = True 50 | 51 | optimizer = AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=training_args.learning_rate) 52 | total_train_batch_size = ( 53 | training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps * training_args.world_size 54 | ) 55 | num_training_steps = training_args.num_train_epochs * math.ceil(len(dataset) / total_train_batch_size) 56 | lr_scheduler = get_scheduler( 57 | training_args.lr_scheduler_type, 58 | optimizer=optimizer, 59 | num_warmup_steps=training_args.get_warmup_steps(num_training_steps), 60 | num_training_steps=num_training_steps 61 | ) 62 | 63 | # Initialize our Trainer 64 | ppo_trainer = PPOPeftTrainer( 65 | training_args=training_args, 66 | finetuning_args=finetuning_args, 67 | generating_args=generating_args, 68 | callbacks=callbacks, 69 | compute_dtype=model_args.compute_dtype, 70 | config=ppo_config, 71 | 
model=model, 72 | ref_model=None, 73 | tokenizer=tokenizer, 74 | dataset=dataset, 75 | data_collator=data_collator, 76 | optimizer=optimizer, 77 | lr_scheduler=lr_scheduler 78 | ) 79 | 80 | # Training 81 | if training_args.do_train: 82 | ppo_trainer.ppo_train(max_target_length=data_args.max_target_length) 83 | ppo_trainer.save_model() 84 | ppo_trainer.save_state() # must be called after save_model to have a folder 85 | if ppo_trainer.is_world_process_zero() and model_args.plot_loss: 86 | plot_loss(training_args.output_dir, keys=["loss", "reward"]) 87 | -------------------------------------------------------------------------------- /src/llmtuner/tuner/core/adapter.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from typing import TYPE_CHECKING 4 | 5 | from peft import ( 6 | PeftModel, 7 | TaskType, 8 | LoraConfig, 9 | get_peft_model 10 | ) 11 | from peft.utils import CONFIG_NAME, WEIGHTS_NAME 12 | 13 | from llmtuner.extras.logging import get_logger 14 | from llmtuner.extras.save_and_load import load_trainable_params 15 | 16 | if TYPE_CHECKING: 17 | from transformers.modeling_utils import PreTrainedModel 18 | from llmtuner.hparams import ModelArguments, FinetuningArguments 19 | 20 | 21 | logger = get_logger(__name__) 22 | 23 | 24 | def init_adapter( 25 | model: "PreTrainedModel", 26 | model_args: "ModelArguments", 27 | finetuning_args: "FinetuningArguments", 28 | is_trainable: bool, 29 | is_mergeable: bool 30 | ) -> "PreTrainedModel": 31 | r""" 32 | Initializes the adapters. 33 | 34 | Support full-parameter, freeze and LoRA training. 35 | 36 | Note that the trainable parameters must be cast to float32. 37 | """ 38 | 39 | if finetuning_args.finetuning_type == "none" and is_trainable: 40 | raise ValueError("You cannot use finetuning_type=none while training.") 41 | 42 | if finetuning_args.finetuning_type == "full" and is_trainable: 43 | logger.info("Fine-tuning method: Full") 44 | model = model.float() 45 | 46 | if finetuning_args.finetuning_type == "freeze": 47 | logger.info("Fine-tuning method: Freeze") 48 | 49 | for name, param in model.named_parameters(): 50 | if not any(trainable_layer in name for trainable_layer in finetuning_args.trainable_layers): 51 | param.requires_grad_(False) 52 | else: 53 | param.data = param.data.to(torch.float32) 54 | 55 | if model_args.checkpoint_dir is not None: 56 | assert load_trainable_params(model, model_args.checkpoint_dir[0]), "Model checkpoint is not correctly loaded." 57 | 58 | if finetuning_args.finetuning_type == "lora": 59 | logger.info("Fine-tuning method: LoRA") 60 | latest_checkpoint = None 61 | 62 | if model_args.checkpoint_dir is not None: 63 | assert os.path.exists(os.path.join(model_args.checkpoint_dir[0], WEIGHTS_NAME)), \ 64 | "Provided path ({}) does not contain a LoRA weight.".format(model_args.checkpoint_dir[0]) 65 | assert os.path.exists(os.path.join(model_args.checkpoint_dir[0], CONFIG_NAME)), \ 66 | "The given checkpoint may be not a LoRA checkpoint, please specify `--finetuning_type full/freeze` instead." 
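# Note (added for clarity): when LoRA training is resumed (`resume_lora_training`) or the model
# cannot be merged, all checkpoints except the last are merged into the base weights and the last
# one is loaded as the active adapter; otherwise every checkpoint is merged and, when training,
# a fresh LoRA adapter is created below.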
67 | 68 | if (is_trainable and finetuning_args.resume_lora_training) or (not is_mergeable): # continually fine-tuning 69 | checkpoints_to_merge, latest_checkpoint = model_args.checkpoint_dir[:-1], model_args.checkpoint_dir[-1] 70 | else: 71 | checkpoints_to_merge = model_args.checkpoint_dir 72 | 73 | for checkpoint in checkpoints_to_merge: 74 | model = PeftModel.from_pretrained(model, checkpoint) 75 | model = model.merge_and_unload() 76 | 77 | if len(checkpoints_to_merge) > 0: 78 | logger.info("Merged {} model checkpoint(s).".format(len(checkpoints_to_merge))) 79 | 80 | if latest_checkpoint is not None: # resume lora training or quantized inference 81 | model = PeftModel.from_pretrained(model, latest_checkpoint, is_trainable=is_trainable) 82 | 83 | if is_trainable and latest_checkpoint is None: # create new lora weights while training 84 | lora_config = LoraConfig( 85 | task_type=TaskType.CAUSAL_LM, 86 | inference_mode=False, 87 | r=finetuning_args.lora_rank, 88 | lora_alpha=finetuning_args.lora_alpha, 89 | lora_dropout=finetuning_args.lora_dropout, 90 | target_modules=finetuning_args.lora_target 91 | ) 92 | model = get_peft_model(model, lora_config) 93 | 94 | if model_args.checkpoint_dir is not None: 95 | logger.info("Loaded fine-tuned model from checkpoint(s): {}".format(",".join(model_args.checkpoint_dir))) 96 | 97 | return model 98 | -------------------------------------------------------------------------------- /src/llmtuner/chat/stream_chat.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from typing import Any, Dict, Generator, List, Optional, Tuple 3 | from threading import Thread 4 | from transformers import TextIteratorStreamer 5 | 6 | from llmtuner.extras.misc import dispatch_model, get_logits_processor 7 | from llmtuner.extras.template import get_template_and_fix_tokenizer 8 | from llmtuner.tuner.core import get_infer_args, load_model_and_tokenizer 9 | 10 | 11 | class ChatModel: 12 | 13 | def __init__(self, args: Optional[Dict[str, Any]] = None) -> None: 14 | model_args, data_args, finetuning_args, self.generating_args = get_infer_args(args) 15 | self.model, self.tokenizer = load_model_and_tokenizer(model_args, finetuning_args) 16 | self.model = dispatch_model(self.model) 17 | self.model = self.model.eval() # enable evaluation mode 18 | self.template = get_template_and_fix_tokenizer(data_args.template, self.tokenizer) 19 | self.system_prompt = data_args.system_prompt 20 | 21 | def process_args( 22 | self, 23 | query: str, 24 | history: Optional[List[Tuple[str, str]]] = None, 25 | system: Optional[str] = None, 26 | **input_kwargs 27 | ) -> Tuple[Dict[str, Any], int]: 28 | system = system or self.system_prompt 29 | 30 | prompt, _ = self.template.encode_oneturn( 31 | tokenizer=self.tokenizer, query=query, resp="", history=history, system=system 32 | ) 33 | input_ids = torch.tensor([prompt], device=self.model.device) 34 | prompt_length = len(input_ids[0]) 35 | 36 | do_sample = input_kwargs.pop("do_sample", None) 37 | temperature = input_kwargs.pop("temperature", None) 38 | top_p = input_kwargs.pop("top_p", None) 39 | top_k = input_kwargs.pop("top_k", None) 40 | repetition_penalty = input_kwargs.pop("repetition_penalty", None) 41 | max_length = input_kwargs.pop("max_length", None) 42 | max_new_tokens = input_kwargs.pop("max_new_tokens", None) 43 | 44 | gen_kwargs = self.generating_args.to_dict() 45 | gen_kwargs.update(dict( 46 | input_ids=input_ids, 47 | do_sample=do_sample if do_sample is not None else 
gen_kwargs["do_sample"], 48 | temperature=temperature or gen_kwargs["temperature"], 49 | top_p=top_p or gen_kwargs["top_p"], 50 | top_k=top_k or gen_kwargs["top_k"], 51 | repetition_penalty=repetition_penalty or gen_kwargs["repetition_penalty"], 52 | eos_token_id=list(set([self.tokenizer.eos_token_id] + self.tokenizer.additional_special_tokens_ids)), 53 | pad_token_id=self.tokenizer.pad_token_id, 54 | logits_processor=get_logits_processor() 55 | )) 56 | 57 | if max_length: 58 | gen_kwargs.pop("max_new_tokens", None) 59 | gen_kwargs["max_length"] = max_length 60 | 61 | if max_new_tokens: 62 | gen_kwargs.pop("max_length", None) 63 | gen_kwargs["max_new_tokens"] = max_new_tokens 64 | 65 | return gen_kwargs, prompt_length 66 | 67 | @torch.inference_mode() 68 | def chat( 69 | self, 70 | query: str, 71 | history: Optional[List[Tuple[str, str]]] = None, 72 | system: Optional[str] = None, 73 | **input_kwargs 74 | ) -> Tuple[str, Tuple[int, int]]: 75 | gen_kwargs, prompt_length = self.process_args(query, history, system, **input_kwargs) 76 | generation_output = self.model.generate(**gen_kwargs) 77 | outputs = generation_output.tolist()[0][prompt_length:] 78 | response = self.tokenizer.decode(outputs, skip_special_tokens=True) 79 | response_length = len(outputs) 80 | return response, (prompt_length, response_length) 81 | 82 | @torch.inference_mode() 83 | def stream_chat( 84 | self, 85 | query: str, 86 | history: Optional[List[Tuple[str, str]]] = None, 87 | system: Optional[str] = None, 88 | **input_kwargs 89 | ) -> Generator[str, None, None]: 90 | gen_kwargs, _ = self.process_args(query, history, system, **input_kwargs) 91 | streamer = TextIteratorStreamer(self.tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True) 92 | gen_kwargs["streamer"] = streamer 93 | 94 | thread = Thread(target=self.model.generate, kwargs=gen_kwargs) 95 | thread.start() 96 | 97 | yield from streamer 98 | -------------------------------------------------------------------------------- /src/llmtuner/tuner/sft/workflow.py: -------------------------------------------------------------------------------- 1 | # Inspired by: https://github.com/huggingface/transformers/blob/v4.29.2/examples/pytorch/summarization/run_summarization.py 2 | 3 | from typing import TYPE_CHECKING, Optional, List 4 | from transformers import DataCollatorForSeq2Seq, Seq2SeqTrainingArguments 5 | 6 | from llmtuner.dsets import get_dataset, preprocess_dataset, split_dataset 7 | from llmtuner.extras.constants import IGNORE_INDEX 8 | from llmtuner.extras.misc import get_logits_processor 9 | from llmtuner.extras.ploting import plot_loss 10 | from llmtuner.tuner.core import load_model_and_tokenizer 11 | from llmtuner.tuner.sft.metric import ComputeMetrics 12 | from llmtuner.tuner.sft.trainer import Seq2SeqPeftTrainer 13 | from llmtuner.extras.logging import get_logger 14 | 15 | if TYPE_CHECKING: 16 | from transformers import TrainerCallback 17 | from llmtuner.hparams import ModelArguments, DataArguments, FinetuningArguments, GeneratingArguments 18 | 19 | 20 | logger = get_logger(__name__) 21 | 22 | 23 | def run_sft( 24 | model_args: "ModelArguments", 25 | data_args: "DataArguments", 26 | training_args: "Seq2SeqTrainingArguments", 27 | finetuning_args: "FinetuningArguments", 28 | generating_args: "GeneratingArguments", 29 | callbacks: Optional[List["TrainerCallback"]] = None 30 | ): 31 | dataset = get_dataset(model_args, data_args) 32 | model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, training_args.do_train, stage="sft") 33 | 
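# --- Editor's usage sketch for the ChatModel defined in stream_chat.py above ---
# (hedged example, kept as comments so it does not affect run_sft; the model path,
# template name and prompts are assumptions, not values taken from this repository)
#
# from llmtuner.chat import ChatModel
#
# chat_model = ChatModel(dict(
#     model_name_or_path="path/to/devops-model",   # hypothetical checkpoint path
#     template="default",
#     finetuning_type="lora",
# ))
# response, (prompt_len, response_len) = chat_model.chat("How do I check disk usage on Linux?")
# for piece in chat_model.stream_chat("How do I check disk usage on Linux?"):
#     print(piece, end="", flush=True)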
dataset = preprocess_dataset(dataset, tokenizer, data_args, training_args, stage="sft") 34 | data_collator = DataCollatorForSeq2Seq( 35 | tokenizer=tokenizer, 36 | label_pad_token_id=IGNORE_INDEX if data_args.ignore_pad_token_for_loss else tokenizer.pad_token_id 37 | ) 38 | 39 | # Override the decoding parameters of Seq2SeqTrainer 40 | training_args_dict = training_args.to_dict() 41 | training_args_dict.update(dict( 42 | generation_max_length=training_args.generation_max_length or data_args.max_target_length, 43 | generation_num_beams=data_args.eval_num_beams or training_args.generation_num_beams 44 | )) 45 | training_args = Seq2SeqTrainingArguments(**training_args_dict) 46 | 47 | logger.info('Tokenizer={}'.format(tokenizer)) 48 | # Initialize our Trainer 49 | trainer = Seq2SeqPeftTrainer( 50 | finetuning_args=finetuning_args, 51 | model=model, 52 | args=training_args, 53 | tokenizer=tokenizer, 54 | data_collator=data_collator, 55 | callbacks=callbacks, 56 | compute_metrics=ComputeMetrics(tokenizer) if training_args.predict_with_generate else None, 57 | **split_dataset(dataset, data_args, training_args) 58 | ) 59 | 60 | # Keyword arguments for `model.generate` 61 | gen_kwargs = generating_args.to_dict() 62 | gen_kwargs["eos_token_id"] = list(set([tokenizer.eos_token_id] + tokenizer.additional_special_tokens_ids)) 63 | gen_kwargs["pad_token_id"] = tokenizer.pad_token_id 64 | gen_kwargs["logits_processor"] = get_logits_processor() 65 | 66 | # Training 67 | if training_args.do_train: 68 | train_result = trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint) 69 | trainer.log_metrics("train", train_result.metrics) 70 | trainer.save_metrics("train", train_result.metrics) 71 | if finetuning_args.save_at_end: 72 | trainer.save_state() 73 | trainer.save_model() 74 | 75 | if trainer.is_world_process_zero() and model_args.plot_loss: 76 | plot_loss(training_args.output_dir, keys=["loss", "eval_loss"]) 77 | 78 | # Evaluation 79 | if training_args.do_eval: 80 | metrics = trainer.evaluate(metric_key_prefix="eval", **gen_kwargs) 81 | if training_args.predict_with_generate: # eval_loss will be wrong if predict_with_generate is enabled 82 | metrics.pop("eval_loss", None) 83 | trainer.log_metrics("eval", metrics) 84 | trainer.save_metrics("eval", metrics) 85 | 86 | # Predict 87 | if training_args.do_predict: 88 | predict_results = trainer.predict(dataset, metric_key_prefix="predict", **gen_kwargs) 89 | if training_args.predict_with_generate: # predict_loss will be wrong if predict_with_generate is enabled 90 | predict_results.metrics.pop("predict_loss", None) 91 | trainer.log_metrics("predict", predict_results.metrics) 92 | trainer.save_metrics("predict", predict_results.metrics) 93 | trainer.save_predictions(predict_results) 94 | -------------------------------------------------------------------------------- /src/llmtuner/tuner/sft/trainer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import torch 4 | import numpy as np 5 | import torch.nn as nn 6 | from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union 7 | 8 | from llmtuner.extras.constants import IGNORE_INDEX 9 | from llmtuner.extras.logging import get_logger 10 | from llmtuner.tuner.core.trainer import PeftTrainer 11 | 12 | if TYPE_CHECKING: 13 | from transformers.trainer import PredictionOutput 14 | 15 | 16 | logger = get_logger(__name__) 17 | 18 | 19 | class Seq2SeqPeftTrainer(PeftTrainer): 20 | r""" 21 | Inherits PeftTrainer to compute 
generative metrics such as BLEU and ROUGE.
22 | """
23 | 
24 | def prediction_step(
25 | self,
26 | model: nn.Module,
27 | inputs: Dict[str, Union[torch.Tensor, Any]],
28 | prediction_loss_only: bool,
29 | ignore_keys: Optional[List[str]] = None,
30 | ) -> Tuple[Optional[float], Optional[torch.Tensor], Optional[torch.Tensor]]:
31 | r"""
32 | Removes the prompt part in the generated tokens.
33 | 
34 | Subclass and override to inject custom behavior.
35 | """
36 | prompt_len, label_len = inputs["input_ids"].size(-1), inputs["labels"].size(-1)
37 | if prompt_len > label_len:
38 | inputs["labels"] = self._pad_tensors_to_target_len(inputs["labels"], inputs["input_ids"])
39 | if label_len > prompt_len:
40 | inputs["input_ids"] = self._pad_tensors_to_target_len(inputs["input_ids"], inputs["labels"])
41 | if "attention_mask" in inputs:
42 | inputs["attention_mask"] = self._pad_tensors_to_target_len(
43 | inputs["attention_mask"], inputs["labels"], pad_token_id=0
44 | )
45 | if "position_ids" in inputs:
46 | inputs["position_ids"] = self._pad_tensors_to_target_len(
47 | inputs["position_ids"], inputs["labels"], pad_token_id=0
48 | )
49 | 
50 | loss, generated_tokens, labels = super().prediction_step(
51 | model, inputs, prediction_loss_only=prediction_loss_only, ignore_keys=ignore_keys
52 | )
53 | if generated_tokens is not None:
54 | generated_tokens[:, :max(prompt_len, label_len)] = (
55 | self.tokenizer.pad_token_id * torch.ones_like(generated_tokens[:, :max(prompt_len, label_len)])
56 | )
57 | 
58 | return loss, generated_tokens, labels
59 | 
60 | def _pad_tensors_to_target_len(
61 | self,
62 | src_tensor: torch.Tensor,
63 | tgt_tensor: torch.Tensor,
64 | pad_token_id: Optional[int] = None
65 | ) -> torch.Tensor:
66 | r"""
67 | Pads the tensor to the same length as the target tensor.
68 | 
69 | Should only be called when predict_with_generate=True.
70 | """
71 | if pad_token_id is None:
72 | if self.tokenizer is not None and hasattr(self.tokenizer, "pad_token_id"):
73 | assert self.tokenizer.padding_side == "left", "This method only accepts left-padded tensors."
74 | pad_token_id = self.tokenizer.pad_token_id
75 | else:
76 | raise ValueError("PAD token is required.")
77 | 
78 | padded_tensor = pad_token_id * torch.ones_like(tgt_tensor)
79 | padded_tensor[:, -src_tensor.shape[-1]:] = src_tensor # adopt left-padding
80 | return padded_tensor.contiguous() # in contiguous memory
81 | 
82 | def save_predictions(
83 | self,
84 | predict_results: "PredictionOutput"
85 | ) -> None:
86 | r"""
87 | Saves model predictions to `output_dir`.
88 | 
89 | A custom behavior that is not contained in Seq2SeqTrainer.
90 | """ 91 | if not self.is_world_process_zero(): 92 | return 93 | 94 | output_prediction_file = os.path.join(self.args.output_dir, "generated_predictions.jsonl") 95 | logger.info(f"Saving prediction results to {output_prediction_file}") 96 | 97 | preds = np.where(predict_results.predictions != IGNORE_INDEX, predict_results.predictions, self.tokenizer.pad_token_id) 98 | labels = np.where(predict_results.label_ids != IGNORE_INDEX, predict_results.label_ids, self.tokenizer.pad_token_id) 99 | 100 | decoded_preds = self.tokenizer.batch_decode(preds, skip_special_tokens=True, clean_up_tokenization_spaces=True) 101 | decoded_labels = self.tokenizer.batch_decode(labels, skip_special_tokens=True, clean_up_tokenization_spaces=True) 102 | 103 | with open(output_prediction_file, "w", encoding="utf-8") as writer: 104 | res: List[str] = [] 105 | for pred, label in zip(decoded_preds, decoded_labels): 106 | res.append(json.dumps({"label": label, "predict": pred}, ensure_ascii=False)) 107 | writer.write("\n".join(res)) 108 | -------------------------------------------------------------------------------- /src/llmtuner/tuner/core/trainer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from typing import TYPE_CHECKING, Dict, Optional 4 | 5 | from transformers import Seq2SeqTrainer 6 | from transformers.trainer import TRAINING_ARGS_NAME, WEIGHTS_NAME 7 | from transformers.modeling_utils import PreTrainedModel, unwrap_model 8 | from peft import PeftModel 9 | from trl import PreTrainedModelWrapper 10 | 11 | from llmtuner.extras.constants import FINETUNING_ARGS_NAME, VALUE_HEAD_FILE_NAME 12 | from llmtuner.extras.logging import get_logger 13 | from llmtuner.extras.save_and_load import get_state_dict, load_trainable_params 14 | 15 | if TYPE_CHECKING: 16 | from transformers import PreTrainedTokenizer, Seq2SeqTrainingArguments, TrainerState 17 | from llmtuner.hparams import FinetuningArguments 18 | 19 | 20 | logger = get_logger(__name__) 21 | 22 | 23 | class PeftModelMixin: 24 | r""" 25 | Patches the save and load methods in Hugging Face Trainer for PeftModel and ModelWithValueHead. 26 | """ 27 | 28 | def __init__(self) -> None: # for type checking 29 | self.model: PreTrainedModel = None 30 | self.tokenizer: "PreTrainedTokenizer" = None 31 | self.args: "Seq2SeqTrainingArguments" = None 32 | self.finetuning_args: "FinetuningArguments" = None 33 | self.state: "TrainerState" = None 34 | raise AssertionError("Mixin should not be initialized.") 35 | 36 | def _save(self, output_dir: Optional[str] = None, state_dict: Optional[Dict[str, torch.Tensor]] = None) -> None: 37 | r""" 38 | Saves trainable parameters as model checkpoint. 39 | 40 | This function will only be executed at the process zero. 41 | 42 | Subclass and override to inject custom behavior. It should not be directly used by external scripts. 
43 | """ 44 | output_dir = output_dir if output_dir is not None else self.args.output_dir 45 | os.makedirs(output_dir, exist_ok=True) 46 | logger.info(f"Saving model checkpoint to {output_dir}") 47 | 48 | model = unwrap_model(self.model) 49 | if isinstance(model, PreTrainedModelWrapper): 50 | # Custom state dict: https://github.com/lvwerra/trl/blob/v0.4.7/trl/models/modeling_value_head.py#L200 51 | model_state_dict = state_dict or model.state_dict() 52 | v_head_state_dict = { 53 | name.replace("v_head.", ""): model_state_dict[name].cpu().clone().detach() 54 | for name in model_state_dict.keys() if name.startswith("v_head.") 55 | } 56 | 57 | torch.save(v_head_state_dict, os.path.join(output_dir, VALUE_HEAD_FILE_NAME)) 58 | model = model.pretrained_model 59 | 60 | state_dict = state_dict or get_state_dict(model) 61 | if isinstance(model, (PeftModel, PreTrainedModel)): 62 | model.config.use_cache = True 63 | model.save_pretrained(output_dir, state_dict=state_dict, safe_serialization=self.args.save_safetensors) 64 | model.config.use_cache = False 65 | else: 66 | torch.save(state_dict, os.path.join(output_dir, WEIGHTS_NAME)) 67 | 68 | if self.finetuning_args.finetuning_type == "full" and self.tokenizer is not None: 69 | try: 70 | self.tokenizer.save_pretrained(output_dir) 71 | except: 72 | logger.warning("Cannot save tokenizer, copy the files manually.") 73 | 74 | with open(os.path.join(output_dir, TRAINING_ARGS_NAME), "w", encoding="utf-8") as f: 75 | f.write(self.args.to_json_string() + "\n") 76 | 77 | self.finetuning_args.save_to_json(os.path.join(output_dir, FINETUNING_ARGS_NAME)) 78 | 79 | def _load_best_model(self): 80 | r""" 81 | Loads trainable parameters from model checkpoint. 82 | 83 | Subclass and override to inject custom behavior. It should not be directly used by external scripts. 84 | """ 85 | logger.info(f"Loading best model from {self.state.best_model_checkpoint} (score: {self.state.best_metric}).") 86 | model = unwrap_model(self.model) 87 | 88 | if isinstance(model, PreTrainedModelWrapper): 89 | model.v_head.load_state_dict(torch.load( 90 | os.path.join(self.state.best_model_checkpoint, VALUE_HEAD_FILE_NAME), map_location="cpu" 91 | )) 92 | model = model.pretrained_model 93 | 94 | if isinstance(model, PeftModel): 95 | model.load_adapter(self.state.best_model_checkpoint, model.active_adapter) 96 | else: # freeze/full-tuning 97 | load_trainable_params(model, self.state.best_model_checkpoint) 98 | 99 | 100 | class PeftTrainer(PeftModelMixin, Seq2SeqTrainer): 101 | r""" 102 | Inherits Seq2SeqTrainer to support parameter-efficient checkpoints. 
103 | """ 104 | 105 | def __init__(self, finetuning_args: "FinetuningArguments", **kwargs): 106 | Seq2SeqTrainer.__init__(self, **kwargs) 107 | self.finetuning_args = finetuning_args 108 | -------------------------------------------------------------------------------- /src/llmtuner/api/app.py: -------------------------------------------------------------------------------- 1 | import uvicorn 2 | from fastapi import FastAPI, HTTPException 3 | from fastapi.middleware.cors import CORSMiddleware 4 | from contextlib import asynccontextmanager 5 | from sse_starlette import EventSourceResponse 6 | from typing import List, Tuple 7 | 8 | from llmtuner.extras.misc import torch_gc 9 | from llmtuner.chat import ChatModel 10 | from llmtuner.api.protocol import ( 11 | Role, 12 | Finish, 13 | ModelCard, 14 | ModelList, 15 | ChatMessage, 16 | DeltaMessage, 17 | ChatCompletionRequest, 18 | ChatCompletionResponse, 19 | ChatCompletionStreamResponse, 20 | ChatCompletionResponseChoice, 21 | ChatCompletionResponseStreamChoice, 22 | ChatCompletionResponseUsage 23 | ) 24 | 25 | 26 | @asynccontextmanager 27 | async def lifespan(app: FastAPI): # collects GPU memory 28 | yield 29 | torch_gc() 30 | 31 | 32 | def create_app(chat_model: ChatModel) -> FastAPI: 33 | app = FastAPI(lifespan=lifespan) 34 | 35 | app.add_middleware( 36 | CORSMiddleware, 37 | allow_origins=["*"], 38 | allow_credentials=True, 39 | allow_methods=["*"], 40 | allow_headers=["*"], 41 | ) 42 | 43 | @app.get("/v1/models", response_model=ModelList) 44 | async def list_models(): 45 | model_card = ModelCard(id="gpt-3.5-turbo") 46 | return ModelList(data=[model_card]) 47 | 48 | @app.post("/v1/chat/completions", response_model=ChatCompletionResponse) 49 | async def create_chat_completion(request: ChatCompletionRequest): 50 | if len(request.messages) < 1 or request.messages[-1].role != Role.USER: 51 | raise HTTPException(status_code=400, detail="Invalid request") 52 | 53 | query = request.messages[-1].content 54 | prev_messages = request.messages[:-1] 55 | if len(prev_messages) > 0 and prev_messages[0].role == Role.SYSTEM: 56 | system = prev_messages.pop(0).content 57 | else: 58 | system = None 59 | 60 | history = [] 61 | if len(prev_messages) % 2 == 0: 62 | for i in range(0, len(prev_messages), 2): 63 | if prev_messages[i].role == Role.USER and prev_messages[i+1].role == Role.ASSISTANT: 64 | history.append([prev_messages[i].content, prev_messages[i+1].content]) 65 | 66 | if request.stream: 67 | generate = predict(query, history, system, request) 68 | return EventSourceResponse(generate, media_type="text/event-stream") 69 | 70 | response, (prompt_length, response_length) = chat_model.chat( 71 | query, history, system, temperature=request.temperature, top_p=request.top_p, max_new_tokens=request.max_tokens 72 | ) 73 | 74 | usage = ChatCompletionResponseUsage( 75 | prompt_tokens=prompt_length, 76 | completion_tokens=response_length, 77 | total_tokens=prompt_length+response_length 78 | ) 79 | 80 | choice_data = ChatCompletionResponseChoice( 81 | index=0, 82 | message=ChatMessage(role=Role.ASSISTANT, content=response), 83 | finish_reason=Finish.STOP 84 | ) 85 | 86 | return ChatCompletionResponse(model=request.model, choices=[choice_data], usage=usage) 87 | 88 | async def predict(query: str, history: List[Tuple[str, str]], system: str, request: ChatCompletionRequest): 89 | choice_data = ChatCompletionResponseStreamChoice( 90 | index=0, 91 | delta=DeltaMessage(role=Role.ASSISTANT), 92 | finish_reason=None 93 | ) 94 | chunk = 
ChatCompletionStreamResponse(model=request.model, choices=[choice_data]) 95 | yield chunk.json(exclude_unset=True, ensure_ascii=False) 96 | 97 | for new_text in chat_model.stream_chat( 98 | query, history, system, temperature=request.temperature, top_p=request.top_p, max_new_tokens=request.max_tokens 99 | ): 100 | if len(new_text) == 0: 101 | continue 102 | 103 | choice_data = ChatCompletionResponseStreamChoice( 104 | index=0, 105 | delta=DeltaMessage(content=new_text), 106 | finish_reason=None 107 | ) 108 | chunk = ChatCompletionStreamResponse(model=request.model, choices=[choice_data]) 109 | yield chunk.json(exclude_unset=True, ensure_ascii=False) 110 | 111 | choice_data = ChatCompletionResponseStreamChoice( 112 | index=0, 113 | delta=DeltaMessage(), 114 | finish_reason=Finish.STOP 115 | ) 116 | chunk = ChatCompletionStreamResponse(model=request.model, choices=[choice_data]) 117 | yield chunk.json(exclude_unset=True, ensure_ascii=False) 118 | yield "[DONE]" 119 | 120 | return app 121 | 122 | 123 | if __name__ == "__main__": 124 | chat_model = ChatModel() 125 | app = create_app(chat_model) 126 | uvicorn.run(app, host="0.0.0.0", port=8000, workers=1) 127 | -------------------------------------------------------------------------------- /src/llmtuner/extras/misc.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from typing import TYPE_CHECKING, List, Optional, Tuple 3 | from transformers import InfNanRemoveLogitsProcessor, LogitsProcessorList 4 | 5 | from llmtuner.extras.constants import LAYERNORM_NAMES 6 | 7 | if TYPE_CHECKING: 8 | from transformers.modeling_utils import PreTrainedModel 9 | 10 | 11 | class AverageMeter: 12 | r""" 13 | Computes and stores the average and current value. 14 | """ 15 | def __init__(self): 16 | self.reset() 17 | 18 | def reset(self): 19 | self.val = 0 20 | self.avg = 0 21 | self.sum = 0 22 | self.count = 0 23 | 24 | def update(self, val, n=1): 25 | self.val = val 26 | self.sum += val * n 27 | self.count += n 28 | self.avg = self.sum / self.count 29 | 30 | 31 | def get_logits_processor() -> LogitsProcessorList: 32 | logits_processor = LogitsProcessorList() 33 | logits_processor.append(InfNanRemoveLogitsProcessor()) 34 | return logits_processor 35 | 36 | 37 | def count_parameters(model: torch.nn.Module) -> Tuple[int, int]: 38 | r""" 39 | Returns the number of trainable parameters and number of all parameters in the model. 
40 | """ 41 | trainable_params, all_param = 0, 0 42 | for param in model.parameters(): 43 | num_params = param.numel() 44 | # if using DS Zero 3 and the weights are initialized empty 45 | if num_params == 0 and hasattr(param, "ds_numel"): 46 | num_params = param.ds_numel 47 | 48 | # Due to the design of 4bit linear layers from bitsandbytes, multiply the number of parameters by 2 49 | if param.__class__.__name__ == "Params4bit": 50 | num_params = num_params * 2 51 | 52 | all_param += num_params 53 | if param.requires_grad: 54 | trainable_params += num_params 55 | 56 | return trainable_params, all_param 57 | 58 | 59 | # Includes: (1) cast the layernorm in fp32 (2) make output embedding layer require grads (3) upcast the lm_head to fp32 60 | # Inspired by: https://github.com/huggingface/peft/blob/c0209c35abbf88c63aa267800d98a8e212ed0a42/src/peft/utils/other.py#L35 61 | def prepare_model_for_training( 62 | model: "PreTrainedModel", 63 | finetuning_type: str, 64 | output_layer_name: Optional[str] = "lm_head", 65 | use_gradient_checkpointing: Optional[bool] = True, 66 | layer_norm_names: Optional[List[str]] = LAYERNORM_NAMES 67 | ) -> "PreTrainedModel": 68 | for name, param in model.named_parameters(): 69 | if param.ndim == 1 and any(layer_norm_name in name for layer_norm_name in layer_norm_names): 70 | param.data = param.data.to(torch.float32) 71 | 72 | if use_gradient_checkpointing: 73 | if hasattr(model, "enable_input_require_grads"): 74 | model.enable_input_require_grads() 75 | else: 76 | def make_inputs_require_grad(module, input, output): 77 | output.requires_grad_(True) 78 | model.get_input_embeddings().register_forward_hook(make_inputs_require_grad) 79 | 80 | model.gradient_checkpointing_enable() 81 | model.config.use_cache = False # turn off when gradient checkpointing is enabled 82 | 83 | if finetuning_type != "full" and hasattr(model, output_layer_name): 84 | output_layer: torch.nn.Linear = getattr(model, output_layer_name) 85 | input_dtype = output_layer.weight.dtype 86 | 87 | class CastOutputToFloat(torch.nn.Sequential): 88 | 89 | def forward(self, x: torch.Tensor) -> torch.Tensor: 90 | return super().forward(x.to(input_dtype)).to(torch.float32) 91 | 92 | setattr(model, output_layer_name, CastOutputToFloat(output_layer)) 93 | 94 | return model 95 | 96 | 97 | def torch_gc() -> None: 98 | r""" 99 | Collects GPU memory. 100 | """ 101 | if torch.cuda.is_available(): 102 | torch.cuda.empty_cache() 103 | torch.cuda.ipc_collect() 104 | 105 | 106 | def dispatch_model(model: "PreTrainedModel") -> "PreTrainedModel": 107 | r""" 108 | Dispatches a pre-trained model to GPUs with balanced memory. 109 | Borrowed from: https://github.com/huggingface/transformers/blob/v4.31.0/src/transformers/modeling_utils.py#L2803 110 | """ 111 | if getattr(model, "is_loaded_in_8bit", False) or getattr(model, "is_loaded_in_4bit", False): # do nothing 112 | return model 113 | 114 | if torch.cuda.device_count() > 1: 115 | from accelerate import dispatch_model 116 | from accelerate.utils import infer_auto_device_map, get_balanced_memory 117 | 118 | if model._no_split_modules is None: 119 | raise ValueError("The model class needs to implement the `_no_split_modules` attribute.") 120 | 121 | kwargs = {"dtype": model.dtype, "no_split_module_classes": model._no_split_modules} 122 | max_memory = get_balanced_memory(model, **kwargs) 123 | # Make sure tied weights are tied before creating the device map. 
124 | model.tie_weights() 125 | device_map = infer_auto_device_map(model, max_memory=max_memory, **kwargs) 126 | return dispatch_model(model, device_map) 127 | else: 128 | return model.cuda() 129 | -------------------------------------------------------------------------------- /src/llmtuner/hparams/finetuning_args.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import Literal, Optional 3 | from dataclasses import asdict, dataclass, field 4 | 5 | 6 | @dataclass 7 | class FinetuningArguments: 8 | r""" 9 | Arguments pertaining to which techniques we are going to fine-tuning with. 10 | """ 11 | finetuning_type: Optional[Literal["lora", "freeze", "full", "none"]] = field( 12 | default="lora", 13 | metadata={"help": "Which fine-tuning method to use."} 14 | ) 15 | num_hidden_layers: Optional[int] = field( 16 | default=32, 17 | metadata={"help": "Number of decoder blocks in the model for partial-parameter (freeze) fine-tuning. \ 18 | LLaMA choices: [\"32\", \"40\", \"60\", \"80\"], \ 19 | LLaMA-2 choices: [\"32\", \"40\", \"80\"], \ 20 | BLOOM choices: [\"24\", \"30\", \"70\"], \ 21 | Falcon choices: [\"32\", \"60\"], \ 22 | Baichuan choices: [\"32\", \"40\"] \ 23 | Qwen choices: [\"32\"], \ 24 | XVERSE choices: [\"40\"], \ 25 | ChatGLM2 choices: [\"28\"]"} 26 | ) 27 | num_layer_trainable: Optional[int] = field( 28 | default=3, 29 | metadata={"help": "Number of trainable layers for partial-parameter (freeze) fine-tuning."} 30 | ) 31 | name_module_trainable: Optional[Literal["mlp", "self_attn", "self_attention"]] = field( 32 | default="mlp", 33 | metadata={"help": "Name of trainable modules for partial-parameter (freeze) fine-tuning. \ 34 | LLaMA choices: [\"mlp\", \"self_attn\"], \ 35 | BLOOM & Falcon & ChatGLM2 choices: [\"mlp\", \"self_attention\"], \ 36 | Baichuan choices: [\"mlp\", \"self_attn\"], \ 37 | Qwen choices: [\"mlp\", \"attn\"], \ 38 | LLaMA-2, InternLM, XVERSE choices: the same as LLaMA."} 39 | ) 40 | lora_rank: Optional[int] = field( 41 | default=8, 42 | metadata={"help": "The intrinsic dimension for LoRA fine-tuning."} 43 | ) 44 | lora_alpha: Optional[float] = field( 45 | default=32.0, 46 | metadata={"help": "The scale factor for LoRA fine-tuning (similar with the learning rate)."} 47 | ) 48 | lora_dropout: Optional[float] = field( 49 | default=0.1, 50 | metadata={"help": "Dropout rate for the LoRA fine-tuning."} 51 | ) 52 | lora_target: Optional[str] = field( 53 | default=None, 54 | metadata={"help": "Name(s) of target modules to apply LoRA. Use commas to separate multiple modules. 
\
55 | LLaMA choices: [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\", \"gate_proj\", \"up_proj\", \"down_proj\"], \
56 | BLOOM & Falcon & ChatGLM2 choices: [\"query_key_value\", \"self_attention.dense\", \"mlp.dense\"], \
57 | Baichuan choices: [\"W_pack\", \"o_proj\", \"gate_proj\", \"up_proj\", \"down_proj\"], \
58 | Qwen choices: [\"c_attn\", \"attn.c_proj\", \"w1\", \"w2\", \"mlp.c_proj\"], \
59 | LLaMA-2, InternLM, XVERSE choices: the same as LLaMA."}
60 | )
61 | resume_lora_training: Optional[bool] = field(
62 | default=True,
63 | metadata={"help": "Whether to resume training from the last LoRA weights or create new weights after merging them."}
64 | )
65 | ppo_score_norm: Optional[bool] = field(
66 | default=False,
67 | metadata={"help": "Use score normalization in PPO training."}
68 | )
69 | dpo_beta: Optional[float] = field(
70 | default=0.1,
71 | metadata={"help": "The beta parameter for the DPO loss."}
72 | )
73 | save_at_end: Optional[bool] = field(
74 | default=False,
75 | metadata={"help": "Whether to save the model at the end of training."}
76 | )
77 | 
78 | def __post_init__(self):
79 | if isinstance(self.lora_target, str): # support custom target modules/layers of LoRA
80 | self.lora_target = [target.strip() for target in self.lora_target.split(",")]
81 | 
82 | if self.num_layer_trainable > 0: # fine-tuning the last n layers if num_layer_trainable > 0
83 | trainable_layer_ids = [self.num_hidden_layers - k - 1 for k in range(self.num_layer_trainable)]
84 | else: # fine-tuning the first n layers if num_layer_trainable < 0
85 | trainable_layer_ids = [k for k in range(-self.num_layer_trainable)]
86 | 
87 | self.trainable_layers = ["{:d}.{}".format(idx, self.name_module_trainable) for idx in trainable_layer_ids]
88 | 
89 | assert self.finetuning_type in ["lora", "freeze", "full",
90 | "none"], "Invalid fine-tuning method."
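    # --- Editor's note (hedged example of what __post_init__ above computes; the values
    # are illustrative assumptions, not project defaults) ---
    #
    #   args = FinetuningArguments(
    #       finetuning_type="freeze",
    #       num_hidden_layers=32,
    #       num_layer_trainable=3,
    #       name_module_trainable="mlp",
    #       lora_target="q_proj,v_proj",
    #   )
    #   args.lora_target       == ["q_proj", "v_proj"]             # comma-separated string is split
    #   args.trainable_layers  == ["31.mlp", "30.mlp", "29.mlp"]   # the last 3 decoder blocks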
91 | 92 | def save_to_json(self, json_path: str): 93 | r"""Saves the content of this instance in JSON format inside `json_path`.""" 94 | json_string = json.dumps(asdict(self), indent=2, sort_keys=True) + "\n" 95 | with open(json_path, "w", encoding="utf-8") as f: 96 | f.write(json_string) 97 | 98 | @classmethod 99 | def load_from_json(cls, json_path: str): 100 | r"""Creates an instance from the content of `json_path`.""" 101 | with open(json_path, "r", encoding="utf-8") as f: 102 | text = f.read() 103 | return cls(**json.loads(text)) 104 | -------------------------------------------------------------------------------- /src/llmtuner/extras/callbacks.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import time 4 | from typing import TYPE_CHECKING 5 | from datetime import timedelta 6 | 7 | from transformers import TrainerCallback 8 | from transformers.trainer_utils import has_length 9 | 10 | from llmtuner.extras.constants import LOG_FILE_NAME 11 | from llmtuner.extras.logging import get_logger 12 | 13 | if TYPE_CHECKING: 14 | from transformers import TrainingArguments, TrainerState, TrainerControl 15 | 16 | 17 | logger = get_logger(__name__) 18 | 19 | 20 | class LogCallback(TrainerCallback): 21 | 22 | def __init__(self, runner=None): 23 | self.runner = runner 24 | self.in_training = False 25 | self.start_time = time.time() 26 | self.cur_steps = 0 27 | self.max_steps = 0 28 | self.elapsed_time = "" 29 | self.remaining_time = "" 30 | 31 | def timing(self): 32 | cur_time = time.time() 33 | elapsed_time = cur_time - self.start_time 34 | avg_time_per_step = elapsed_time / self.cur_steps if self.cur_steps != 0 else 0 35 | remaining_time = (self.max_steps - self.cur_steps) * avg_time_per_step 36 | self.elapsed_time = str(timedelta(seconds=int(elapsed_time))) 37 | self.remaining_time = str(timedelta(seconds=int(remaining_time))) 38 | 39 | def on_train_begin(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs): 40 | r""" 41 | Event called at the beginning of training. 42 | """ 43 | if state.is_local_process_zero: 44 | self.in_training = True 45 | self.start_time = time.time() 46 | self.max_steps = state.max_steps 47 | if os.path.exists(os.path.join(args.output_dir, LOG_FILE_NAME)): 48 | logger.warning("Previous log file in this folder will be deleted.") 49 | os.remove(os.path.join(args.output_dir, LOG_FILE_NAME)) 50 | 51 | def on_train_end(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs): 52 | r""" 53 | Event called at the end of training. 54 | """ 55 | if state.is_local_process_zero: 56 | self.in_training = False 57 | self.cur_steps = 0 58 | self.max_steps = 0 59 | 60 | def on_substep_end(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs): 61 | r""" 62 | Event called at the end of an substep during gradient accumulation. 63 | """ 64 | if state.is_local_process_zero and self.runner is not None and self.runner.aborted: 65 | control.should_epoch_stop = True 66 | control.should_training_stop = True 67 | 68 | def on_step_end(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs): 69 | r""" 70 | Event called at the end of a training step. 
71 | """ 72 | if state.is_local_process_zero: 73 | self.cur_steps = state.global_step 74 | self.timing() 75 | if self.runner is not None and self.runner.aborted: 76 | control.should_epoch_stop = True 77 | control.should_training_stop = True 78 | 79 | def on_evaluate(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs): 80 | r""" 81 | Event called after an evaluation phase. 82 | """ 83 | if state.is_local_process_zero and not self.in_training: 84 | self.cur_steps = 0 85 | self.max_steps = 0 86 | 87 | def on_predict(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", *other, **kwargs): 88 | r""" 89 | Event called after a successful prediction. 90 | """ 91 | if state.is_local_process_zero and not self.in_training: 92 | self.cur_steps = 0 93 | self.max_steps = 0 94 | 95 | def on_log(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs) -> None: 96 | r""" 97 | Event called after logging the last logs. 98 | """ 99 | if not state.is_local_process_zero: 100 | return 101 | 102 | logs = dict( 103 | current_steps=self.cur_steps, 104 | total_steps=self.max_steps, 105 | loss=state.log_history[-1].get("loss", None), 106 | eval_loss=state.log_history[-1].get("eval_loss", None), 107 | predict_loss=state.log_history[-1].get("predict_loss", None), 108 | reward=state.log_history[-1].get("reward", None), 109 | learning_rate=state.log_history[-1].get("learning_rate", None), 110 | epoch=state.log_history[-1].get("epoch", None), 111 | percentage=round(self.cur_steps / self.max_steps * 100, 2) if self.max_steps != 0 else 100, 112 | elapsed_time=self.elapsed_time, 113 | remaining_time=self.remaining_time 114 | ) 115 | os.makedirs(args.output_dir, exist_ok=True) 116 | with open(os.path.join(args.output_dir, "trainer_log.jsonl"), "a", encoding="utf-8") as f: 117 | f.write(json.dumps(logs) + "\n") 118 | 119 | def on_prediction_step(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs): 120 | r""" 121 | Event called after a prediction step. 
122 | """ 123 | eval_dataloader = kwargs.pop("eval_dataloader", None) 124 | if state.is_local_process_zero and has_length(eval_dataloader) and not self.in_training: 125 | if self.max_steps == 0: 126 | self.max_steps = len(eval_dataloader) 127 | self.cur_steps += 1 128 | self.timing() 129 | -------------------------------------------------------------------------------- /src/llmtuner/webui/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import gradio as gr 4 | import matplotlib.figure 5 | import matplotlib.pyplot as plt 6 | from typing import TYPE_CHECKING, Any, Dict, Generator, List, Tuple 7 | from datetime import datetime 8 | 9 | from llmtuner.dsets.utils import EXT2TYPE 10 | from llmtuner.extras.ploting import smooth 11 | from llmtuner.tuner import export_model 12 | from llmtuner.webui.common import get_model_path, get_save_dir, DATA_CONFIG 13 | from llmtuner.webui.locales import ALERTS 14 | 15 | if TYPE_CHECKING: 16 | from llmtuner.extras.callbacks import LogCallback 17 | 18 | 19 | def update_process_bar(callback: "LogCallback") -> Dict[str, Any]: 20 | if not callback.max_steps: 21 | return gr.update(visible=False) 22 | 23 | percentage = round(100 * callback.cur_steps / callback.max_steps, 0) if callback.max_steps != 0 else 100.0 24 | label = "Running {:d}/{:d}: {} < {}".format( 25 | callback.cur_steps, 26 | callback.max_steps, 27 | callback.elapsed_time, 28 | callback.remaining_time 29 | ) 30 | return gr.update(label=label, value=percentage, visible=True) 31 | 32 | 33 | def get_time() -> str: 34 | return datetime.now().strftime('%Y-%m-%d-%H-%M-%S') 35 | 36 | 37 | def can_preview(dataset_dir: str, dataset: list) -> Dict[str, Any]: 38 | with open(os.path.join(dataset_dir, DATA_CONFIG), "r", encoding="utf-8") as f: 39 | dataset_info = json.load(f) 40 | if ( 41 | len(dataset) > 0 42 | and "file_name" in dataset_info[dataset[0]] 43 | and os.path.isfile(os.path.join(dataset_dir, dataset_info[dataset[0]]["file_name"])) 44 | ): 45 | return gr.update(interactive=True) 46 | else: 47 | return gr.update(interactive=False) 48 | 49 | 50 | def get_preview(dataset_dir: str, dataset: list) -> Tuple[int, list, Dict[str, Any]]: 51 | with open(os.path.join(dataset_dir, DATA_CONFIG), "r", encoding="utf-8") as f: 52 | dataset_info = json.load(f) 53 | data_file = dataset_info[dataset[0]]["file_name"] 54 | data = [] 55 | data_format = EXT2TYPE.get(data_file.split(".")[-1], None) 56 | if data_format == "text": 57 | with open(os.path.join(dataset_dir, data_file), "r", encoding="utf-8") as f: 58 | for line in f: 59 | data.append(line.strip()) 60 | elif data_format == "json": 61 | with open(os.path.join(dataset_dir, data_file), "r", encoding="utf-8") as f: 62 | data = json.load(f) 63 | return len(data), data[:2], gr.update(visible=True) 64 | 65 | 66 | def can_quantize(finetuning_type: str) -> Dict[str, Any]: 67 | if finetuning_type != "lora": 68 | return gr.update(value="", interactive=False) 69 | else: 70 | return gr.update(interactive=True) 71 | 72 | 73 | def gen_cmd(args: Dict[str, Any]) -> str: 74 | if args.get("do_train", None): 75 | args["plot_loss"] = True 76 | cmd_lines = ["CUDA_VISIBLE_DEVICES=0 python "] 77 | for k, v in args.items(): 78 | if v is not None and v != "": 79 | cmd_lines.append(" --{} {} ".format(k, str(v))) 80 | cmd_text = "\\\n".join(cmd_lines) 81 | cmd_text = "```bash\n{}\n```".format(cmd_text) 82 | return cmd_text 83 | 84 | 85 | def get_eval_results(path: os.PathLike) -> str: 86 | with open(path, "r", 
encoding="utf-8") as f: 87 | result = json.dumps(json.load(f), indent=4) 88 | return "```json\n{}\n```\n".format(result) 89 | 90 | 91 | def gen_plot(base_model: str, finetuning_type: str, output_dir: str) -> matplotlib.figure.Figure: 92 | log_file = os.path.join(get_save_dir(base_model), finetuning_type, output_dir, "trainer_log.jsonl") 93 | if not os.path.isfile(log_file): 94 | return None 95 | 96 | plt.close("all") 97 | fig = plt.figure() 98 | ax = fig.add_subplot(111) 99 | steps, losses = [], [] 100 | with open(log_file, "r", encoding="utf-8") as f: 101 | for line in f: 102 | log_info = json.loads(line) 103 | if log_info.get("loss", None): 104 | steps.append(log_info["current_steps"]) 105 | losses.append(log_info["loss"]) 106 | 107 | if len(losses) == 0: 108 | return None 109 | 110 | ax.plot(steps, losses, alpha=0.4, label="original") 111 | ax.plot(steps, smooth(losses), label="smoothed") 112 | ax.legend() 113 | ax.set_xlabel("step") 114 | ax.set_ylabel("loss") 115 | return fig 116 | 117 | 118 | def save_model( 119 | lang: str, 120 | model_name: str, 121 | checkpoints: List[str], 122 | finetuning_type: str, 123 | template: str, 124 | max_shard_size: int, 125 | save_dir: str 126 | ) -> Generator[str, None, None]: 127 | if not model_name: 128 | yield ALERTS["err_no_model"][lang] 129 | return 130 | 131 | model_name_or_path = get_model_path(model_name) 132 | if not model_name_or_path: 133 | yield ALERTS["err_no_path"][lang] 134 | return 135 | 136 | if not checkpoints: 137 | yield ALERTS["err_no_checkpoint"][lang] 138 | return 139 | 140 | checkpoint_dir = ",".join( 141 | [os.path.join(get_save_dir(model_name), finetuning_type, checkpoint) for checkpoint in checkpoints] 142 | ) 143 | 144 | if not save_dir: 145 | yield ALERTS["err_no_save_dir"][lang] 146 | return 147 | 148 | args = dict( 149 | model_name_or_path=model_name_or_path, 150 | checkpoint_dir=checkpoint_dir, 151 | finetuning_type=finetuning_type, 152 | template=template, 153 | output_dir=save_dir 154 | ) 155 | 156 | yield ALERTS["info_exporting"][lang] 157 | export_model(args, max_shard_size="{}GB".format(max_shard_size)) 158 | yield ALERTS["info_exported"][lang] 159 | -------------------------------------------------------------------------------- /src/llmtuner/dsets/loader.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import TYPE_CHECKING, List, Union 3 | 4 | from datasets import concatenate_datasets, interleave_datasets, load_dataset, load_from_disk 5 | 6 | from llmtuner.dsets.utils import checksum, EXT2TYPE 7 | from llmtuner.extras.logging import get_logger 8 | 9 | if TYPE_CHECKING: 10 | from datasets import Dataset, IterableDataset 11 | from llmtuner.hparams import ModelArguments, DataArguments 12 | 13 | 14 | logger = get_logger(__name__) 15 | 16 | 17 | def get_dataset( 18 | model_args: "ModelArguments", 19 | data_args: "DataArguments" 20 | ) -> Union["Dataset", "IterableDataset"]: 21 | 22 | if data_args.preprocessed_dataset_path: 23 | logger.info("Loading preprocessed dataset {}...".format(data_args.preprocessed_dataset_path)) 24 | dataset = load_from_disk(data_args.preprocessed_dataset_path) 25 | return dataset 26 | 27 | 28 | max_samples = data_args.max_samples 29 | all_datasets: List[Union["Dataset", "IterableDataset"]] = [] # support multiple datasets 30 | 31 | for dataset_attr in data_args.dataset_list: 32 | logger.info("Loading dataset {}...".format(dataset_attr)) 33 | 34 | if dataset_attr.load_from == "hf_hub": 35 | data_path = dataset_attr.dataset_name 
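        # --- Editor's note: the three `load_from` branches here correspond to the kinds of
        # entries defined in data/dataset_info.json (shown later in this repo), e.g.:
        #   "guanaco":   {"hf_hub_url": "JosephusCheung/GuanacoDataset", "stage": "sft"}   -> "hf_hub"
        #   "example":   {"script_url": "example_dataset", ...}                            -> "script"
        #   "alpaca_en": {"file_name": "alpaca_data_en_52k.json", "file_sha1": "...", ...}  -> "file"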
36 | data_files = None 37 | elif dataset_attr.load_from == "script": 38 | data_path = os.path.join(data_args.dataset_dir, dataset_attr.dataset_name) 39 | data_files = None 40 | elif dataset_attr.load_from == "file": 41 | data_path = None 42 | data_files: List[str] = [] 43 | 44 | if os.path.isdir(os.path.join(data_args.dataset_dir, dataset_attr.dataset_name)): # directory 45 | for file_name in os.listdir(os.path.join(data_args.dataset_dir, dataset_attr.dataset_name)): 46 | data_files.append(os.path.join(data_args.dataset_dir, dataset_attr.dataset_name, file_name)) 47 | if data_path is None: 48 | data_path = EXT2TYPE.get(file_name.split(".")[-1], None) 49 | else: 50 | assert data_path == EXT2TYPE.get(file_name.split(".")[-1], None), "file type does not match." 51 | elif os.path.isfile(os.path.join(data_args.dataset_dir, dataset_attr.dataset_name)): # single file 52 | data_files.append(os.path.join(data_args.dataset_dir, dataset_attr.dataset_name)) 53 | data_path = EXT2TYPE.get(dataset_attr.dataset_name.split(".")[-1], None) 54 | else: 55 | raise ValueError("File not found.") 56 | 57 | assert data_path, "File extension must be txt, csv, json or jsonl." 58 | checksum(data_files, dataset_attr.dataset_sha1) 59 | else: 60 | raise NotImplementedError 61 | 62 | logger.info('data_path={}, data_files={}, split={}, cache_dir={}, streaming={}, use_auth_token={}'.format(data_path, 63 | data_files, 64 | data_args.split, 65 | model_args.cache_dir, 66 | data_args.streaming, 67 | model_args.use_auth_token)) 68 | dataset = load_dataset( 69 | data_path, 70 | data_files=data_files, 71 | split=data_args.split, 72 | cache_dir=model_args.cache_dir, 73 | streaming=data_args.streaming, 74 | use_auth_token=True if model_args.use_auth_token else None 75 | ) 76 | 77 | if max_samples is not None: 78 | max_samples_temp = min(len(dataset), max_samples) 79 | dataset = dataset.select(range(max_samples_temp)) 80 | 81 | for column_name in ["prompt", "query", "response", "history"]: # align datasets 82 | if getattr(dataset_attr, column_name) and getattr(dataset_attr, column_name) != column_name and dataset.column_names: 83 | dataset = dataset.rename_column(getattr(dataset_attr, column_name), column_name) 84 | 85 | if dataset_attr.system_prompt: # add system prompt 86 | if data_args.streaming: 87 | dataset = dataset.map(lambda _: {"system": dataset_attr.system_prompt}) 88 | else: 89 | dataset = dataset.add_column("system", [dataset_attr.system_prompt] * len(dataset)) 90 | 91 | all_datasets.append(dataset) 92 | 93 | if len(data_args.dataset_list) == 1: 94 | return all_datasets[0] 95 | elif data_args.mix_strategy == "concat": 96 | if data_args.streaming: 97 | logger.warning("The samples between different datasets will not be mixed in streaming mode.") 98 | return concatenate_datasets(all_datasets) 99 | elif data_args.mix_strategy.startswith("interleave"): 100 | if not data_args.streaming: 101 | logger.warning("We recommend using `mix_strategy=concat` in non-streaming mode.") 102 | stopping_strategy = "first_exhausted" if data_args.mix_strategy.endswith("under") else "all_exhausted" 103 | return interleave_datasets(all_datasets, data_args.interleave_probs, stopping_strategy=stopping_strategy) 104 | else: 105 | raise ValueError("Unknown mixing strategy.") 106 | 107 | 108 | if __name__ == '__main__': 109 | dataset = load_dataset( 110 | 'json', 111 | data_files=['/mnt/llm/devopspal/data/sst_data/devopspal_corpus.json'], 112 | split='train', 113 | cache_dir=None, 114 | streaming=False, 115 | use_auth_token=None 116 | ) 117 | # 
data_path=json, data_files=['/mnt/llm/devopspal/data/sst_data/devopspal_corpus.json'], split=train, cache_dir=None, streaming=False, use_auth_token=False -------------------------------------------------------------------------------- /src/llmtuner/hparams/data_args.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | from typing import List, Literal, Optional 4 | from dataclasses import dataclass, field 5 | 6 | from llmtuner.extras.logging import get_logger 7 | 8 | logger = get_logger(__name__) 9 | 10 | @dataclass 11 | class DatasetAttr: 12 | 13 | load_from: str 14 | dataset_name: Optional[str] = None 15 | dataset_sha1: Optional[str] = None 16 | system_prompt: Optional[str] = None 17 | stage: Optional[str] = None 18 | 19 | def __repr__(self) -> str: 20 | return self.dataset_name 21 | 22 | def __post_init__(self): 23 | self.prompt = "instruction" 24 | self.query = "input" 25 | self.response = "output" 26 | self.history = None 27 | 28 | 29 | @dataclass 30 | class DataArguments: 31 | r""" 32 | Arguments pertaining to what data we are going to input our model for training and evaluation. 33 | """ 34 | template: str = field( 35 | metadata={"help": "Which template to use for constructing prompts in training and inference."} 36 | ) 37 | dataset: Optional[str] = field( 38 | default="alpaca_en", 39 | metadata={"help": "The name of provided dataset(s) to use. Use commas to separate multiple datasets."} 40 | ) 41 | dataset_dir: Optional[str] = field( 42 | default="data", 43 | metadata={"help": "The name of the folder containing datasets."} 44 | ) 45 | split: Optional[str] = field( 46 | default="train", 47 | metadata={"help": "Which dataset split to use for training and evaluation."} 48 | ) 49 | streaming: Optional[bool] = field( 50 | default=False, 51 | metadata={"help": "Enable streaming mode."} 52 | ) 53 | buffer_size: Optional[int] = field( 54 | default=16384, 55 | metadata={"help": "Size of the buffer to randomly sample examples from in streaming mode."} 56 | ) 57 | mix_strategy: Optional[Literal["concat", "interleave_under", "interleave_over"]] = field( 58 | default="concat", 59 | metadata={"help": "Strategy to use in dataset mixing."} 60 | ) 61 | interleave_probs: Optional[str] = field( 62 | default=None, 63 | metadata={"help": "Probabilities to sample data from datasets. Use commas to separate multiple datasets."} 64 | ) 65 | overwrite_cache: Optional[bool] = field( 66 | default=False, 67 | metadata={"help": "Overwrite the cached training and evaluation sets."} 68 | ) 69 | preprocessing_num_workers: Optional[int] = field( 70 | default=None, 71 | metadata={"help": "The number of processes to use for the preprocessing."} 72 | ) 73 | max_source_length: Optional[int] = field( 74 | default=512, 75 | metadata={"help": "The maximum total input sequence length after tokenization."} 76 | ) 77 | max_target_length: Optional[int] = field( 78 | default=512, 79 | metadata={"help": "The maximum total output sequence length after tokenization."} 80 | ) 81 | max_samples: Optional[int] = field( 82 | default=None, 83 | metadata={"help": "For debugging purposes, truncate the number of examples for each dataset."} 84 | ) 85 | eval_num_beams: Optional[int] = field( 86 | default=None, 87 | metadata={"help": "Number of beams to use for evaluation. 
This argument will be passed to `model.generate`"}
88 | )
89 | ignore_pad_token_for_loss: Optional[bool] = field(
90 | default=True,
91 | metadata={"help": "Whether to ignore the tokens corresponding to padded labels in the loss computation or not."}
92 | )
93 | system_prompt: Optional[str] = field(
94 | default=None,
95 | metadata={"help": "System prompt to add before the user query. Use `|` to separate multiple prompts in training."}
96 | )
97 | val_size: Optional[float] = field(
98 | default=0,
99 | metadata={"help": "Size of the development set, should be an integer or a float in range `[0,1)`."}
100 | )
101 | 
102 | save_dataset_path: Optional[str] = field(
103 | default='',
104 | metadata={"help": "If provided, will save the preprocessed dataset to this path for future use."}
105 | )
106 | 
107 | preprocessed_dataset_path: Optional[str] = field(
108 | default='',
109 | metadata={"help": "If provided, will load the preprocessed dataset directly from this path."}
110 | )
111 | 
112 | def init_for_training(self): # support mixing multiple datasets
113 | dataset_names = [ds.strip() for ds in self.dataset.split(",")]
114 | with open(os.path.join(self.dataset_dir, "dataset_info.json"), "r") as f:
115 | dataset_info = json.load(f)
116 | 
117 | prompt_list = self.system_prompt.split("|") if self.system_prompt else [None]
118 | prompt_list = prompt_list * (len(dataset_names) // len(prompt_list))
119 | assert len(prompt_list) == len(dataset_names), "Number of system prompts should be equal to the number of datasets or 1."
120 | 
121 | if self.interleave_probs is not None:
122 | self.interleave_probs = [float(prob.strip()) for prob in self.interleave_probs.split(",")]
123 | 
124 | self.dataset_list: List[DatasetAttr] = []
125 | for i, name in enumerate(dataset_names):
126 | if name not in dataset_info:
127 | raise ValueError("Undefined dataset {} in dataset_info.json.".format(name))
128 | 
129 | if "hf_hub_url" in dataset_info[name]:
130 | dataset_attr = DatasetAttr(
131 | "hf_hub",
132 | dataset_name=dataset_info[name]["hf_hub_url"],
133 | stage=dataset_info[name].get("stage", None))
134 | elif "script_url" in dataset_info[name]:
135 | dataset_attr = DatasetAttr(
136 | "script",
137 | dataset_name=dataset_info[name]["script_url"],
138 | stage=dataset_info[name].get("stage", None))
139 | else:
140 | dataset_attr = DatasetAttr(
141 | "file",
142 | dataset_name=dataset_info[name]["file_name"],
143 | dataset_sha1=dataset_info[name].get("file_sha1", None),
144 | stage=dataset_info[name].get("stage", None)
145 | )
146 | 
147 | if "columns" in dataset_info[name]:
148 | dataset_attr.prompt = dataset_info[name]["columns"].get("prompt", 'instruction')
149 | dataset_attr.query = dataset_info[name]["columns"].get("query", 'input')
150 | dataset_attr.response = dataset_info[name]["columns"].get("response", 'output')
151 | dataset_attr.history = dataset_info[name]["columns"].get("history", None)
152 | 
153 | dataset_attr.system_prompt = prompt_list[i]
154 | self.dataset_list.append(dataset_attr)
155 | 
--------------------------------------------------------------------------------
/src/llmtuner/webui/components/train.py:
--------------------------------------------------------------------------------
1 | from typing import TYPE_CHECKING, Dict
2 | from transformers.trainer_utils import SchedulerType
3 | 
4 | import gradio as gr
5 | 
6 | from llmtuner.extras.constants import STAGES
7 | from llmtuner.webui.common import list_checkpoint, list_dataset, DEFAULT_DATA_DIR
8 | from llmtuner.webui.components.data import create_preview_box
9 | from
llmtuner.webui.utils import can_preview, get_preview, gen_plot 10 | 11 | if TYPE_CHECKING: 12 | from gradio.components import Component 13 | from llmtuner.webui.runner import Runner 14 | 15 | 16 | def create_train_tab(top_elems: Dict[str, "Component"], runner: "Runner") -> Dict[str, "Component"]: 17 | with gr.Row(): 18 | training_stage = gr.Dropdown(choices=STAGES, value=STAGES[0], scale=2) 19 | dataset_dir = gr.Textbox(value=DEFAULT_DATA_DIR, scale=2) 20 | dataset = gr.Dropdown(multiselect=True, scale=4) 21 | data_preview_btn = gr.Button(interactive=False, scale=1) 22 | 23 | preview_box, preview_count, preview_samples, close_btn = create_preview_box() 24 | 25 | training_stage.change(list_dataset, [dataset_dir, training_stage], [dataset]) 26 | dataset_dir.change(list_dataset, [dataset_dir, training_stage], [dataset]) 27 | dataset.change(can_preview, [dataset_dir, dataset], [data_preview_btn]) 28 | data_preview_btn.click( 29 | get_preview, 30 | [dataset_dir, dataset], 31 | [preview_count, preview_samples, preview_box], 32 | queue=False 33 | ) 34 | 35 | with gr.Row(): 36 | max_source_length = gr.Slider(value=512, minimum=4, maximum=4096, step=1) 37 | max_target_length = gr.Slider(value=512, minimum=4, maximum=4096, step=1) 38 | learning_rate = gr.Textbox(value="5e-5") 39 | num_train_epochs = gr.Textbox(value="3.0") 40 | max_samples = gr.Textbox(value="100000") 41 | 42 | with gr.Row(): 43 | batch_size = gr.Slider(value=4, minimum=1, maximum=512, step=1) 44 | gradient_accumulation_steps = gr.Slider(value=4, minimum=1, maximum=512, step=1) 45 | lr_scheduler_type = gr.Dropdown( 46 | choices=[scheduler.value for scheduler in SchedulerType], value="cosine" 47 | ) 48 | max_grad_norm = gr.Textbox(value="1.0") 49 | val_size = gr.Slider(value=0, minimum=0, maximum=1, step=0.001) 50 | 51 | with gr.Accordion(label="Advanced config", open=False) as advanced_tab: 52 | with gr.Row(): 53 | logging_steps = gr.Slider(value=5, minimum=5, maximum=1000, step=5) 54 | save_steps = gr.Slider(value=100, minimum=10, maximum=5000, step=10) 55 | warmup_steps = gr.Slider(value=0, minimum=0, maximum=5000, step=1) 56 | compute_type = gr.Radio(choices=["fp16", "bf16"], value="fp16") 57 | padding_side = gr.Radio(choices=["left", "right"], value="left") 58 | 59 | with gr.Accordion(label="LoRA config", open=False) as lora_tab: 60 | with gr.Row(): 61 | lora_rank = gr.Slider(value=8, minimum=1, maximum=1024, step=1, scale=1) 62 | lora_dropout = gr.Slider(value=0.1, minimum=0, maximum=1, step=0.01, scale=1) 63 | lora_target = gr.Textbox(scale=2) 64 | resume_lora_training = gr.Checkbox(value=True, scale=1) 65 | 66 | with gr.Accordion(label="RLHF config", open=False) as rlhf_tab: 67 | with gr.Row(): 68 | dpo_beta = gr.Slider(value=0.1, minimum=0, maximum=1, step=0.01, scale=2) 69 | reward_model = gr.Dropdown(scale=2) 70 | refresh_btn = gr.Button(scale=1) 71 | 72 | refresh_btn.click( 73 | list_checkpoint, 74 | [top_elems["model_name"], top_elems["finetuning_type"]], 75 | [reward_model], 76 | queue=False 77 | ) 78 | 79 | with gr.Row(): 80 | cmd_preview_btn = gr.Button() 81 | start_btn = gr.Button() 82 | stop_btn = gr.Button() 83 | 84 | with gr.Row(): 85 | with gr.Column(scale=3): 86 | with gr.Row(): 87 | output_dir = gr.Textbox() 88 | 89 | with gr.Row(): 90 | process_bar = gr.Slider(visible=False, interactive=False) 91 | 92 | with gr.Box(): 93 | output_box = gr.Markdown() 94 | 95 | with gr.Column(scale=1): 96 | loss_viewer = gr.Plot() 97 | 98 | input_components = [ 99 | top_elems["lang"], 100 | top_elems["model_name"], 101 | 
top_elems["checkpoints"], 102 | top_elems["finetuning_type"], 103 | top_elems["quantization_bit"], 104 | top_elems["template"], 105 | top_elems["system_prompt"], 106 | training_stage, 107 | dataset_dir, 108 | dataset, 109 | max_source_length, 110 | max_target_length, 111 | learning_rate, 112 | num_train_epochs, 113 | max_samples, 114 | batch_size, 115 | gradient_accumulation_steps, 116 | lr_scheduler_type, 117 | max_grad_norm, 118 | val_size, 119 | logging_steps, 120 | save_steps, 121 | warmup_steps, 122 | compute_type, 123 | padding_side, 124 | lora_rank, 125 | lora_dropout, 126 | lora_target, 127 | resume_lora_training, 128 | dpo_beta, 129 | reward_model, 130 | output_dir 131 | ] 132 | 133 | output_components = [ 134 | output_box, 135 | process_bar 136 | ] 137 | 138 | cmd_preview_btn.click(runner.preview_train, input_components, output_components) 139 | start_btn.click(runner.run_train, input_components, output_components) 140 | stop_btn.click(runner.set_abort, queue=False) 141 | 142 | process_bar.change( 143 | gen_plot, [top_elems["model_name"], top_elems["finetuning_type"], output_dir], loss_viewer, queue=False 144 | ) 145 | 146 | return dict( 147 | training_stage=training_stage, 148 | dataset_dir=dataset_dir, 149 | dataset=dataset, 150 | data_preview_btn=data_preview_btn, 151 | preview_count=preview_count, 152 | preview_samples=preview_samples, 153 | close_btn=close_btn, 154 | max_source_length=max_source_length, 155 | max_target_length=max_target_length, 156 | learning_rate=learning_rate, 157 | num_train_epochs=num_train_epochs, 158 | max_samples=max_samples, 159 | batch_size=batch_size, 160 | gradient_accumulation_steps=gradient_accumulation_steps, 161 | lr_scheduler_type=lr_scheduler_type, 162 | max_grad_norm=max_grad_norm, 163 | val_size=val_size, 164 | advanced_tab=advanced_tab, 165 | logging_steps=logging_steps, 166 | save_steps=save_steps, 167 | warmup_steps=warmup_steps, 168 | compute_type=compute_type, 169 | padding_side=padding_side, 170 | lora_tab=lora_tab, 171 | lora_rank=lora_rank, 172 | lora_dropout=lora_dropout, 173 | lora_target=lora_target, 174 | resume_lora_training=resume_lora_training, 175 | rlhf_tab=rlhf_tab, 176 | dpo_beta=dpo_beta, 177 | reward_model=reward_model, 178 | refresh_btn=refresh_btn, 179 | cmd_preview_btn=cmd_preview_btn, 180 | start_btn=start_btn, 181 | stop_btn=stop_btn, 182 | output_dir=output_dir, 183 | output_box=output_box, 184 | loss_viewer=loss_viewer 185 | ) 186 | -------------------------------------------------------------------------------- /data/dataset_info.json: -------------------------------------------------------------------------------- 1 | { 2 | "alpaca_en": { 3 | "file_name": "alpaca_data_en_52k.json", 4 | "file_sha1": "607f94a7f581341e59685aef32f531095232cf23", 5 | "stage": "sft" 6 | }, 7 | "alpaca_zh": { 8 | "file_name": "alpaca_data_zh_51k.json", 9 | "file_sha1": "e655af3db557a4197f7b0cf92e1986b08fae6311", 10 | "stage": "sft" 11 | }, 12 | "alpaca_gpt4_en": { 13 | "file_name": "alpaca_gpt4_data_en.json", 14 | "file_sha1": "647f4ad447bd993e4b6b6223d1be15208bab694a", 15 | "stage": "sft" 16 | }, 17 | "alpaca_gpt4_zh": { 18 | "file_name": "alpaca_gpt4_data_zh.json", 19 | "file_sha1": "3eaa3bda364ccdd59925d7448a698256c31ef845", 20 | "stage": "sft" 21 | }, 22 | "self_cognition": { 23 | "file_name": "self_cognition.json", 24 | "file_sha1": "6287a730ada924fc5d9eadc6d8f865e01b7a6f67", 25 | "stage": "sft" 26 | }, 27 | "oaast_sft": { 28 | "file_name": "oaast_sft.json", 29 | "file_sha1": "7baf5d43e67a91f9bbdf4e400dbe033b87e9757e", 30 | 
"columns": { 31 | "prompt": "instruction", 32 | "query": "input", 33 | "response": "output", 34 | "history": "history" 35 | }, 36 | "stage": "sft" 37 | }, 38 | "oaast_sft_zh": { 39 | "file_name": "oaast_sft_zh.json", 40 | "file_sha1": "a6a91f18f80f37b10ded9cf633fb50c033bf7b9f", 41 | "columns": { 42 | "prompt": "instruction", 43 | "query": "input", 44 | "response": "output", 45 | "history": "history" 46 | }, 47 | "stage": "sft" 48 | }, 49 | "sharegpt_zh": { 50 | "file_name": "sharegpt_zh_27k.json", 51 | "file_sha1": "baf766bcf3d61f1b783728c14ce695af57a86e6e", 52 | "columns": { 53 | "prompt": "instruction", 54 | "query": "input", 55 | "response": "output", 56 | "history": "history" 57 | }, 58 | "stage": "sft" 59 | }, 60 | "lima": { 61 | "file_name": "lima.json", 62 | "file_sha1": "9db59f6b7007dc4b17529fc63379b9cd61640f37", 63 | "columns": { 64 | "prompt": "instruction", 65 | "query": "input", 66 | "response": "output", 67 | "history": "history" 68 | }, 69 | "stage": "sft" 70 | }, 71 | "example": { 72 | "script_url": "example_dataset", 73 | "columns": { 74 | "prompt": "instruction", 75 | "query": "input", 76 | "response": "output", 77 | "history": "history" 78 | }, 79 | "stage": "sft" 80 | }, 81 | "guanaco": { 82 | "hf_hub_url": "JosephusCheung/GuanacoDataset", 83 | "stage": "sft" 84 | }, 85 | "belle_0.5m": { 86 | "hf_hub_url": "BelleGroup/train_0.5M_CN", 87 | "stage": "sft" 88 | }, 89 | "belle_1m": { 90 | "hf_hub_url": "BelleGroup/train_1M_CN", 91 | "stage": "sft" 92 | }, 93 | "belle_2m": { 94 | "hf_hub_url": "BelleGroup/train_2M_CN", 95 | "stage": "sft" 96 | }, 97 | "belle_dialog": { 98 | "hf_hub_url": "BelleGroup/generated_chat_0.4M", 99 | "stage": "sft" 100 | }, 101 | "belle_math": { 102 | "hf_hub_url": "BelleGroup/school_math_0.25M", 103 | "stage": "sft" 104 | }, 105 | "belle_multiturn": { 106 | "script_url": "belle_multiturn", 107 | "columns": { 108 | "prompt": "instruction", 109 | "query": "", 110 | "response": "output", 111 | "history": "history" 112 | }, 113 | "stage": "sft" 114 | }, 115 | "firefly": { 116 | "hf_hub_url": "YeungNLP/firefly-train-1.1M", 117 | "columns": { 118 | "prompt": "input", 119 | "query": "", 120 | "response": "target", 121 | "history": "" 122 | }, 123 | "stage": "sft" 124 | }, 125 | "codealpaca": { 126 | "hf_hub_url": "sahil2801/CodeAlpaca-20k", 127 | "stage": "sft" 128 | }, 129 | "alpaca_cot": { 130 | "hf_hub_url": "QingyiSi/Alpaca-CoT", 131 | "stage": "sft" 132 | }, 133 | "webqa": { 134 | "hf_hub_url": "suolyer/webqa", 135 | "columns": { 136 | "prompt": "input", 137 | "query": "", 138 | "response": "output", 139 | "history": "" 140 | }, 141 | "stage": "sft" 142 | }, 143 | "ultra_chat": { 144 | "script_url": "ultra_chat", 145 | "columns": { 146 | "prompt": "instruction", 147 | "query": "", 148 | "response": "output", 149 | "history": "history" 150 | }, 151 | "stage": "sft" 152 | }, 153 | "novel_tokens512_50k": { 154 | "hf_hub_url": "zxbsmk/webnovel_cn", 155 | "stage": "sft" 156 | }, 157 | "ad_gen": { 158 | "hf_hub_url": "HasturOfficial/adgen", 159 | "columns": { 160 | "prompt": "content", 161 | "query": "", 162 | "response": "summary", 163 | "history": "" 164 | }, 165 | "stage": "sft" 166 | }, 167 | "comparison_gpt4_en": { 168 | "file_name": "comparison_gpt4_data_en.json", 169 | "file_sha1": "96fa18313544e22444fe20eead7754b17da452ae", 170 | "stage": "rm" 171 | }, 172 | "comparison_gpt4_zh": { 173 | "file_name": "comparison_gpt4_data_zh.json", 174 | "file_sha1": "515b18ed497199131ddcc1af950345c11dc5c7fd", 175 | "stage": "rm" 176 | }, 177 | "hh_rlhf_en": { 178 | 
"script_url": "hh_rlhf_en", 179 | "columns": { 180 | "prompt": "instruction", 181 | "query": "", 182 | "response": "output", 183 | "history": "history" 184 | }, 185 | "stage": "rm" 186 | }, 187 | "oaast_rm": { 188 | "file_name": "oaast_rm.json", 189 | "file_sha1": "622d420e9b70003b210618253bd3d9d2891d86cb", 190 | "columns": { 191 | "prompt": "instruction", 192 | "query": "input", 193 | "response": "output", 194 | "history": "history" 195 | }, 196 | "stage": "rm" 197 | }, 198 | "oaast_rm_zh": { 199 | "file_name": "oaast_rm_zh.json", 200 | "file_sha1": "1065af1f3784dd61be5e79713a35f427b713a232", 201 | "columns": { 202 | "prompt": "instruction", 203 | "query": "input", 204 | "response": "output", 205 | "history": "history" 206 | }, 207 | "stage": "rm" 208 | }, 209 | "wiki_demo": { 210 | "file_name": "wiki_demo.txt", 211 | "file_sha1": "b2288edb05b233e5b35250fd4b308a5fa21fa66d", 212 | "columns": { 213 | "prompt": "text", 214 | "query": "", 215 | "response": "", 216 | "history": "" 217 | }, 218 | "stage": "pt" 219 | }, 220 | "refinedweb": { 221 | "hf_hub_url": "tiiuae/falcon-refinedweb", 222 | "columns": { 223 | "prompt": "content", 224 | "query": "", 225 | "response": "", 226 | "history": "" 227 | }, 228 | "stage": "pt" 229 | }, 230 | "starcoder": { 231 | "hf_hub_url": "bigcode/starcoderdata", 232 | "columns": { 233 | "prompt": "content", 234 | "query": "", 235 | "response": "", 236 | "history": "" 237 | }, 238 | "stage": "pt" 239 | }, 240 | "wikipedia_en": { 241 | "hf_hub_url": "olm/olm-wikipedia-20221220", 242 | "columns": { 243 | "prompt": "text", 244 | "query": "", 245 | "response": "", 246 | "history": "" 247 | }, 248 | "stage": "pt" 249 | }, 250 | "wikipedia_zh": { 251 | "hf_hub_url": "pleisto/wikipedia-cn-20230720-filtered", 252 | "columns": { 253 | "prompt": "completion", 254 | "query": "", 255 | "response": "", 256 | "history": "" 257 | }, 258 | "stage": "pt" 259 | }, 260 | "testing": { 261 | "file_name": "/mnt/llm/devopspal/data/sst_data/test_data/test_data.json", 262 | "columns": { 263 | "prompt": "text", 264 | "query": "", 265 | "response": "", 266 | "history": "" 267 | }, 268 | "stage": "pt" 269 | }, 270 | "testing_sft": { 271 | "file_name": "/mnt/llm/devopspal/data/sft_data/sft_test.json", 272 | "columns": { 273 | "prompt": "input", 274 | "query": "", 275 | "response": "output", 276 | "history": "" 277 | }, 278 | "stage": "sft" 279 | }, 280 | "devopspal_corpus": { 281 | "file_name": "/mnt/llm/devopspal/data/sst_data/devopspal_corpus.jsonl", 282 | "columns": { 283 | "prompt": "text", 284 | "query": "", 285 | "response": "", 286 | "history": "" 287 | }, 288 | "stage": "pt" 289 | } 290 | } 291 | -------------------------------------------------------------------------------- /src/llmtuner/tuner/core/loader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | import torch 4 | from types import MethodType 5 | from typing import TYPE_CHECKING, Literal, Optional, Tuple 6 | 7 | from transformers import ( 8 | AutoConfig, 9 | AutoModelForCausalLM, 10 | AutoTokenizer, 11 | BitsAndBytesConfig, 12 | PretrainedConfig, 13 | PreTrainedModel, 14 | PreTrainedTokenizerBase 15 | ) 16 | from transformers.utils import check_min_version 17 | from transformers.utils.versions import require_version 18 | from transformers.deepspeed import is_deepspeed_zero3_enabled 19 | from trl import AutoModelForCausalLMWithValueHead 20 | 21 | from llmtuner.extras.logging import reset_logging, get_logger 22 | from llmtuner.extras.misc import 
count_parameters, prepare_model_for_training 23 | from llmtuner.extras.save_and_load import load_valuehead_params 24 | from llmtuner.hparams import FinetuningArguments 25 | from llmtuner.tuner.core.adapter import init_adapter 26 | 27 | if TYPE_CHECKING: 28 | from transformers import PreTrainedTokenizer 29 | from llmtuner.hparams import ModelArguments 30 | 31 | 32 | logger = get_logger(__name__) 33 | 34 | 35 | check_min_version("4.29.1") 36 | require_version("datasets>=2.12.0", "To fix: pip install datasets>=2.12.0") 37 | require_version("accelerate>=0.21.0", "To fix: pip install accelerate>=0.21.0") 38 | require_version("peft>=0.4.0", "To fix: pip install peft>=0.4.0") 39 | require_version("trl>=0.5.0", "To fix: pip install trl>=0.5.0") 40 | 41 | 42 | def load_model_and_tokenizer( 43 | model_args: "ModelArguments", 44 | finetuning_args: "FinetuningArguments", 45 | is_trainable: Optional[bool] = False, 46 | stage: Optional[Literal["pt", "sft", "rm", "ppo"]] = "sft" 47 | ) -> Tuple[PreTrainedModel, "PreTrainedTokenizer"]: 48 | r""" 49 | Loads pretrained model and tokenizer. 50 | 51 | Support both training and inference. 52 | """ 53 | if (not is_trainable) and model_args.checkpoint_dir is None: 54 | logger.warning("Checkpoint is not found at evaluation, load the original model.") 55 | finetuning_args = FinetuningArguments(finetuning_type="none") 56 | 57 | config_kwargs = { 58 | "trust_remote_code": True, 59 | "cache_dir": model_args.cache_dir, 60 | "revision": model_args.model_revision, 61 | "use_auth_token": True if model_args.use_auth_token else None, 62 | } 63 | 64 | tokenizer = AutoTokenizer.from_pretrained( 65 | model_args.model_name_or_path, 66 | use_fast=model_args.use_fast_tokenizer, 67 | padding_side=model_args.padding_side, 68 | **config_kwargs 69 | ) 70 | 71 | if finetuning_args.finetuning_type == "full" and model_args.checkpoint_dir is not None: 72 | model_to_load = model_args.checkpoint_dir[0] 73 | else: 74 | model_to_load = model_args.model_name_or_path 75 | 76 | config = AutoConfig.from_pretrained(model_to_load, **config_kwargs) 77 | 78 | if hasattr(config, "fp16") and hasattr(config, "bf16"): # fix Qwen config 79 | if model_args.compute_dtype == torch.bfloat16: 80 | setattr(config, "bf16", True) 81 | else: 82 | setattr(config, "fp16", True) 83 | 84 | # Set RoPE scaling 85 | if model_args.rope_scaling is not None: 86 | if hasattr(config, "use_dynamic_ntk"): # for Qwen models 87 | if is_trainable: 88 | logger.warning("Qwen model does not support RoPE scaling in training.") 89 | else: 90 | setattr(config, "use_dynamic_ntk", True) 91 | setattr(config, "use_logn_attn", True) 92 | logger.info("Using dynamic NTK scaling.") 93 | 94 | elif hasattr(config, "rope_scaling"): # for LLaMA models 95 | require_version("transformers>=4.31.0", "RoPE scaling requires transformers>=4.31.0") 96 | 97 | if is_trainable: 98 | if model_args.rope_scaling == "dynamic": 99 | logger.warning( 100 | "Dynamic NTK may not work well with fine-tuning. " 101 | "See: https://github.com/huggingface/transformers/pull/24653" 102 | ) 103 | 104 | current_max_length = getattr(config, "max_position_embeddings", None) 105 | if current_max_length and model_args.model_max_length > current_max_length: 106 | scaling_factor = float(math.ceil(model_args.model_max_length / current_max_length)) 107 | else: 108 | logger.warning("Input length is smaller than max length. 
Consider increase input length.") 109 | scaling_factor = 1.0 110 | else: 111 | scaling_factor = 2.0 112 | 113 | setattr(config, "rope_scaling", {"type": model_args.rope_scaling, "factor": scaling_factor}) 114 | logger.info("Using {} scaling strategy and setting scaling factor to {}".format( 115 | model_args.rope_scaling, scaling_factor 116 | )) 117 | 118 | else: 119 | logger.warning("Current model does not support RoPE scaling.") 120 | 121 | # Quantization configurations (using bitsandbytes library). 122 | is_mergeable = True 123 | if model_args.quantization_bit is not None: 124 | if is_deepspeed_zero3_enabled(): 125 | raise ValueError("DeepSpeed ZeRO-3 is incompatible with quantization.") 126 | 127 | if model_args.quantization_bit == 8: 128 | require_version("bitsandbytes>=0.37.0", "To fix: pip install bitsandbytes>=0.37.0") 129 | config_kwargs["load_in_8bit"] = True 130 | config_kwargs["quantization_config"] = BitsAndBytesConfig(load_in_8bit=True) 131 | 132 | elif model_args.quantization_bit == 4: 133 | require_version("bitsandbytes>=0.39.0", "To fix: pip install bitsandbytes>=0.39.0") 134 | config_kwargs["load_in_4bit"] = True 135 | config_kwargs["quantization_config"] = BitsAndBytesConfig( 136 | load_in_4bit=True, 137 | bnb_4bit_compute_dtype=model_args.compute_dtype, 138 | bnb_4bit_use_double_quant=model_args.double_quantization, 139 | bnb_4bit_quant_type=model_args.quantization_type 140 | ) 141 | 142 | is_mergeable = False 143 | config_kwargs["device_map"] = {"": int(os.environ.get("LOCAL_RANK", "0"))} if is_trainable else "auto" 144 | logger.info("Quantizing model to {} bit.".format(model_args.quantization_bit)) 145 | 146 | # Load and prepare pre-trained models (without valuehead). 147 | model = AutoModelForCausalLM.from_pretrained( 148 | model_to_load, 149 | config=config, 150 | torch_dtype=model_args.compute_dtype, 151 | low_cpu_mem_usage=(not is_deepspeed_zero3_enabled()), 152 | **config_kwargs 153 | ) 154 | 155 | # Disable custom generate method (for Qwen) 156 | if "GenerationMixin" not in str(model.generate.__func__): 157 | model.generate = MethodType(PreTrainedModel.generate, model) 158 | 159 | # Fix LM head (for ChatGLM2) 160 | if not hasattr(model, "lm_head") and hasattr(model, "transformer"): 161 | setattr(model, "lm_head", model.transformer.output_layer) 162 | 163 | # Register auto class to save the custom code files. 
164 | if isinstance(config, PretrainedConfig) and "AutoConfig" in getattr(config, "auto_map", {}): 165 | config.__class__.register_for_auto_class() 166 | if isinstance(model, PreTrainedModel) and "AutoModelForCausalLM" in getattr(config, "auto_map", {}): 167 | model.__class__.register_for_auto_class() 168 | if isinstance(tokenizer, PreTrainedTokenizerBase) and "AutoTokenizer" in tokenizer.init_kwargs.get("auto_map", {}): 169 | tokenizer.__class__.register_for_auto_class() 170 | 171 | # Initialize adapters 172 | model = prepare_model_for_training(model, finetuning_args.finetuning_type) if is_trainable else model 173 | model = init_adapter(model, model_args, finetuning_args, is_trainable, is_mergeable) 174 | 175 | # Prepare model with valuehead for RLHF 176 | if stage == "rm" or stage == "ppo": 177 | model: AutoModelForCausalLMWithValueHead = AutoModelForCausalLMWithValueHead.from_pretrained(model) 178 | reset_logging() 179 | if stage == "rm" and model_args.checkpoint_dir is not None: # load valuehead weights to evaluate reward model 180 | logger.warning("Only the last checkpoint containing valuehead will be loaded as the valuehead.") 181 | if load_valuehead_params(model, model_args.checkpoint_dir[-1]): 182 | model.v_head.load_state_dict({ 183 | "summary.weight": getattr(model, "reward_head_weight"), 184 | "summary.bias": getattr(model, "reward_head_bias") 185 | }) 186 | 187 | if stage == "ppo": # load reward model 188 | logger.info("Load reward model from {}".format(model_args.reward_model)) 189 | model.pretrained_model.load_adapter(model_args.reward_model, "reward", is_trainable=False) 190 | assert load_valuehead_params(model, model_args.reward_model), "Reward model is not correctly loaded." 191 | 192 | # Prepare model for inference 193 | if not is_trainable: 194 | model.requires_grad_(False) # fix all model params 195 | infer_dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16 # detect cuda capability 196 | model = model.to(infer_dtype) if model_args.quantization_bit is None else model 197 | 198 | trainable_params, all_param = count_parameters(model) 199 | logger.info("trainable params: {:d} || all params: {:d} || trainable%: {:.4f}".format( 200 | trainable_params, all_param, 100 * trainable_params / all_param 201 | )) 202 | 203 | return model, tokenizer 204 | 205 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 |

3 | DevOps-Model 4 |

5 |
6 | 7 |

8 | 🤗 Hugging Face • 9 | 🤖 ModelScope

10 | 11 |
12 |

13 |

14 | 中文 | 15 | English 16 |

17 |

18 |
19 | 20 | DevOps-Model 是一系列业界首个开源的**中文开发运维大模型**,主要致力于在 DevOps 领域发挥实际价值。目前,DevOps-Model 能够帮助工程师回答在 DevOps 生命周期中遇到的问题。 21 | 22 | 我们基于 Qwen 系列模型,经过高质量中文 DevOps 语料加训后产出 **Base** 模型,然后经过 DevOps QA 数据对齐后产出 **Chat** 模型。我们的 Base 模型和 Chat 模型在开源和 DevOps 领域相关的评测数据上可以取得同规模模型中的**最佳效果**。欢迎来我们部署的在线试用地址试用模型效果:https://modelscope.cn/studios/codefuse-ai/DevOps-Model-Demo/summary 23 |
24 |
25 | 同时我们也在搭建 DevOps 领域专属的评测基准 [DevOpsEval](https://github.com/codefuse-ai/codefuse-devops-eval),用来更好评测 DevOps 领域模型的效果。 26 |
27 |
28 | 29 | # 最新消息 30 | - [2023.12.22] 我们部署了 DevOps-Model 的在线模型问答地址,欢迎试用!!! https://modelscope.cn/studios/codefuse-ai/DevOps-Model-Demo/summary 31 | - [2023.12.06] 更新 Huggingface 下载地址 32 | - [2023.10.31] 开源 DevOps-Model-14B Base 和 Chat 模型。 33 | - [2023.10.30] 开源 DevOps-Model-7B Base 和 Chat 模型。 34 | 35 | 36 | # 模型下载 37 | 开源模型和下载链接见下表: 38 | 🤗 Huggingface 地址 39 | 40 | | | 基座模型 | 对齐模型 | 对齐模型 Int4 量化 | 41 | |:-------:|:-------:|:-------:|:-----------------:| 42 | | 7B | [DevOps-Model-7B-Base](https://huggingface.co/codefuse-ai/CodeFuse-DevOps-Model-7B-Base)| [DevOps-Model-7B-Chat](https://huggingface.co/codefuse-ai/CodeFuse-DevOps-Model-7B-Chat) | Coming Soon| 43 | | 14B | [DevOps-Model-14B-Base](https://huggingface.co/codefuse-ai/CodeFuse-DevOps-Model-14B-Base) | [DevOps-Model-14B-Chat](https://huggingface.co/codefuse-ai/CodeFuse-DevOps-Model-14B-Chat) | Coming Soon | 44 | 45 | 46 | 🤖 ModelScope 地址 47 | 48 | | | 基座模型 | 对齐模型 | 对齐模型 Int4 量化 | 49 | |:-------:|:-------:|:-------:|:-----------------:| 50 | | 7B | [DevOps-Model-7B-Base](https://modelscope.cn/models/codefuse-ai/CodeFuse-DevOps-Model-7B-Chat/summary) | [DevOps-Model-7B-Chat](https://modelscope.cn/models/codefuse-ai/CodeFuse-DevOps-Model-7B-Chat/summary) | Coming Soon| 51 | | 14B | [DevOps-Model-14B-Base](https://modelscope.cn/models/codefuse-ai/CodeFuse-DevOps-Model-14B-Base/summary) | [DevOps-Model-14B-Chat](https://modelscope.cn/models/codefuse-ai/CodeFuse-DevOps-Model-14B-Chat/summary) | Coming Soon | 52 | 53 | 54 | # 模型评测 55 | 我们先选取了 CMMLU 和 CEval 两个评测数据集中和 DevOps 相关的一共六项考试。总计一共 574 道选择题,具体信息如下: 56 | 57 | | 评测数据集 | 考试科目 | 题数 | 58 | |:-------:|:-------:|:-------:| 59 | | CMMLU | Computer science | 204 | 60 | | CMMLU | Computer security | 171 | 61 | | CMMLU | Machine learning | 122 | 62 | | CEval | College programming | 37 | 63 | | CEval | Computer architecture | 21 | 64 | | CEval | Computernetwork | 19 | 65 | 66 | 67 | 我们分别测试了 Zero-shot 和 Five-shot 的结果,我们的 7B 和 14B 系列模型可以取得在测试的模型中最好的成绩,更多的测试后续也会放出。 68 | 69 | |Base 模型|Zero-shot 得分|Five-shot 得分| 70 | |:-------:|:-------:|:-------:| 71 | |**DevOps-Model-14B-Base**| **70.73** | **73.00** | 72 | |Qwen-14B-Base| 69.16 | 71.25 | 73 | |Baichuan2-13B-Base| 55.75 | 61.15 | 74 | |**DevOps-Model-7B-Base**| **62.72** | **62.02** | 75 | |Qwen-7B-Base| 55.75 | 56.00 | 76 | |Baichuan2-7B-Base| 49.30 | 55.4 | 77 | |Internlm-7B-Base| 47.56 | 52.6 | 78 |
79 | 80 | |Chat 模型|Zero-shot 得分|Five-shot 得分| 81 | |:-------:|:-------:|:-------:| 82 | |**DevOps-Model-14B-Chat**| **74.04** | **75.96** | 83 | |Qwen-14B-Chat| 69.16 | 70.03 | 84 | |Baichuan2-13B-Chat| 52.79 | 55.23 | 85 | |**DevOps-Model-7B-Chat**| **62.20** | **64.11** | 86 | |Qwen-7B-Chat| 46.00 | 52.44 | 87 | |Baichuan2-7B-Chat| 52.56 | 55.75 | 88 | |Internlm-7B-Chat| 52.61 | 55.75 | 89 | 90 |
91 |
92 | 93 | # 快速使用 94 | 我们提供简单的示例来说明如何利用 🤗 Transformers 快速使用 Devops-Model-Chat 模型。 95 | 96 | ## 安装依赖 97 | 98 | ```bash 99 | pip install -r requirements.txt 100 | ``` 101 | 102 | ## Chat 模型推理示例 103 | 104 | ```python 105 | from transformers import AutoModelForCausalLM, AutoTokenizer 106 | from transformers.generation import GenerationConfig 107 | 108 | tokenizer = AutoTokenizer.from_pretrained("path_to_DevOps-Model-Chat", trust_remote_code=True) 109 | 110 | model = AutoModelForCausalLM.from_pretrained("path_to_DevOps-Model-Chat", device_map="auto", trust_remote_code=True, bf16=True).eval() 111 | 112 | # 指定 generation_config 113 | model.generation_config = GenerationConfig.from_pretrained("path_to_DevOps-Model-Chat", trust_remote_code=True) 114 | 115 | # 第一轮对话 116 | resp, hist = model.chat(query='你是谁', tokenizer=tokenizer, history=None) 117 | print(resp) 118 | # 我是 DevOps-Model,一个由蚂蚁集团平台技术事业群风险智能团队和北京大学联合研发的人工智能机器人,可以与用户进行自然语言交互,并协助解答 DevOps 全生命周期中的各种问题。如果您有任何需要协助的问题或者想要进行闲聊,都可以和我交流哦。 119 | 120 | # 第二轮对话 121 | resp2, hist2 = model.chat(query='Java 中 HashMap 和 Hashtable 有什么区别', tokenizer=tokenizer, history=hist) 122 | print(resp2) 123 | # HashMap 和 Hashtable 都是 Java 中常用的哈希表实现,它们的主要区别在于: 124 | # 1. Hashtable 是线程安全的,而 HashMap 不是线程安全的,因此在多线程环境下,Hashtable 的性能更稳定。 125 | # 2. Hashtable 中的方法都是同步的,而 HashMap 的方法不是同步的,因此在多线程环境下,Hashtable 的性能更好。 126 | # 3. Hashtable 中的 key 和 value 都必须实现 Serializable 接口,而 HashMap 中的 key 和 value 可以是任何对象,包括基本数据类型。 127 | # 4. Hashtable 的初始容量是 11,而 HashMap 的初始容量是 16。 128 | # 总之,如果需要在多线程环境下使用哈希表,并且需要保证线程安全,那么应该使用 Hashtable;如果不需要考虑线程安全,或者需要快速地进行哈希表操作,那么应该使用 HashMap。 129 | 130 | # 第三轮对话 131 | resp3, hist3 = model.chat(query='线程安全代表什么', tokenizer=tokenizer, history=hist2) 132 | print(resp3) 133 | # 线程安全是指在多线程环境下,程序能够正确地处理并发访问,并且不会出现数据竞争、死锁、饥饿等异常情况。线程安全的程序可以保证在不同的线程之间共享同一个数据结构时,数据的正确性和一致性。线程安全的实现通常需要使用同步机制,如锁、原子操作等,来保证对共享数据的访问是线程安全的。在 Java 中,可以通过 synchronized 关键字、Lock 接口等机制来实现线程安全。 134 | ``` 135 | 136 | 137 | # 模型训练 138 | 139 | ## 数据准备 140 | 代码内部通过调用 datasets.load_dataset 读取数据,支持 load_dataset 所支持的数据读取方式,比如 json,csv,自定义读取脚本等方式(但推荐数据准备为 jsonl 格式的文件)。然后还需要更新 `data/dataset_info.json` 文件,具体可以参考 `data/README.md`。 141 | 142 | ## 预训练 143 | 如果收集了一批文档之类的语料(比如公司内部产品的文档)想要在 devopspal 模型上加训,可以执行 `scripts/devops-model-pt.sh` 来发起一次加训来让模型学习到这批文档的知识,具体代码如下: 144 | 145 | ```bash 146 | set -v 147 | 148 | torchrun --nproc_per_node=8 --nnodes=$WORLD_SIZE --master_port=$MASTER_PORT --master_addr=$MASTER_ADDR --node_rank=$RANK src/train_bash.py \ 149 | --deepspeed conf/deepspeed_config.json \ # deepspeed 配置地址 150 | --stage pt \ # 代表执行 pretrain 151 | --model_name_or_path path_to_model \ # huggingface下载的 devopspal 模型地址 152 | --do_train \ 153 | --report_to 'tensorboard' \ 154 | --dataset your_corpus \ # 数据集名字,要和在 dataset_info.json 中定义的一致 155 | --template default \ # template,pretrain 就是 default 156 | --finetuning_type full \ # 全量或者 lora 157 | --output_dir path_to_output_checkpoint_path \ # 模型 checkpoint 保存的路径 158 | --overwrite_cache \ 159 | --per_device_train_batch_size 8 \ 160 | --per_device_eval_batch_size 8 \ 161 | --gradient_accumulation_steps 1 \ 162 | --lr_scheduler_type cosine \ 163 | --warmup_ratio 0.05 \ 164 | --evaluation_strategy steps \ 165 | --logging_steps 10 \ 166 | --max_steps 1000 \ 167 | --save_steps 1000 \ 168 | --eval_steps 1000 \ 169 | --learning_rate 5e-6 \ 170 | --plot_loss \ 171 | --max_source_length=2048 \ 172 | --dataloader_num_workers 8 \ 173 | --val_size 0.01 \ 174 | --bf16 \ 175 | --overwrite_output_dir 176 | ``` 177 | 178 | 使用者可以在这个基础上调整来发起自己的训练,更加详细的可配置项建议通过 `python 
src/train_bash.py -h` 来获取完整的参数列表。 179 | 180 | ## 指令微调 181 | 如果收集了一批 QA 数据想要针对 devopspal 再进行对齐的话,可以执行 `scripts/devops-model-sft.sh` 来发起一次加训来让模型在收集到的模型上进行对齐,具体代码如下: 182 | ```bash 183 | set -v 184 | 185 | torchrun --nproc_per_node=8 --nnodes=$WORLD_SIZE --master_port=$MASTER_PORT --master_addr=$MASTER_ADDR --node_rank=$RANK src/train_bash.py \ 186 | --deepspeed conf/deepspeed_config.json \ # deepspeed 配置地址 187 | --stage sft \ # 代表执行 pretrain 188 | --model_name_or_path path_to_model \ # huggingface下载的模型地址 189 | --do_train \ 190 | --report_to 'tensorboard' \ 191 | --dataset your_corpus \ # 数据集名字,要和在 dataset_info.json 中定义的一致 192 | --template chatml \ # template qwen 模型固定写 chatml 193 | --finetuning_type full \ # 全量或者 lora 194 | --output_dir /mnt/llm/devopspal/model/trained \ # 模型 checkpoint 保存的路径 195 | --overwrite_cache \ 196 | --per_device_train_batch_size 8 \ 197 | --per_device_eval_batch_size 8 \ 198 | --gradient_accumulation_steps 1 \ 199 | --lr_scheduler_type cosine \ 200 | --warmup_ratio 0.05 \ 201 | --evaluation_strategy steps \ 202 | --logging_steps 10 \ 203 | --max_steps 1000 \ 204 | --save_steps 100 \ 205 | --eval_steps 100 \ 206 | --learning_rate 5e-5 \ 207 | --plot_loss \ 208 | --max_source_length=2048 \ 209 | --dataloader_num_workers 8 \ 210 | --val_size 0.01 \ 211 | --bf16 \ 212 | --overwrite_output_dir 213 | ``` 214 | 215 | 使用者可以在这个基础上调整来发起自己的 SFT 训练,更加详细的可配置项建议通过 `python src/train_bash.py -h` 来获取完整的参数列表。 216 | 217 | ## 量化 218 | 我们将会提供了 DevOps-Model-Chat 系列的量化模型,当然也可以通过以下代码来量化自己加训过的模型 219 | 220 | ```python 221 | from transformers import AutoModelForCausalLM, AutoTokenizer 222 | from optimum.gptq import GPTQQuantizer, load_quantized_model 223 | import torch 224 | 225 | # 加载模型 226 | model_name = "path_of_your_model" 227 | tokenizer = AutoTokenizer.from_pretrained(model_name) 228 | model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16) 229 | 230 | # 加载数据 231 | # todo 232 | 233 | # 开始量化 234 | quantizer = GPTQQuantizer(bits=4, dataset="c4", block_name_to_quantize = "model.decoder.layers", model_seqlen = 2048) 235 | quantized_model = quantizer.quantize_model(model, tokenizer) 236 | 237 | # 保存量化后的模型 238 | out_dir = 'save_path_of_your_quantized_model' 239 | quantized_model.save_quantized(out_dir) 240 | ``` 241 | 242 | 243 | # 联系我们 244 | ![](https://github.com/codefuse-ai/CodeFuse-DevOps-Model/blob/main/imgs/wechat3.png) 245 | 246 | 247 | # 免责声明 248 | 由于语言模型的特性,模型生成的内容可能包含幻觉或者歧视性言论。请谨慎使用 DevOps-Model 系列模型生成的内容。 249 | 如果要公开使用或商用该模型服务,请注意服务方需承担由此产生的不良影响或有害言论的责任,本项目开发者不承担任何由使用本项目(包括但不限于数据、模型、代码等)导致的危害或损失。 250 | 251 | # 引用 252 | 如果使用本项目的代码或模型,请引用本项目论文: 253 | 254 | 链接:[DevOps-Model](https://arxiv.org) 255 | 256 | ``` 257 | @article{devopspal2023, 258 | title={}, 259 | author={}, 260 | journal={arXiv preprint arXiv}, 261 | year={2023} 262 | } 263 | ``` 264 | 265 | # 致谢 266 | 本项目参考了以下开源项目,在此对相关项目和研究开发人员表示感谢。 267 | - [LLaMA-Efficient-Tuning](https://github.com/hiyouga/LLaMA-Efficient-Tuning) 268 | - [QwenLM](https://github.com/QwenLM) 269 | -------------------------------------------------------------------------------- /src/llmtuner/tuner/core/parser.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import torch 4 | import datasets 5 | import transformers 6 | from typing import Any, Dict, Optional, Tuple 7 | from transformers import HfArgumentParser, Seq2SeqTrainingArguments 8 | from transformers.trainer_utils import get_last_checkpoint 9 | 10 | from llmtuner.extras.logging import get_logger 11 | from 
llmtuner.hparams import ( 12 | ModelArguments, 13 | DataArguments, 14 | FinetuningArguments, 15 | GeneratingArguments, 16 | GeneralArguments 17 | ) 18 | 19 | 20 | logger = get_logger(__name__) 21 | 22 | 23 | def _parse_args(parser: HfArgumentParser, args: Optional[Dict[str, Any]] = None) -> Tuple[Any]: 24 | if args is not None: 25 | return parser.parse_dict(args) 26 | elif len(sys.argv) == 2 and sys.argv[1].endswith(".yaml"): 27 | return parser.parse_yaml_file(os.path.abspath(sys.argv[1])) 28 | elif len(sys.argv) == 2 and sys.argv[1].endswith(".json"): 29 | return parser.parse_json_file(os.path.abspath(sys.argv[1])) 30 | else: 31 | return parser.parse_args_into_dataclasses() 32 | 33 | 34 | def parse_train_args( 35 | args: Optional[Dict[str, Any]] = None 36 | ) -> Tuple[ 37 | ModelArguments, 38 | DataArguments, 39 | Seq2SeqTrainingArguments, 40 | FinetuningArguments, 41 | GeneratingArguments, 42 | GeneralArguments 43 | ]: 44 | parser = HfArgumentParser(( 45 | ModelArguments, 46 | DataArguments, 47 | Seq2SeqTrainingArguments, 48 | FinetuningArguments, 49 | GeneratingArguments, 50 | GeneralArguments 51 | )) 52 | return _parse_args(parser, args) 53 | 54 | 55 | def parse_infer_args( 56 | args: Optional[Dict[str, Any]] = None 57 | ) -> Tuple[ 58 | ModelArguments, 59 | DataArguments, 60 | FinetuningArguments, 61 | GeneratingArguments 62 | ]: 63 | parser = HfArgumentParser(( 64 | ModelArguments, 65 | DataArguments, 66 | FinetuningArguments, 67 | GeneratingArguments 68 | )) 69 | return _parse_args(parser, args) 70 | 71 | 72 | def get_train_args( 73 | args: Optional[Dict[str, Any]] = None 74 | ) -> Tuple[ 75 | ModelArguments, 76 | DataArguments, 77 | Seq2SeqTrainingArguments, 78 | FinetuningArguments, 79 | GeneratingArguments, 80 | GeneralArguments 81 | ]: 82 | model_args, data_args, training_args, finetuning_args, generating_args, general_args = parse_train_args(args) 83 | 84 | # Setup logging 85 | if training_args.should_log: 86 | # The default of training_args.log_level is passive, so we set log level at info here to have that default. 
87 | transformers.utils.logging.set_verbosity_info() 88 | 89 | log_level = training_args.get_process_log_level() 90 | datasets.utils.logging.set_verbosity(log_level) 91 | transformers.utils.logging.set_verbosity(log_level) 92 | transformers.utils.logging.enable_default_handler() 93 | transformers.utils.logging.enable_explicit_format() 94 | 95 | # Check arguments (do not check finetuning_args since it may be loaded from checkpoints) 96 | data_args.init_for_training() 97 | 98 | if general_args.stage != "sft" and training_args.predict_with_generate: 99 | raise ValueError("`predict_with_generate` cannot be set as True except SFT.") 100 | 101 | if general_args.stage == "sft" and training_args.do_predict and not training_args.predict_with_generate: 102 | raise ValueError("Please enable `predict_with_generate` to save model predictions.") 103 | 104 | if general_args.stage in ["rm", "ppo"] and finetuning_args.finetuning_type != "lora": 105 | raise ValueError("RM and PPO stages can only be performed with the LoRA method.") 106 | 107 | if general_args.stage in ["rm", "ppo"] and training_args.resume_from_checkpoint is not None: 108 | raise ValueError("RM and PPO stages do not support `resume_from_checkpoint`.") 109 | 110 | if general_args.stage in ["ppo", "dpo"] and not training_args.do_train: 111 | raise ValueError("PPO and DPO stages can only be performed at training.") 112 | 113 | if general_args.stage == "ppo" and model_args.reward_model is None: 114 | raise ValueError("Reward model is necessary for PPO training.") 115 | 116 | if general_args.stage == "ppo" and data_args.streaming: 117 | raise ValueError("Streaming mode does not suppport PPO training currently.") 118 | 119 | if training_args.max_steps == -1 and data_args.streaming: 120 | raise ValueError("Please specify `max_steps` in streaming mode.") 121 | 122 | if data_args.val_size > 1e-6 and data_args.val_size < 1 and data_args.streaming: 123 | raise ValueError("Streaming mode should have an integer val size.") 124 | 125 | if training_args.do_train and training_args.predict_with_generate: 126 | raise ValueError("`predict_with_generate` cannot be set as True while training.") 127 | 128 | if training_args.do_train and finetuning_args.finetuning_type == "lora" and finetuning_args.lora_target is None: 129 | raise ValueError("Please specify `lora_target` in LoRA training.") 130 | 131 | if model_args.quantization_bit is not None and finetuning_args.finetuning_type != "lora": 132 | raise ValueError("Quantization is only compatible with the LoRA method.") 133 | 134 | if model_args.checkpoint_dir is not None: 135 | if finetuning_args.finetuning_type != "lora": 136 | if len(model_args.checkpoint_dir) != 1: 137 | raise ValueError("Only LoRA tuning accepts multiple checkpoints.") 138 | elif model_args.quantization_bit is not None and len(model_args.checkpoint_dir) != 1: 139 | raise ValueError("Quantized model only accepts a single checkpoint.") 140 | 141 | if model_args.quantization_bit is not None and (not training_args.do_train): 142 | logger.warning("Evaluating model in 4/8-bit mode may cause lower scores.") 143 | 144 | if training_args.do_train and (not training_args.fp16) and (not training_args.bf16): 145 | logger.warning("We recommend enable mixed precision training.") 146 | 147 | # postprocess data_args 148 | if data_args.max_samples is not None and data_args.streaming: 149 | logger.warning("`max_samples` is incompatible with `streaming`. 
Disabling max_samples.") 150 | data_args.max_samples = None 151 | 152 | # postprocess training_args 153 | if ( 154 | training_args.local_rank != -1 155 | and training_args.ddp_find_unused_parameters is None 156 | and finetuning_args.finetuning_type == "lora" 157 | ): 158 | logger.warning("`ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training.") 159 | training_args_dict = training_args.to_dict() 160 | training_args_dict.update(dict(ddp_find_unused_parameters=False)) 161 | training_args = Seq2SeqTrainingArguments(**training_args_dict) 162 | 163 | if ( 164 | training_args.resume_from_checkpoint is None 165 | and training_args.do_train 166 | and os.path.isdir(training_args.output_dir) 167 | and not training_args.overwrite_output_dir 168 | ): 169 | last_checkpoint = get_last_checkpoint(training_args.output_dir) 170 | if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0: 171 | raise ValueError("Output directory already exists and is not empty. Use `overwrite_output_dir`.") 172 | 173 | if last_checkpoint is not None: 174 | training_args_dict = training_args.to_dict() 175 | training_args_dict.update(dict(resume_from_checkpoint=last_checkpoint)) 176 | training_args = Seq2SeqTrainingArguments(**training_args_dict) 177 | logger.info( 178 | "Resuming from checkpoint. Change `output_dir` or use `overwrite_output_dir` to avoid." 179 | ) 180 | 181 | # postprocess model_args 182 | if training_args.bf16: 183 | if not torch.cuda.is_bf16_supported(): 184 | raise ValueError("Current device does not support bf16 training.") 185 | model_args.compute_dtype = torch.bfloat16 186 | else: 187 | model_args.compute_dtype = torch.float16 188 | 189 | # transfer training stage to dataset stage 190 | dataset_stage = general_args.stage 191 | if general_args.stage == "ppo": 192 | dataset_stage = "sft" 193 | elif general_args.stage == "dpo": 194 | dataset_stage = "rm" 195 | 196 | for dataset_attr in data_args.dataset_list: 197 | if dataset_attr.stage and dataset_attr.stage != dataset_stage: 198 | raise ValueError("Dataset {} is not supported for the stage {}" 199 | .format(dataset_attr.dataset_name, general_args.stage)) 200 | 201 | model_args.model_max_length = data_args.max_source_length + data_args.max_target_length 202 | 203 | # Log on each process the small summary: 204 | logger.info("Process rank: {}, device: {}, n_gpu: {}\n distributed training: {}, compute dtype: {}".format( 205 | training_args.local_rank, training_args.device, training_args.n_gpu, 206 | bool(training_args.local_rank != -1), str(model_args.compute_dtype) 207 | )) 208 | # logger.info(f"Training/evaluation parameters {training_args}") 209 | 210 | # Set seed before initializing model. 
211 | transformers.set_seed(training_args.seed) 212 | 213 | return model_args, data_args, training_args, finetuning_args, generating_args, general_args 214 | 215 | 216 | def get_infer_args( 217 | args: Optional[Dict[str, Any]] = None 218 | ) -> Tuple[ 219 | ModelArguments, 220 | DataArguments, 221 | FinetuningArguments, 222 | GeneratingArguments 223 | ]: 224 | model_args, data_args, finetuning_args, generating_args = parse_infer_args(args) 225 | 226 | if model_args.quantization_bit is not None and finetuning_args.finetuning_type != "lora": 227 | raise ValueError("Quantization is only compatible with the LoRA method.") 228 | 229 | if model_args.checkpoint_dir is not None: 230 | if finetuning_args.finetuning_type != "lora": 231 | if len(model_args.checkpoint_dir) != 1: 232 | raise ValueError("Only LoRA tuning accepts multiple checkpoints.") 233 | elif model_args.quantization_bit is not None and len(model_args.checkpoint_dir) != 1: 234 | raise ValueError("Quantized model only accepts a single checkpoint.") 235 | 236 | return model_args, data_args, finetuning_args, generating_args 237 | -------------------------------------------------------------------------------- /README_EN.md: -------------------------------------------------------------------------------- 1 |
2 |

3 | DevOps-Model 4 |

5 |
6 | 7 |

8 | 🤗 Hugging Face • 9 | 🤖 ModelScope

10 | 11 |
12 |

13 |

14 | 中文 | 15 | English 16 |

17 |

18 |
19 | 20 | 21 | DevOps-Model is a series of **industry-first open-source Chinese DevOps large language models**, dedicated to delivering practical value in the DevOps field. Currently, DevOps-Model can help engineers answer questions encountered throughout the entire DevOps life cycle. 22 | 23 | Based on the Qwen series of models, we produce the **Base** models by further pre-training on a high-quality Chinese DevOps corpus, and then produce the **Chat** models by aligning them with DevOps QA data. Our Base and Chat models achieve the best results among models of the same scale on evaluation data related to the DevOps field. 24 | 25 | At the same time, we are also building [DevOpsEval](https://github.com/codefuse-ai/codefuse-devops-eval), an evaluation benchmark dedicated to the DevOps field, to better measure the performance of models on DevOps tasks. 26 |
27 |
28 | 29 | # Update 30 | - [2023.10.31] Open source DevOps-Model-14B Base and Chat models. 31 | - [2023.10.30] Open source DevOps-Model-7B Base and Chat models. 32 | 33 | 34 | # Download 35 | Open source models and download links are shown in the table below: 36 | 🤗 Huggingface 37 | 38 | | | Base Model | Chat Model | Chat Model(Int4) | 39 | |:-------:|:-------:|:-------:|:-----------------:| 40 | | 7B | Coming Soon | Coming Soon| Coming Soon| 41 | | 14B | Coming Soon | Coming Soon| Coming Soon | 42 | 43 | 🤖 ModelScope 44 | 45 | | | Base Model | Chat Model | Chat Model(Int4) | 46 | |:-------:|:-------:|:-------:|:-----------------:| 47 | | 7B | [DevOps-Model-7B-Base](https://modelscope.cn/models/codefuse-ai/CodeFuse-DevOps-Model-7B-Chat/summary) | [DevOps-Model-7B-Chat](https://modelscope.cn/models/codefuse-ai/CodeFuse-DevOps-Model-7B-Chat/summary) | Coming Soon| 48 | | 14B | [DevOps-Model-14B-Base](https://modelscope.cn/models/codefuse-ai/CodeFuse-DevOps-Model-14B-Base/summary) | [DevOps-Model-14B-Chat](https://modelscope.cn/models/codefuse-ai/CodeFuse-DevOps-Model-14B-Chat/summary) | Coming Soon | 49 | 50 | 51 | # Evaluation 52 | We first selected a total of six exams related to DevOps in the two evaluation data sets of CMMLU and CEval. There are a total of 574 multiple-choice questions. The specific information is as follows: 53 | 54 | | Evaluation dataset | Exam subjects | Number of questions | 55 | |:-------:|:-------:|:-------:| 56 | | CMMLU | Computer science | 204 | 57 | | CMMLU | Computer security | 171 | 58 | | CMMLU | Machine learning | 122 | 59 | | CEval | College programming | 37 | 60 | | CEval | Computer architecture | 21 | 61 | | CEval | Computernetwork | 19 | 62 | 63 | 64 | We tested the results of Zero-shot and Five-shot respectively. Our 7B and 14B series models can achieve the best results among the tested models. More tests will be released later. 65 | 66 | |Base Model|Zero-shot Score|Five-shot Score| 67 | |:-------:|:-------:|:-------:| 68 | |**DevOps-Model-14B-Base**| **70.73** | **73.00** | 69 | |Qwen-14B-Base| 69.16 | 71.25 | 70 | |Baichuan2-13B-Base| 55.75 | 61.15 | 71 | |**DevOps-Model-7B-Base**| **62.72** | **62.02** | 72 | |Qwen-7B-Base| 55.75 | 56.00 | 73 | |Baichuan2-7B-Base| 49.30 | 55.4 | 74 | |Internlm-7B-Base| 47.56 | 52.6 | 75 |
76 | 77 | |Chat Model|Zero-shot Score|Five-shot Score| 78 | |:-------:|:-------:|:-------:| 79 | |**DevOps-Model-14B-Chat**| **74.04** | **75.96** | 80 | |Qwen-14B-Chat| 69.16 | 70.03 | 81 | |Baichuan2-13B-Chat| 52.79 | 55.23 | 82 | |**DevOps-Model-7B-Chat**| **62.20** | **64.11** | 83 | |Qwen-7B-Chat| 46.00 | 52.44 | 84 | |Baichuan2-7B-Chat| 52.56 | 55.75 | 85 | |Internlm-7B-Chat| 52.61 | 55.75 | 86 | 87 |
88 |
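The scores above come from multiple-choice exams evaluated in Zero-shot and Five-shot settings. As a rough illustration of what such an evaluation loop can look like, the sketch below runs a zero-shot pass with the `model.chat` interface shown in the Quickstart section; the prompt template, the answer-extraction rule and the `items` structure are assumptions made for illustration and are not the exact setup used to produce the reported numbers.

```python
# Illustrative zero-shot multiple-choice evaluation loop (not the official evaluation script).
# Assumes `model` and `tokenizer` are loaded as in the Quickstart section, and that each item
# is a dict with a "question", options "A"-"D" and a gold "answer" letter.
def evaluate_zero_shot(model, tokenizer, items):
    correct = 0
    for item in items:
        prompt = (
            f"{item['question']}\n"
            f"A. {item['A']}\nB. {item['B']}\nC. {item['C']}\nD. {item['D']}\n"
            "Please answer with a single letter: A, B, C or D."
        )
        response, _ = model.chat(query=prompt, tokenizer=tokenizer, history=None)
        # Take the first option letter that appears in the reply as the prediction.
        predicted = next((ch for ch in response if ch in "ABCD"), None)
        correct += int(predicted == item["answer"])
    return correct / len(items)
```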
89 | 90 | # Quickstart 91 | We provide simple examples to illustrate how to quickly use Devops-Model-Chat models with 🤗 Transformers. 92 | 93 | ## Requirement 94 | 95 | ```bash 96 | pip install -r requirements.txt 97 | ``` 98 | 99 | ## Chat Model Example 100 | 101 | ```python 102 | from transformers import AutoModelForCausalLM, AutoTokenizer 103 | from transformers.generation import GenerationConfig 104 | 105 | tokenizer = AutoTokenizer.from_pretrained("path_to_DevOps-Model-Chat", trust_remote_code=True) 106 | 107 | model = AutoModelForCausalLM.from_pretrained("path_to_DevOps-Model-Chat", device_map="auto", trust_remote_code=True, bf16=True).eval() 108 | 109 | model.generation_config = GenerationConfig.from_pretrained("path_to_DevOps-Model-Chat", trust_remote_code=True) 110 | 111 | resp2, hist2 = model.chat(query='What is the difference between HashMap and Hashtable in Java', tokenizer=tokenizer, history=hist) 112 | ``` 113 | 114 | 115 | # Model Finetune 116 | 117 | ## Data 118 | The code internally reads data by calling `datasets.load_dataset`, and supports the data reading methods supported by `load_dataset`, such as json, csv, custom reading scripts, etc. (but it is recommended that the data be prepared in jsonl format files). Then you also need to update the `data/dataset_info.json` file. For details, please refer to `data/README.md`. 119 | 120 | 121 | ## Pretrain 122 | If you have collected a batch of documents and other corpus (such as company internal product documents) and want to train based on our model, you can execute `scripts/devops-model-pt.sh` to initiate an additional training to let the model learn The specific codes for the knowledge of this batch of documents are as follows: 123 | 124 | ```bash 125 | set -v 126 | 127 | torchrun --nproc_per_node=8 --nnodes=$WORLD_SIZE --master_port=$MASTER_PORT --master_addr=$MASTER_ADDR --node_rank=$RANK src/train_bash.py \ 128 | --deepspeed conf/deepspeed_config.json 129 | --stage pt \ 130 | --model_name_or_path path_to_model \ 131 | --do_train \ 132 | --report_to 'tensorboard' \ 133 | --dataset your_corpus \ 134 | --template default \ 135 | --finetuning_type full \ 136 | --output_dir path_to_output_checkpoint_path \ 137 | --overwrite_cache \ 138 | --per_device_train_batch_size 8 \ 139 | --per_device_eval_batch_size 8 \ 140 | --gradient_accumulation_steps 1 \ 141 | --lr_scheduler_type cosine \ 142 | --warmup_ratio 0.05 \ 143 | --evaluation_strategy steps \ 144 | --logging_steps 10 \ 145 | --max_steps 1000 \ 146 | --save_steps 1000 \ 147 | --eval_steps 1000 \ 148 | --learning_rate 5e-6 \ 149 | --plot_loss \ 150 | --max_source_length 2048 \ 151 | --dataloader_num_workers 8 \ 152 | --val_size 0.01 \ 153 | --bf16 \ 154 | --overwrite_output_dir 155 | ``` 156 | 157 | Users can adjust on this basis to initiate their own training. For more detailed configurations, it is recommended to obtain the complete parameter list through `python src/train_bash.py -h`. 158 | 159 | ## Supervised Fine-Tuning 160 | If you collect a batch of QA data and want to align it for devopspal, you can execute `scripts/devops-model-sft.sh` to initiate an additional training to align the model on the collected model. 
The specific code is as follows: 161 | 162 | ```bash 163 | set -v 164 | 165 | torchrun --nproc_per_node=8 --nnodes=$WORLD_SIZE --master_port=$MASTER_PORT --master_addr=$MASTER_ADDR --node_rank=$RANK src/train_bash.py \ 166 | --deepspeed conf/deepspeed_config.json \ 167 | --stage sft \ 168 | --model_name_or_path path_to_model \ 169 | --do_train \ 170 | --report_to 'tensorboard' \ 171 | --dataset your_corpus \ 172 | --template chatml \ 173 | --finetuning_type full \ 174 | --output_dir /mnt/llm/devopspal/model/trained \ 175 | --overwrite_cache \ 176 | --per_device_train_batch_size 8 \ 177 | --per_device_eval_batch_size 8 \ 178 | --gradient_accumulation_steps 1 \ 179 | --lr_scheduler_type cosine \ 180 | --warmup_ratio 0.05 \ 181 | --evaluation_strategy steps \ 182 | --logging_steps 10 \ 183 | --max_steps 1000 \ 184 | --save_steps 100 \ 185 | --eval_steps 100 \ 186 | --learning_rate 5e-5 \ 187 | --plot_loss \ 188 | --max_source_length 2048 \ 189 | --dataloader_num_workers 8 \ 190 | --val_size 0.01 \ 191 | --bf16 \ 192 | --overwrite_output_dir 193 | ``` 194 | 195 | Users can adjust on this basis to initiate their own SFT. For more detailed configurations, it is recommended to obtain the complete parameter list through `python src/train_bash.py -h`. 196 | 197 | ## Quantilization 198 | We will provide quantitative models of the DevOps-Model-Chat series. Of course, you can also quantify your own trained models through the following code 199 | 200 | ```python 201 | from transformers import AutoModelForCausalLM, AutoTokenizer 202 | from optimum.gptq import GPTQQuantizer, load_quantized_model 203 | import torch 204 | 205 | model_name = "path_of_your_model" 206 | tokenizer = AutoTokenizer.from_pretrained(model_name) 207 | model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16) 208 | 209 | quantizer = GPTQQuantizer(bits=4, dataset="c4", block_name_to_quantize = "model.decoder.layers", model_seqlen = 2048) 210 | quantized_model = quantizer.quantize_model(model, tokenizer) 211 | 212 | out_dir = 'save_path_of_your_quantized_model' 213 | quantized_model.save_quantized(out_dir) 214 | ``` 215 | 216 | # Contact Us 217 | ![](https://github.com/codefuse-ai/CodeFuse-DevOps-Model/blob/main/imgs/qrcode.png) 218 | 219 | 220 | # Disclaimer 221 | Due to the characteristics of language models, the content generated by the model may contain hallucinations or discriminatory remarks. Please use the content generated by the DevOps-Model family of models with caution. 222 | If you want to use this model service publicly or commercially, please note that the service provider needs to bear the responsibility for the adverse effects or harmful remarks caused by it. The developer of this project does not assume any responsibility for any consequences caused by the use of this project (including but not limited to data, models, codes, etc.) ) resulting in harm or loss. 223 | 224 | 225 | # Acknowledgments 226 | This project refers to the following open source projects, and I would like to express my gratitude to the relevant projects and research and development personnel. 227 | - [LLaMA-Efficient-Tuning](https://github.com/hiyouga/LLaMA-Efficient-Tuning) 228 | - [QwenLM](https://github.com/QwenLM) 229 | --------------------------------------------------------------------------------