├── src
    ├── llmtuner
    │   ├── extras
    │   │   ├── __init__.py
    │   │   ├── patches
    │   │   │   └── __init__.py
    │   │   ├── packages.py
    │   │   ├── logging.py
    │   │   ├── ploting.py
    │   │   └── misc.py
    │   ├── api
    │   │   ├── __init__.py
    │   │   └── protocol.py
    │   ├── chat
    │   │   └── __init__.py
    │   ├── eval
    │   │   ├── __init__.py
    │   │   ├── template.py
    │   │   └── evaluator.py
    │   ├── train
    │   │   ├── pt
    │   │   │   ├── __init__.py
    │   │   │   └── workflow.py
    │   │   ├── rm
    │   │   │   ├── __init__.py
    │   │   │   ├── metric.py
    │   │   │   ├── collator.py
    │   │   │   ├── workflow.py
    │   │   │   └── trainer.py
    │   │   ├── dpo
    │   │   │   ├── __init__.py
    │   │   │   ├── collator.py
    │   │   │   ├── workflow.py
    │   │   │   └── trainer.py
    │   │   ├── ppo
    │   │   │   ├── __init__.py
    │   │   │   ├── utils.py
    │   │   │   └── workflow.py
    │   │   ├── sft
    │   │   │   ├── __init__.py
    │   │   │   ├── metric.py
    │   │   │   ├── trainer.py
    │   │   │   └── workflow.py
    │   │   ├── __init__.py
    │   │   ├── tuner.py
    │   │   └── utils.py
    │   ├── webui
    │   │   ├── __init__.py
    │   │   ├── components
    │   │   │   ├── __init__.py
    │   │   │   ├── infer.py
    │   │   │   ├── chatbot.py
    │   │   │   ├── top.py
    │   │   │   ├── eval.py
    │   │   │   ├── export.py
    │   │   │   ├── data.py
    │   │   │   └── train.py
    │   │   ├── css.py
    │   │   ├── manager.py
    │   │   ├── engine.py
    │   │   ├── interface.py
    │   │   ├── utils.py
    │   │   ├── common.py
    │   │   └── chatter.py
    │   ├── data
    │   │   ├── __init__.py
    │   │   └── utils.py
    │   ├── hparams
    │   │   ├── __init__.py
    │   │   ├── evaluation_args.py
    │   │   ├── generating_args.py
    │   │   └── model_args.py
    │   ├── model
    │   │   ├── __init__.py
    │   │   ├── loader.py
    │   │   ├── utils.py
    │   │   └── adapter.py
    │   └── __init__.py
    ├── export_model.py
    ├── evaluate.py
    ├── train_bash.py
    ├── train_web.py
    ├── web_demo.py
    ├── api_demo.py
    └── cli_demo.py
├── pics
    └── logo.jpg
├── evaluation
    ├── mmlu
    │   ├── mmlu.zip
    │   ├── mmlu.py
    │   └── mapping.json
    ├── ceval
    │   ├── ceval.zip
    │   ├── mapping.json
    │   └── ceval.py
    └── cmmlu
    │   ├── cmmlu.zip
    │   ├── cmmlu.py
    │   └── mapping.json
├── cache
    └── user.config
├── data
    └── dataset_info.json
├── start_web_demo.sh
├── requirements.txt
├── start_train.sh
├── tests
    ├── cal_flops.py
    ├── quantize.py
    ├── cal_lr.py
    ├── loftq_init.py
    ├── llamafy_baichuan2.py
    └── llamafy_qwen.py
├── setup.py
└── README.md
/src/llmtuner/extras/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/src/llmtuner/extras/patches/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/src/llmtuner/api/__init__.py:
--------------------------------------------------------------------------------
1 | from llmtuner.api.app import create_app
2 | 
--------------------------------------------------------------------------------
/pics/logo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiweihu1103/AgriMa/HEAD/pics/logo.jpg
--------------------------------------------------------------------------------
/src/llmtuner/chat/__init__.py:
--------------------------------------------------------------------------------
1 | from llmtuner.chat.chat_model import ChatModel
2 | 
--------------------------------------------------------------------------------
/src/llmtuner/eval/__init__.py:
--------------------------------------------------------------------------------
1 | from llmtuner.eval.evaluator import Evaluator
2 | 
--------------------------------------------------------------------------------
/src/llmtuner/train/pt/__init__.py:
--------------------------------------------------------------------------------
1 | from 
llmtuner.train.pt.workflow import run_pt 2 | -------------------------------------------------------------------------------- /src/llmtuner/train/rm/__init__.py: -------------------------------------------------------------------------------- 1 | from llmtuner.train.rm.workflow import run_rm 2 | -------------------------------------------------------------------------------- /src/llmtuner/train/dpo/__init__.py: -------------------------------------------------------------------------------- 1 | from llmtuner.train.dpo.workflow import run_dpo 2 | -------------------------------------------------------------------------------- /src/llmtuner/train/ppo/__init__.py: -------------------------------------------------------------------------------- 1 | from llmtuner.train.ppo.workflow import run_ppo 2 | -------------------------------------------------------------------------------- /src/llmtuner/train/sft/__init__.py: -------------------------------------------------------------------------------- 1 | from llmtuner.train.sft.workflow import run_sft 2 | -------------------------------------------------------------------------------- /src/llmtuner/train/__init__.py: -------------------------------------------------------------------------------- 1 | from llmtuner.train.tuner import export_model, run_exp 2 | -------------------------------------------------------------------------------- /src/llmtuner/webui/__init__.py: -------------------------------------------------------------------------------- 1 | from llmtuner.webui.interface import create_ui, create_web_demo 2 | -------------------------------------------------------------------------------- /evaluation/mmlu/mmlu.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiweihu1103/AgriMa/HEAD/evaluation/mmlu/mmlu.zip -------------------------------------------------------------------------------- /evaluation/ceval/ceval.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiweihu1103/AgriMa/HEAD/evaluation/ceval/ceval.zip -------------------------------------------------------------------------------- /evaluation/cmmlu/cmmlu.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiweihu1103/AgriMa/HEAD/evaluation/cmmlu/cmmlu.zip -------------------------------------------------------------------------------- /cache/user.config: -------------------------------------------------------------------------------- 1 | { 2 | "lang": "zh", 3 | "last_model": null, 4 | "path_dict": {}, 5 | "cache_dir": null 6 | } -------------------------------------------------------------------------------- /src/export_model.py: -------------------------------------------------------------------------------- 1 | from llmtuner import export_model 2 | 3 | 4 | def main(): 5 | export_model() 6 | 7 | 8 | if __name__ == "__main__": 9 | main() 10 | -------------------------------------------------------------------------------- /src/evaluate.py: -------------------------------------------------------------------------------- 1 | from llmtuner import Evaluator 2 | 3 | 4 | def main(): 5 | evaluator = Evaluator() 6 | evaluator.eval() 7 | 8 | 9 | if __name__ == "__main__": 10 | main() 11 | -------------------------------------------------------------------------------- /src/train_bash.py: -------------------------------------------------------------------------------- 1 | from 
llmtuner import run_exp 2 | 3 | 4 | def main(): 5 | run_exp() 6 | 7 | 8 | def _mp_fn(index): 9 | # For xla_spawn (TPUs) 10 | main() 11 | 12 | 13 | if __name__ == "__main__": 14 | main() 15 | -------------------------------------------------------------------------------- /data/dataset_info.json: -------------------------------------------------------------------------------- 1 | { 2 | "agrima_train": { 3 | "file_name": "agrima_instruction.json", 4 | "columns": { 5 | "prompt": "instruction", 6 | "query": "input", 7 | "response": "output" 8 | } 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /src/llmtuner/data/__init__.py: -------------------------------------------------------------------------------- 1 | from llmtuner.data.loader import get_dataset 2 | from llmtuner.data.preprocess import preprocess_dataset 3 | from llmtuner.data.template import get_template_and_fix_tokenizer 4 | from llmtuner.data.utils import split_dataset 5 | -------------------------------------------------------------------------------- /src/train_web.py: -------------------------------------------------------------------------------- 1 | from llmtuner import create_ui 2 | 3 | 4 | def main(): 5 | demo = create_ui() 6 | demo.queue() 7 | demo.launch(server_name="0.0.0.0", share=False, inbrowser=True) 8 | 9 | 10 | if __name__ == "__main__": 11 | main() 12 | -------------------------------------------------------------------------------- /start_web_demo.sh: -------------------------------------------------------------------------------- 1 | # --adapter_name_or_path checkpoint/agrima-baichuan-7B \ 2 | 3 | CUDA_VISIBLE_DEVICES=0 python src/web_demo.py \ 4 | --model_name_or_path /data/Users/hzw/model_weights/baichuan-7b \ 5 | --template vicuna \ 6 | --finetuning_type lora -------------------------------------------------------------------------------- /src/llmtuner/hparams/__init__.py: -------------------------------------------------------------------------------- 1 | from .data_args import DataArguments 2 | from .evaluation_args import EvaluationArguments 3 | from .finetuning_args import FinetuningArguments 4 | from .generating_args import GeneratingArguments 5 | from .model_args import ModelArguments 6 | -------------------------------------------------------------------------------- /src/web_demo.py: -------------------------------------------------------------------------------- 1 | from llmtuner import create_web_demo 2 | 3 | 4 | def main(): 5 | demo = create_web_demo() 6 | demo.queue() 7 | demo.launch(server_name="0.0.0.0", share=False, inbrowser=True) 8 | 9 | 10 | if __name__ == "__main__": 11 | main() 12 | -------------------------------------------------------------------------------- /src/llmtuner/model/__init__.py: -------------------------------------------------------------------------------- 1 | # Level: loader > adapter > parser, utils 2 | 3 | from llmtuner.model.loader import load_model_and_tokenizer 4 | from llmtuner.model.parser import get_train_args, get_infer_args, get_eval_args 5 | from llmtuner.model.utils import dispatch_model, get_modelcard_args, load_valuehead_params 6 | -------------------------------------------------------------------------------- /src/llmtuner/train/rm/metric.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from typing import Dict, Sequence, Tuple, Union 3 | 4 | 5 | def compute_accuracy(eval_preds: Sequence[Union[np.ndarray, Tuple[np.ndarray]]]) -> Dict[str, float]: 
6 | preds, _ = eval_preds 7 | return {"accuracy": (preds[0] > preds[1]).sum() / len(preds[0])} 8 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=1.13.1 2 | transformers>=4.36.2 3 | datasets>=2.14.3 4 | accelerate>=0.21.0 5 | peft>=0.7.0 6 | trl>=0.7.6 7 | gradio>=3.38.0,<4.0.0 8 | scipy 9 | einops 10 | sentencepiece 11 | protobuf 12 | tiktoken 13 | jieba 14 | rouge-chinese 15 | nltk 16 | uvicorn 17 | pydantic 18 | fastapi 19 | sse-starlette 20 | matplotlib 21 | -------------------------------------------------------------------------------- /src/llmtuner/__init__.py: -------------------------------------------------------------------------------- 1 | # Level: api, webui > chat, eval, train > data, model > extras, hparams 2 | 3 | from llmtuner.api import create_app 4 | from llmtuner.chat import ChatModel 5 | from llmtuner.eval import Evaluator 6 | from llmtuner.train import export_model, run_exp 7 | from llmtuner.webui import create_ui, create_web_demo 8 | 9 | 10 | __version__ = "0.4.0" 11 | -------------------------------------------------------------------------------- /src/llmtuner/webui/components/__init__.py: -------------------------------------------------------------------------------- 1 | from llmtuner.webui.components.top import create_top 2 | from llmtuner.webui.components.train import create_train_tab 3 | from llmtuner.webui.components.eval import create_eval_tab 4 | from llmtuner.webui.components.infer import create_infer_tab 5 | from llmtuner.webui.components.export import create_export_tab 6 | from llmtuner.webui.components.chatbot import create_chat_box 7 | -------------------------------------------------------------------------------- /src/api_demo.py: -------------------------------------------------------------------------------- 1 | import os 2 | import uvicorn 3 | 4 | from llmtuner import ChatModel, create_app 5 | 6 | 7 | def main(): 8 | chat_model = ChatModel() 9 | app = create_app(chat_model) 10 | print("Visit http://localhost:{}/docs for API document.".format(os.environ.get("API_PORT", 8000))) 11 | uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("API_PORT", 8000)), workers=1) 12 | 13 | 14 | if __name__ == "__main__": 15 | main() 16 | -------------------------------------------------------------------------------- /src/llmtuner/webui/css.py: -------------------------------------------------------------------------------- 1 | CSS = r""" 2 | .duplicate-button { 3 | margin: auto !important; 4 | color: white !important; 5 | background: black !important; 6 | border-radius: 100vh !important; 7 | } 8 | 9 | .modal-box { 10 | position: fixed !important; 11 | top: 50%; 12 | left: 50%; 13 | transform: translate(-50%, -50%); /* center horizontally */ 14 | max-width: 1000px; 15 | max-height: 750px; 16 | overflow-y: auto; 17 | background-color: var(--input-background-fill); 18 | flex-wrap: nowrap !important; 19 | border: 2px solid black !important; 20 | z-index: 1000; 21 | padding: 10px; 22 | } 23 | 24 | .dark .modal-box { 25 | border: 2px solid white !important; 26 | } 27 | """ 28 | -------------------------------------------------------------------------------- /start_train.sh: -------------------------------------------------------------------------------- 1 | export LOG_PATH=logs/agrima-baichuan-7B.logs 2 | export OUTPUT_DIR=checkpoint/agrima-baichuan-7B 3 | export MODEL_NAME_OR_PATH=/data/Users/hzw/model_weights/baichuan-7b 
4 | export TEMPLATE=baichuan 5 | export DATASET=agrima_train 6 | 7 | CUDA_VISIBLE_DEVICES=1 nohup python -u src/train_bash.py \ 8 | --stage sft \ 9 | --do_train \ 10 | --model_name_or_path $MODEL_NAME_OR_PATH \ 11 | --dataset $DATASET \ 12 | --template $TEMPLATE \ 13 | --finetuning_type lora \ 14 | --lora_target W_pack \ 15 | --output_dir $OUTPUT_DIR \ 16 | --overwrite_cache \ 17 | --per_device_train_batch_size 4 \ 18 | --gradient_accumulation_steps 4 \ 19 | --lr_scheduler_type cosine \ 20 | --logging_steps 10 \ 21 | --save_steps 1000 \ 22 | --learning_rate 5e-5 \ 23 | --num_train_epochs 3.0 \ 24 | --plot_loss \ 25 | --fp16 \ 26 | > $LOG_PATH 2>&1 & -------------------------------------------------------------------------------- /src/llmtuner/train/rm/collator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from dataclasses import dataclass 3 | from typing import Any, Dict, Sequence 4 | from transformers import DataCollatorWithPadding 5 | 6 | 7 | @dataclass 8 | class PairwiseDataCollatorWithPadding(DataCollatorWithPadding): 9 | r""" 10 | Data collator for pairwise data. 11 | """ 12 | 13 | def __call__(self, features: Sequence[Dict[str, Any]]) -> Dict[str, torch.Tensor]: 14 | r""" 15 | Pads batched data to the longest sequence in the batch. 16 | 17 | We generate 2 * n examples where the first n examples represent chosen examples and 18 | the last n examples represent rejected examples. 19 | """ 20 | features = [ 21 | { 22 | "input_ids": feature["prompt_ids"] + feature[key], 23 | "attention_mask": [1] * (len(feature["prompt_ids"]) + len(feature[key])) 24 | } 25 | for key in ("chosen_ids", "rejected_ids") for feature in features 26 | ] 27 | return super().__call__(features) 28 | -------------------------------------------------------------------------------- /src/llmtuner/extras/packages.py: -------------------------------------------------------------------------------- 1 | import importlib.metadata 2 | import importlib.util 3 | 4 | 5 | def is_package_available(name: str) -> bool: 6 | return importlib.util.find_spec(name) is not None 7 | 8 | 9 | def get_package_version(name: str) -> str: 10 | try: 11 | return importlib.metadata.version(name) 12 | except: 13 | return "0.0.0" 14 | 15 | 16 | def is_fastapi_availble(): 17 | return is_package_available("fastapi") 18 | 19 | 20 | def is_flash_attn2_available(): 21 | return is_package_available("flash_attn") and get_package_version("flash_attn").startswith("2") 22 | 23 | 24 | def is_jieba_available(): 25 | return is_package_available("jieba") 26 | 27 | 28 | def is_matplotlib_available(): 29 | return is_package_available("matplotlib") 30 | 31 | 32 | def is_nltk_available(): 33 | return is_package_available("nltk") 34 | 35 | 36 | def is_requests_available(): 37 | return is_package_available("requests") 38 | 39 | 40 | def is_rouge_available(): 41 | return is_package_available("rouge_chinese") 42 | 43 | 44 | def is_starlette_available(): 45 | return is_package_available("sse_starlette") 46 | 47 | 48 | def is_uvicorn_available(): 49 | return is_package_available("uvicorn") 50 | -------------------------------------------------------------------------------- /src/llmtuner/webui/manager.py: -------------------------------------------------------------------------------- 1 | from typing import TYPE_CHECKING, Dict, List, Set 2 | 3 | if TYPE_CHECKING: 4 | from gradio.components import Component 5 | 6 | 7 | class Manager: 8 | 9 | def __init__(self) -> None: 10 | self.all_elems: Dict[str, Dict[str, 
"Component"]] = {} 11 | 12 | def get_elem_by_name(self, name: str) -> "Component": 13 | r""" 14 | Example: top.lang, train.dataset 15 | """ 16 | tab_name, elem_name = name.split(".") 17 | return self.all_elems[tab_name][elem_name] 18 | 19 | def get_base_elems(self) -> Set["Component"]: 20 | return { 21 | self.all_elems["top"]["lang"], 22 | self.all_elems["top"]["model_name"], 23 | self.all_elems["top"]["model_path"], 24 | self.all_elems["top"]["adapter_path"], 25 | self.all_elems["top"]["finetuning_type"], 26 | self.all_elems["top"]["quantization_bit"], 27 | self.all_elems["top"]["template"], 28 | self.all_elems["top"]["rope_scaling"], 29 | self.all_elems["top"]["booster"] 30 | } 31 | 32 | def list_elems(self) -> List["Component"]: 33 | return [elem for elems in self.all_elems.values() for elem in elems.values()] 34 | -------------------------------------------------------------------------------- /src/llmtuner/extras/logging.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import logging 3 | 4 | 5 | class LoggerHandler(logging.Handler): 6 | r""" 7 | Logger handler used in Web UI. 8 | """ 9 | 10 | def __init__(self): 11 | super().__init__() 12 | self.log = "" 13 | 14 | def reset(self): 15 | self.log = "" 16 | 17 | def emit(self, record): 18 | if record.name == "httpx": 19 | return 20 | log_entry = self.format(record) 21 | self.log += log_entry 22 | self.log += "\n\n" 23 | 24 | 25 | def get_logger(name: str) -> logging.Logger: 26 | r""" 27 | Gets a standard logger with a stream hander to stdout. 28 | """ 29 | formatter = logging.Formatter( 30 | fmt="%(asctime)s - %(levelname)s - %(name)s - %(message)s", 31 | datefmt="%m/%d/%Y %H:%M:%S" 32 | ) 33 | handler = logging.StreamHandler(sys.stdout) 34 | handler.setFormatter(formatter) 35 | 36 | logger = logging.getLogger(name) 37 | logger.setLevel(logging.INFO) 38 | logger.addHandler(handler) 39 | 40 | return logger 41 | 42 | 43 | def reset_logging() -> None: 44 | r""" 45 | Removes basic config of root logger. 
(unused in script) 46 | """ 47 | root = logging.getLogger() 48 | list(map(root.removeHandler, root.handlers)) 49 | list(map(root.removeFilter, root.filters)) 50 | -------------------------------------------------------------------------------- /src/llmtuner/webui/components/infer.py: -------------------------------------------------------------------------------- 1 | import gradio as gr 2 | from typing import TYPE_CHECKING, Dict 3 | 4 | from llmtuner.webui.components.chatbot import create_chat_box 5 | 6 | if TYPE_CHECKING: 7 | from gradio.components import Component 8 | from llmtuner.webui.engine import Engine 9 | 10 | 11 | def create_infer_tab(engine: "Engine") -> Dict[str, "Component"]: 12 | input_elems = engine.manager.get_base_elems() 13 | elem_dict = dict() 14 | 15 | with gr.Row(): 16 | load_btn = gr.Button() 17 | unload_btn = gr.Button() 18 | 19 | info_box = gr.Textbox(show_label=False, interactive=False) 20 | elem_dict.update(dict(load_btn=load_btn, unload_btn=unload_btn, info_box=info_box)) 21 | 22 | chat_box, chatbot, history, chat_elems = create_chat_box(engine, visible=False) 23 | elem_dict.update(dict(chat_box=chat_box, **chat_elems)) 24 | 25 | load_btn.click( 26 | engine.chatter.load_model, input_elems, [info_box] 27 | ).then( 28 | lambda: gr.update(visible=engine.chatter.loaded), outputs=[chat_box] 29 | ) 30 | 31 | unload_btn.click( 32 | engine.chatter.unload_model, input_elems, [info_box] 33 | ).then( 34 | lambda: ([], []), outputs=[chatbot, history] 35 | ).then( 36 | lambda: gr.update(visible=engine.chatter.loaded), outputs=[chat_box] 37 | ) 38 | 39 | return elem_dict 40 | -------------------------------------------------------------------------------- /src/cli_demo.py: -------------------------------------------------------------------------------- 1 | from llmtuner import ChatModel 2 | from llmtuner.extras.misc import torch_gc 3 | 4 | try: 5 | import platform 6 | if platform.system() != "Windows": 7 | import readline 8 | except ImportError: 9 | print("Install `readline` for a better experience.") 10 | 11 | 12 | def main(): 13 | chat_model = ChatModel() 14 | history = [] 15 | print("Welcome to the CLI application, use `clear` to remove the history, use `exit` to exit the application.") 16 | 17 | while True: 18 | try: 19 | query = input("\nUser: ") 20 | except UnicodeDecodeError: 21 | print("Detected decoding error at the inputs, please set the terminal encoding to utf-8.") 22 | continue 23 | except Exception: 24 | raise 25 | 26 | if query.strip() == "exit": 27 | break 28 | 29 | if query.strip() == "clear": 30 | history = [] 31 | torch_gc() 32 | print("History has been removed.") 33 | continue 34 | 35 | print("Assistant: ", end="", flush=True) 36 | 37 | response = "" 38 | for new_text in chat_model.stream_chat(query, history): 39 | print(new_text, end="", flush=True) 40 | response += new_text 41 | print() 42 | 43 | history = history + [(query, response)] 44 | 45 | 46 | if __name__ == "__main__": 47 | main() 48 | -------------------------------------------------------------------------------- /tests/cal_flops.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Calculates the flops of pre-trained models. 
3 | # Usage: python cal_flops.py --model_name_or_path path_to_model --batch_size 1 --seq_length 512 4 | # Inspired by: https://www.deepspeed.ai/tutorials/flops-profiler/ 5 | 6 | import fire 7 | import torch 8 | from typing import Optional 9 | from deepspeed.accelerator import get_accelerator # type: ignore 10 | from deepspeed.profiling.flops_profiler import get_model_profile # type: ignore 11 | 12 | from llmtuner import ChatModel 13 | 14 | 15 | def calculate_flops( 16 | model_name_or_path: str, 17 | batch_size: Optional[int] = 1, 18 | seq_length: Optional[int] = 256, 19 | flash_attn: Optional[bool] = False 20 | ): 21 | with get_accelerator().device(0): 22 | chat_model = ChatModel(dict( 23 | model_name_or_path=model_name_or_path, 24 | template="vanilla", 25 | flash_attn=flash_attn 26 | )) 27 | fake_input = torch.ones((batch_size, seq_length), dtype=torch.long, device=chat_model.model.device) 28 | input_dict = { 29 | "input_ids": fake_input, 30 | "labels": fake_input.clone() 31 | } 32 | flops, macs, params = get_model_profile( 33 | chat_model.model, 34 | kwargs=input_dict, 35 | print_profile=True, 36 | detailed=True 37 | ) 38 | print("FLOPs:", flops) 39 | print("MACs:", macs) 40 | print("Params:", params) 41 | 42 | 43 | if __name__ == "__main__": 44 | fire.Fire(calculate_flops) 45 | -------------------------------------------------------------------------------- /src/llmtuner/webui/components/chatbot.py: -------------------------------------------------------------------------------- 1 | import gradio as gr 2 | from typing import TYPE_CHECKING, Dict, Optional, Tuple 3 | 4 | if TYPE_CHECKING: 5 | from gradio.blocks import Block 6 | from gradio.components import Component 7 | from llmtuner.webui.engine import Engine 8 | 9 | 10 | def create_chat_box( 11 | engine: "Engine", 12 | visible: Optional[bool] = False 13 | ) -> Tuple["Block", "Component", "Component", Dict[str, "Component"]]: 14 | with gr.Box(visible=visible) as chat_box: 15 | chatbot = gr.Chatbot() 16 | history = gr.State([]) 17 | with gr.Row(): 18 | with gr.Column(scale=4): 19 | system = gr.Textbox(show_label=False) 20 | query = gr.Textbox(show_label=False, lines=8) 21 | submit_btn = gr.Button(variant="primary") 22 | 23 | with gr.Column(scale=1): 24 | clear_btn = gr.Button() 25 | gen_kwargs = engine.chatter.generating_args 26 | max_new_tokens = gr.Slider(10, 2048, value=gen_kwargs.max_new_tokens, step=1) 27 | top_p = gr.Slider(0.01, 1, value=gen_kwargs.top_p, step=0.01) 28 | temperature = gr.Slider(0.01, 1.5, value=gen_kwargs.temperature, step=0.01) 29 | 30 | submit_btn.click( 31 | engine.chatter.predict, 32 | [chatbot, query, history, system, max_new_tokens, top_p, temperature], 33 | [chatbot, history], 34 | show_progress=True 35 | ).then( 36 | lambda: gr.update(value=""), outputs=[query] 37 | ) 38 | 39 | clear_btn.click(lambda: ([], []), outputs=[chatbot, history], show_progress=True) 40 | 41 | return chat_box, chatbot, history, dict( 42 | system=system, 43 | query=query, 44 | submit_btn=submit_btn, 45 | clear_btn=clear_btn, 46 | max_new_tokens=max_new_tokens, 47 | top_p=top_p, 48 | temperature=temperature 49 | ) 50 | -------------------------------------------------------------------------------- /src/llmtuner/extras/ploting.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | import json 4 | from typing import List, Optional 5 | from transformers.trainer import TRAINER_STATE_NAME 6 | 7 | from llmtuner.extras.logging import get_logger 8 | from 
llmtuner.extras.packages import is_matplotlib_available 9 | 10 | if is_matplotlib_available(): 11 | import matplotlib.pyplot as plt 12 | 13 | 14 | logger = get_logger(__name__) 15 | 16 | 17 | def smooth(scalars: List[float]) -> List[float]: 18 | r""" 19 | EMA implementation according to TensorBoard. 20 | """ 21 | last = scalars[0] 22 | smoothed = list() 23 | weight = 1.8 * (1 / (1 + math.exp(-0.05 * len(scalars))) - 0.5) # a sigmoid function 24 | for next_val in scalars: 25 | smoothed_val = last * weight + (1 - weight) * next_val 26 | smoothed.append(smoothed_val) 27 | last = smoothed_val 28 | return smoothed 29 | 30 | 31 | def plot_loss(save_dictionary: os.PathLike, keys: Optional[List[str]] = ["loss"]) -> None: 32 | 33 | with open(os.path.join(save_dictionary, TRAINER_STATE_NAME), "r", encoding="utf-8") as f: 34 | data = json.load(f) 35 | 36 | for key in keys: 37 | steps, metrics = [], [] 38 | for i in range(len(data["log_history"])): 39 | if key in data["log_history"][i]: 40 | steps.append(data["log_history"][i]["step"]) 41 | metrics.append(data["log_history"][i][key]) 42 | 43 | if len(metrics) == 0: 44 | logger.warning(f"No metric {key} to plot.") 45 | continue 46 | 47 | plt.figure() 48 | plt.plot(steps, metrics, alpha=0.4, label="original") 49 | plt.plot(steps, smooth(metrics), label="smoothed") 50 | plt.title("training {} of {}".format(key, save_dictionary)) 51 | plt.xlabel("step") 52 | plt.ylabel(key) 53 | plt.legend() 54 | plt.savefig(os.path.join(save_dictionary, "training_{}.png".format(key)), format="png", dpi=100) 55 | print("Figure saved:", os.path.join(save_dictionary, "training_{}.png".format(key))) 56 | -------------------------------------------------------------------------------- /src/llmtuner/hparams/evaluation_args.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Literal, Optional 3 | from dataclasses import dataclass, field 4 | 5 | from datasets import DownloadMode 6 | 7 | 8 | @dataclass 9 | class EvaluationArguments: 10 | r""" 11 | Arguments pertaining to specify the evaluation parameters. 
12 | """ 13 | task: str = field( 14 | metadata={"help": "Name of the evaluation task."} 15 | ) 16 | task_dir: Optional[str] = field( 17 | default="evaluation", 18 | metadata={"help": "Path to the folder containing the evaluation datasets."} 19 | ) 20 | batch_size: Optional[int] = field( 21 | default=4, 22 | metadata={"help": "The batch size per GPU for evaluation."} 23 | ) 24 | seed: Optional[int] = field( 25 | default=42, 26 | metadata={"help": "Random seed to be used with data loaders."} 27 | ) 28 | lang: Optional[Literal["en", "zh"]] = field( 29 | default="en", 30 | metadata={"help": "Language used at evaluation."} 31 | ) 32 | n_shot: Optional[int] = field( 33 | default=5, 34 | metadata={"help": "Number of examplars for few-shot learning."} 35 | ) 36 | save_dir: Optional[str] = field( 37 | default=None, 38 | metadata={"help": "Path to save the evaluation results."} 39 | ) 40 | download_mode: Optional[DownloadMode] = field( 41 | default=DownloadMode.REUSE_DATASET_IF_EXISTS, 42 | metadata={"help": "Download mode used for the evaluation datasets."} 43 | ) 44 | 45 | def __post_init__(self): 46 | task_available = [] 47 | for folder in os.listdir(self.task_dir): 48 | if os.path.isdir(os.path.join(self.task_dir, folder)): 49 | task_available.append(folder) 50 | 51 | if self.task not in task_available: 52 | raise ValueError("Task {} not found in {}.".format(self.task, self.task_dir)) 53 | 54 | if self.save_dir is not None and os.path.exists(self.save_dir): 55 | raise ValueError("`save_dir` already exists, use another one.") 56 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | from setuptools import setup, find_packages 4 | 5 | 6 | def get_version(): 7 | with open(os.path.join("src", "llmtuner", "__init__.py"), "r", encoding="utf-8") as f: 8 | file_content = f.read() 9 | pattern = r"{0}\W*=\W*\"([^\"]+)\"".format("__version__") 10 | version, = re.findall(pattern, file_content) 11 | return version 12 | 13 | 14 | def get_requires(): 15 | with open("requirements.txt", "r", encoding="utf-8") as f: 16 | file_content = f.read() 17 | lines = [line.strip() for line in file_content.strip().split("\n") if not line.startswith("#")] 18 | return lines 19 | 20 | 21 | def main(): 22 | 23 | setup( 24 | name="llmtuner", 25 | version=get_version(), 26 | author="hiyouga", 27 | author_email="hiyouga" "@" "buaa.edu.cn", 28 | description="Easy-to-use LLM fine-tuning framework", 29 | long_description=open("README.md", "r", encoding="utf-8").read(), 30 | long_description_content_type="text/markdown", 31 | keywords=["LLaMA", "BLOOM", "Falcon", "LLM", "ChatGPT", "transformer", "pytorch", "deep learning"], 32 | license="Apache 2.0 License", 33 | url="https://github.com/hiyouga/LLaMA-Factory", 34 | package_dir={"": "src"}, 35 | packages=find_packages("src"), 36 | python_requires=">=3.8.0", 37 | install_requires=get_requires(), 38 | classifiers=[ 39 | "Development Status :: 3 - Alpha", 40 | "Intended Audience :: Developers", 41 | "Intended Audience :: Education", 42 | "Intended Audience :: Science/Research", 43 | "License :: OSI Approved :: Apache Software License", 44 | "Operating System :: OS Independent", 45 | "Programming Language :: Python :: 3", 46 | "Programming Language :: Python :: 3.8", 47 | "Programming Language :: Python :: 3.9", 48 | "Programming Language :: Python :: 3.10", 49 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 50 | ] 51 | ) 52 
| 53 | 54 | if __name__ == "__main__": 55 | main() 56 | -------------------------------------------------------------------------------- /src/llmtuner/hparams/generating_args.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional 2 | from dataclasses import asdict, dataclass, field 3 | 4 | 5 | @dataclass 6 | class GeneratingArguments: 7 | r""" 8 | Arguments pertaining to specify the decoding parameters. 9 | """ 10 | do_sample: Optional[bool] = field( 11 | default=True, 12 | metadata={"help": "Whether or not to use sampling, use greedy decoding otherwise."} 13 | ) 14 | temperature: Optional[float] = field( 15 | default=0.95, 16 | metadata={"help": "The value used to modulate the next token probabilities."} 17 | ) 18 | top_p: Optional[float] = field( 19 | default=0.7, 20 | metadata={"help": "The smallest set of most probable tokens with probabilities that add up to top_p or higher are kept."} 21 | ) 22 | top_k: Optional[int] = field( 23 | default=50, 24 | metadata={"help": "The number of highest probability vocabulary tokens to keep for top-k filtering."} 25 | ) 26 | num_beams: Optional[int] = field( 27 | default=1, 28 | metadata={"help": "Number of beams for beam search. 1 means no beam search."} 29 | ) 30 | max_length: Optional[int] = field( 31 | default=512, 32 | metadata={"help": "The maximum length the generated tokens can have. It can be overridden by max_new_tokens."} 33 | ) 34 | max_new_tokens: Optional[int] = field( 35 | default=512, 36 | metadata={"help": "The maximum numbers of tokens to generate, ignoring the number of tokens in the prompt."} 37 | ) 38 | repetition_penalty: Optional[float] = field( 39 | default=1.0, 40 | metadata={"help": "The parameter for repetition penalty. 
1.0 means no penalty."} 41 | ) 42 | length_penalty: Optional[float] = field( 43 | default=1.0, 44 | metadata={"help": "Exponential penalty to the length that is used with beam-based generation."} 45 | ) 46 | 47 | def to_dict(self) -> Dict[str, Any]: 48 | args = asdict(self) 49 | if args.get("max_new_tokens", -1) > 0: 50 | args.pop("max_length", None) 51 | else: 52 | args.pop("max_new_tokens", None) 53 | return args 54 | -------------------------------------------------------------------------------- /src/llmtuner/train/ppo/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import torch 3 | from typing import TYPE_CHECKING, Dict, List, Literal, Optional 4 | 5 | from llmtuner.extras.packages import is_requests_available 6 | 7 | if TYPE_CHECKING: 8 | from transformers import PreTrainedModel 9 | from trl import AutoModelForCausalLMWithValueHead 10 | 11 | if is_requests_available(): 12 | import requests 13 | 14 | 15 | def get_rewards_from_server(server_url: str, messages: List[str]) -> List[torch.Tensor]: 16 | headers = {"Content-Type": "application/json"} 17 | payload = {"model": "model", "messages": messages} 18 | response = requests.post(server_url, json=payload, headers=headers) 19 | rewards = json.loads(response.text)["scores"] 20 | return torch.Tensor(rewards) 21 | 22 | 23 | def replace_model(model: "AutoModelForCausalLMWithValueHead", target: Literal["default", "reward"]) -> None: 24 | if target == "reward": # save default head temporarily 25 | valuehead_state_dict: Dict[str, torch.Tensor] = model.v_head.state_dict() 26 | setattr(model, "default_head_weight", valuehead_state_dict["summary.weight"].detach().clone()) 27 | setattr(model, "default_head_bias", valuehead_state_dict["summary.bias"].detach().clone()) 28 | 29 | model.pretrained_model.set_adapter(target) # set the LoRA adapter to be active 30 | model.v_head.load_state_dict({ 31 | "summary.weight": model.get_buffer("{}_head_weight".format(target)).detach().clone(), 32 | "summary.bias": model.get_buffer("{}_head_bias".format(target)).detach().clone() 33 | }) 34 | 35 | 36 | def dump_layernorm(model: "PreTrainedModel") -> Dict[str, torch.Tensor]: 37 | layer_norm_params = {} 38 | for name, param in model.named_parameters(): 39 | if param.data.dtype == torch.float32: 40 | layer_norm_params[name] = param.data.detach().clone() 41 | param.data = param.data.to(model.config.torch_dtype) 42 | 43 | return layer_norm_params 44 | 45 | 46 | def restore_layernorm(model: "PreTrainedModel", layernorm_params: Optional[Dict[str, torch.Tensor]] = None) -> None: 47 | for name, param in model.named_parameters(): 48 | if name in layernorm_params: 49 | param.data = layernorm_params[name] 50 | -------------------------------------------------------------------------------- /src/llmtuner/data/utils.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | from typing import TYPE_CHECKING, Dict, List, Optional, Union 3 | 4 | from llmtuner.extras.logging import get_logger 5 | 6 | if TYPE_CHECKING: 7 | from datasets import Dataset, IterableDataset 8 | from transformers import TrainingArguments 9 | from llmtuner.hparams import DataArguments 10 | 11 | 12 | logger = get_logger(__name__) 13 | 14 | 15 | def checksum(data_files: List[str], file_sha1: Optional[str] = None) -> None: 16 | if file_sha1 is None: 17 | logger.warning("Checksum failed: missing SHA-1 hash value in dataset_info.json.") 18 | return 19 | 20 | if len(data_files) != 1: 21 | 
logger.warning("Checksum failed: too many files.") 22 | return 23 | 24 | with open(data_files[0], "rb") as f: 25 | sha1 = hashlib.sha1(f.read()).hexdigest() 26 | if sha1 != file_sha1: 27 | logger.warning("Checksum failed: mismatched SHA-1 hash value at {}.".format(data_files[0])) 28 | 29 | 30 | def split_dataset( 31 | dataset: Union["Dataset", "IterableDataset"], 32 | data_args: "DataArguments", 33 | training_args: "TrainingArguments" 34 | ) -> Dict[str, "Dataset"]: 35 | if training_args.do_train: 36 | if data_args.val_size > 1e-6: # Split the dataset 37 | if data_args.streaming: 38 | val_set = dataset.take(int(data_args.val_size)) 39 | train_set = dataset.skip(int(data_args.val_size)) 40 | dataset = dataset.shuffle(buffer_size=data_args.buffer_size, seed=training_args.seed) 41 | return {"train_dataset": train_set, "eval_dataset": val_set} 42 | else: 43 | val_size = int(data_args.val_size) if data_args.val_size > 1 else data_args.val_size 44 | dataset = dataset.train_test_split(test_size=val_size, seed=training_args.seed) 45 | return {"train_dataset": dataset["train"], "eval_dataset": dataset["test"]} 46 | else: 47 | if data_args.streaming: 48 | dataset = dataset.shuffle(buffer_size=data_args.buffer_size, seed=training_args.seed) 49 | return {"train_dataset": dataset} 50 | else: # do_eval or do_predict 51 | return {"eval_dataset": dataset} 52 | -------------------------------------------------------------------------------- /tests/quantize.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Quantizes models with AutoGPTQ (https://github.com/PanQiWei/AutoGPTQ). 3 | # Usage: python quantize.py --input_dir path_to_llama_model --output_dir path_to_quant_model --data_file alpaca.json 4 | # --max_length 1024 --max_samples 1024 5 | # dataset format: instruction (string), input (string), output (string), history (List[string]) 6 | 7 | import fire 8 | from datasets import load_dataset 9 | from transformers import AutoTokenizer 10 | from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig 11 | 12 | 13 | def quantize(input_dir: str, output_dir: str, data_file: str, max_length: int, max_samples: int): 14 | tokenizer = AutoTokenizer.from_pretrained(input_dir, use_fast=False, padding_side="left") 15 | 16 | def format_example(examples): 17 | prefix=("A chat between a curious user and an artificial intelligence assistant. 
" 18 | "The assistant gives helpful, detailed, and polite answers to the user's questions.") 19 | texts = [] 20 | for i in range(len(examples["instruction"])): 21 | prompt = prefix + "\n" 22 | if "history" in examples: 23 | for user_query, bot_resp in examples["history"][i]: 24 | prompt += "Human: {}\nAssistant: {}\n".format(user_query, bot_resp) 25 | prompt += "Human: {}\nAssistant: {}".format( 26 | examples["instruction"][i] + "\n" + examples["input"][i], examples["output"][i] 27 | ) 28 | texts.append(prompt) 29 | return tokenizer(texts, truncation=True, max_length=max_length) 30 | 31 | dataset = load_dataset("json", data_files=data_file)["train"] 32 | column_names = list(dataset.column_names) 33 | dataset = dataset.select(range(min(len(dataset), max_samples))) 34 | dataset = dataset.map(format_example, batched=True, remove_columns=column_names) 35 | dataset = dataset.shuffle() 36 | 37 | quantize_config = BaseQuantizeConfig( 38 | bits=4, 39 | group_size=128, 40 | desc_act=False 41 | ) 42 | 43 | model = AutoGPTQForCausalLM.from_pretrained(input_dir, quantize_config, trust_remote_code=True) 44 | model.quantize(dataset) 45 | model.save_quantized(output_dir) 46 | 47 | 48 | if __name__ == "__main__": 49 | fire.Fire(quantize) 50 | -------------------------------------------------------------------------------- /src/llmtuner/train/dpo/collator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from dataclasses import dataclass 3 | from typing import Any, Dict, List, Sequence, Tuple 4 | from transformers import DataCollatorForSeq2Seq 5 | 6 | 7 | @dataclass 8 | class DPODataCollatorWithPadding(DataCollatorForSeq2Seq): 9 | r""" 10 | Data collator for pairwise data. 11 | """ 12 | 13 | def _pad_labels(self, batch: torch.Tensor, positions: List[Tuple[int, int]]) -> torch.Tensor: 14 | padded_labels = [] 15 | for feature, (prompt_len, answer_len) in zip(batch, positions): 16 | if self.tokenizer.padding_side == "left": 17 | start, end = feature.size(0) - answer_len, feature.size(0) 18 | else: 19 | start, end = prompt_len, prompt_len + answer_len 20 | padded_tensor = self.label_pad_token_id * torch.ones_like(feature) 21 | padded_tensor[start:end] = feature[start:end] 22 | padded_labels.append(padded_tensor) 23 | return torch.stack(padded_labels, dim=0).contiguous() # in contiguous memory 24 | 25 | def __call__(self, features: Sequence[Dict[str, Any]]) -> Dict[str, torch.Tensor]: 26 | r""" 27 | Pads batched data to the longest sequence in the batch. 28 | 29 | We generate 2 * n examples where the first n examples represent chosen examples and 30 | the last n examples represent rejected examples. 
31 | """ 32 | concatenated_features = [] 33 | label_positions = [] 34 | for key in ("chosen_ids", "rejected_ids"): 35 | for feature in features: 36 | prompt_len, answer_len = len(feature["prompt_ids"]), len(feature[key]) 37 | concatenated_features.append({ 38 | "input_ids": feature["prompt_ids"] + feature[key], 39 | "attention_mask": [1] * (prompt_len + answer_len) 40 | }) 41 | label_positions.append((prompt_len, answer_len)) 42 | 43 | batch = self.tokenizer.pad( 44 | concatenated_features, 45 | padding=self.padding, 46 | max_length=self.max_length, 47 | pad_to_multiple_of=self.pad_to_multiple_of, 48 | return_tensors=self.return_tensors, 49 | ) 50 | batch["labels"] = self._pad_labels(batch["input_ids"], label_positions) 51 | return batch 52 | -------------------------------------------------------------------------------- /src/llmtuner/train/sft/metric.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from dataclasses import dataclass 3 | from typing import TYPE_CHECKING, Dict, Sequence, Tuple, Union 4 | 5 | from llmtuner.extras.constants import IGNORE_INDEX 6 | from llmtuner.extras.packages import ( 7 | is_jieba_available, is_nltk_available, is_rouge_available 8 | ) 9 | 10 | if TYPE_CHECKING: 11 | from transformers.tokenization_utils import PreTrainedTokenizer 12 | 13 | if is_jieba_available(): 14 | import jieba 15 | 16 | if is_nltk_available(): 17 | from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction 18 | 19 | if is_rouge_available(): 20 | from rouge_chinese import Rouge 21 | 22 | 23 | @dataclass 24 | class ComputeMetrics: 25 | r""" 26 | Wraps the tokenizer into metric functions, used in Seq2SeqPeftTrainer. 27 | """ 28 | 29 | tokenizer: "PreTrainedTokenizer" 30 | 31 | def __call__(self, eval_preds: Sequence[Union[np.ndarray, Tuple[np.ndarray]]]) -> Dict[str, float]: 32 | r""" 33 | Uses the model predictions to compute metrics. 
34 | """ 35 | preds, labels = eval_preds 36 | score_dict = {"rouge-1": [], "rouge-2": [], "rouge-l": [], "bleu-4": []} 37 | 38 | preds = np.where(preds != IGNORE_INDEX, preds, self.tokenizer.pad_token_id) 39 | labels = np.where(labels != IGNORE_INDEX, labels, self.tokenizer.pad_token_id) 40 | 41 | decoded_preds = self.tokenizer.batch_decode(preds, skip_special_tokens=True) 42 | decoded_labels = self.tokenizer.batch_decode(labels, skip_special_tokens=True) 43 | 44 | for pred, label in zip(decoded_preds, decoded_labels): 45 | hypothesis = list(jieba.cut(pred)) 46 | reference = list(jieba.cut(label)) 47 | 48 | if len(" ".join(hypothesis).split()) == 0 or len(" ".join(reference).split()) == 0: 49 | result = {"rouge-1": {"f": 0.0}, "rouge-2": {"f": 0.0}, "rouge-l": {"f": 0.0}} 50 | else: 51 | rouge = Rouge() 52 | scores = rouge.get_scores(" ".join(hypothesis), " ".join(reference)) 53 | result = scores[0] 54 | 55 | for k, v in result.items(): 56 | score_dict[k].append(round(v["f"] * 100, 4)) 57 | 58 | bleu_score = sentence_bleu([list(label)], list(pred), smoothing_function=SmoothingFunction().method3) 59 | score_dict["bleu-4"].append(round(bleu_score * 100, 4)) 60 | 61 | return {k: float(np.mean(v)) for k, v in score_dict.items()} 62 | -------------------------------------------------------------------------------- /src/llmtuner/eval/template.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import TYPE_CHECKING, Dict, List, Tuple 3 | 4 | from llmtuner.extras.constants import CHOICES 5 | 6 | if TYPE_CHECKING: 7 | from datasets import Dataset 8 | 9 | 10 | @dataclass 11 | class EvalTemplate: 12 | 13 | system: str 14 | choice: str 15 | answer: str 16 | prefix: str 17 | 18 | def parse_example( 19 | self, 20 | example: Dict[str, str] 21 | ) -> Tuple[str, str]: 22 | candidates = [self.choice.format(choice=ch, content=example[ch]) for ch in CHOICES if ch in example] 23 | return "".join([example["question"]] + candidates + [self.answer]), example["answer"] 24 | 25 | def format_example( 26 | self, 27 | target_data: Dict[str, str], 28 | support_set: "Dataset", 29 | subject_name: str, 30 | use_history: bool 31 | ) -> Tuple[str, str, List[Tuple[str, str]]]: 32 | query, resp = self.parse_example(target_data) 33 | history = [self.parse_example(support_set[k]) for k in range(len(support_set))] 34 | 35 | if len(history): 36 | temp = history.pop(0) 37 | history.insert(0, (self.system.format(subject=subject_name) + temp[0], temp[1])) 38 | else: 39 | query = self.system.format(subject=subject_name) + query 40 | 41 | if not use_history: 42 | query = "\n\n".join(["".join(item) for item in history] + [query]) 43 | history = [] 44 | return query.strip(), resp, history 45 | 46 | 47 | eval_templates: Dict[str, EvalTemplate] = {} 48 | 49 | 50 | def register_eval_template( 51 | name: str, 52 | system: str, 53 | choice: str, 54 | answer: str, 55 | prefix: str 56 | ) -> None: 57 | eval_templates[name] = EvalTemplate( 58 | system=system, 59 | choice=choice, 60 | answer=answer, 61 | prefix=prefix 62 | ) 63 | 64 | 65 | def get_eval_template(name: str) -> EvalTemplate: 66 | eval_template = eval_templates.get(name, None) 67 | assert eval_template is not None, "Template {} does not exist.".format(name) 68 | return eval_template 69 | 70 | 71 | register_eval_template( 72 | name="en", 73 | system="The following are multiple choice questions (with answers) about {subject}.\n\n", 74 | choice="\n{choice}. 
{content}", 75 | answer="\nAnswer: ", 76 | prefix=" " 77 | ) 78 | 79 | 80 | register_eval_template( 81 | name="zh", 82 | system="以下是中国关于{subject}考试的单项选择题,请选出其中的正确答案。\n\n", 83 | choice="\n{choice}. {content}", 84 | answer="\n答案:", 85 | prefix="\n" 86 | ) 87 | -------------------------------------------------------------------------------- /src/llmtuner/train/pt/workflow.py: -------------------------------------------------------------------------------- 1 | # Inspired by: https://github.com/huggingface/transformers/blob/v4.34.1/examples/pytorch/language-modeling/run_clm.py 2 | 3 | import math 4 | from typing import TYPE_CHECKING, Optional, List 5 | from transformers import DataCollatorForLanguageModeling, Trainer 6 | 7 | from llmtuner.data import get_dataset, preprocess_dataset, split_dataset 8 | from llmtuner.extras.ploting import plot_loss 9 | from llmtuner.model import load_model_and_tokenizer 10 | from llmtuner.train.utils import create_modelcard_and_push 11 | 12 | if TYPE_CHECKING: 13 | from transformers import Seq2SeqTrainingArguments, TrainerCallback 14 | from llmtuner.hparams import ModelArguments, DataArguments, FinetuningArguments 15 | 16 | 17 | def run_pt( 18 | model_args: "ModelArguments", 19 | data_args: "DataArguments", 20 | training_args: "Seq2SeqTrainingArguments", 21 | finetuning_args: "FinetuningArguments", 22 | callbacks: Optional[List["TrainerCallback"]] = None 23 | ): 24 | dataset = get_dataset(model_args, data_args) 25 | model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, training_args.do_train) 26 | dataset = preprocess_dataset(dataset, tokenizer, data_args, training_args, stage="pt") 27 | data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False) 28 | 29 | # Initialize our Trainer 30 | trainer = Trainer( 31 | model=model, 32 | args=training_args, 33 | tokenizer=tokenizer, 34 | data_collator=data_collator, 35 | callbacks=callbacks, 36 | **split_dataset(dataset, data_args, training_args) 37 | ) 38 | 39 | # Training 40 | if training_args.do_train: 41 | train_result = trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint) 42 | trainer.save_model() 43 | trainer.log_metrics("train", train_result.metrics) 44 | trainer.save_metrics("train", train_result.metrics) 45 | trainer.save_state() 46 | if trainer.is_world_process_zero() and finetuning_args.plot_loss: 47 | plot_loss(training_args.output_dir, keys=["loss", "eval_loss"]) 48 | 49 | # Evaluation 50 | if training_args.do_eval: 51 | metrics = trainer.evaluate(metric_key_prefix="eval") 52 | try: 53 | perplexity = math.exp(metrics["eval_loss"]) 54 | except OverflowError: 55 | perplexity = float("inf") 56 | 57 | metrics["perplexity"] = perplexity 58 | trainer.log_metrics("eval", metrics) 59 | trainer.save_metrics("eval", metrics) 60 | 61 | # Create model card 62 | create_modelcard_and_push(trainer, model_args, data_args, training_args, finetuning_args) 63 | -------------------------------------------------------------------------------- /src/llmtuner/webui/components/top.py: -------------------------------------------------------------------------------- 1 | import gradio as gr 2 | from typing import TYPE_CHECKING, Dict 3 | 4 | from llmtuner.data.template import templates 5 | from llmtuner.extras.constants import METHODS, SUPPORTED_MODELS 6 | from llmtuner.webui.common import get_model_path, get_template, list_adapters, save_config 7 | from llmtuner.webui.utils import can_quantize 8 | 9 | if TYPE_CHECKING: 10 | from gradio.components import Component 11 | 12 | 13 | 
def create_top() -> Dict[str, "Component"]: 14 | available_models = list(SUPPORTED_MODELS.keys()) + ["Custom"] 15 | 16 | with gr.Row(): 17 | lang = gr.Dropdown(choices=["en", "zh"], scale=1) 18 | model_name = gr.Dropdown(choices=available_models, scale=3) 19 | model_path = gr.Textbox(scale=3) 20 | 21 | with gr.Row(): 22 | finetuning_type = gr.Dropdown(choices=METHODS, value="lora", scale=1) 23 | adapter_path = gr.Dropdown(multiselect=True, scale=5, allow_custom_value=True) 24 | refresh_btn = gr.Button(scale=1) 25 | 26 | with gr.Accordion(label="Advanced config", open=False) as advanced_tab: 27 | with gr.Row(): 28 | quantization_bit = gr.Dropdown(choices=["none", "8", "4"], value="none") 29 | template = gr.Dropdown(choices=list(templates.keys()), value="default") 30 | rope_scaling = gr.Radio(choices=["none", "linear", "dynamic"], value="none") 31 | booster = gr.Radio(choices=["none", "flash_attn", "unsloth"], value="none") 32 | 33 | model_name.change( 34 | list_adapters, [model_name, finetuning_type], [adapter_path], queue=False 35 | ).then( 36 | get_model_path, [model_name], [model_path], queue=False 37 | ).then( 38 | get_template, [model_name], [template], queue=False 39 | ) # do not save config since the below line will save 40 | 41 | model_path.change(save_config, inputs=[lang, model_name, model_path], queue=False) 42 | 43 | finetuning_type.change( 44 | list_adapters, [model_name, finetuning_type], [adapter_path], queue=False 45 | ).then( 46 | can_quantize, [finetuning_type], [quantization_bit], queue=False 47 | ) 48 | 49 | refresh_btn.click( 50 | list_adapters, [model_name, finetuning_type], [adapter_path], queue=False 51 | ) 52 | 53 | return dict( 54 | lang=lang, 55 | model_name=model_name, 56 | model_path=model_path, 57 | finetuning_type=finetuning_type, 58 | adapter_path=adapter_path, 59 | refresh_btn=refresh_btn, 60 | advanced_tab=advanced_tab, 61 | quantization_bit=quantization_bit, 62 | template=template, 63 | rope_scaling=rope_scaling, 64 | booster=booster 65 | ) 66 | -------------------------------------------------------------------------------- /src/llmtuner/api/protocol.py: -------------------------------------------------------------------------------- 1 | import time 2 | from enum import Enum 3 | from pydantic import BaseModel, Field 4 | from typing import List, Optional 5 | 6 | 7 | class Role(str, Enum): 8 | USER = "user" 9 | ASSISTANT = "assistant" 10 | SYSTEM = "system" 11 | 12 | 13 | class Finish(str, Enum): 14 | STOP = "stop" 15 | LENGTH = "length" 16 | 17 | 18 | class ModelCard(BaseModel): 19 | id: str 20 | object: Optional[str] = "model" 21 | created: Optional[int] = Field(default_factory=lambda: int(time.time())) 22 | owned_by: Optional[str] = "owner" 23 | 24 | 25 | class ModelList(BaseModel): 26 | object: Optional[str] = "list" 27 | data: Optional[List[ModelCard]] = [] 28 | 29 | 30 | class ChatMessage(BaseModel): 31 | role: Role 32 | content: str 33 | 34 | 35 | class DeltaMessage(BaseModel): 36 | role: Optional[Role] = None 37 | content: Optional[str] = None 38 | 39 | 40 | class ChatCompletionRequest(BaseModel): 41 | model: str 42 | messages: List[ChatMessage] 43 | do_sample: Optional[bool] = True 44 | temperature: Optional[float] = None 45 | top_p: Optional[float] = None 46 | n: Optional[int] = 1 47 | max_tokens: Optional[int] = None 48 | stream: Optional[bool] = False 49 | 50 | 51 | class ChatCompletionResponseChoice(BaseModel): 52 | index: int 53 | message: ChatMessage 54 | finish_reason: Finish 55 | 56 | 57 | class 
ChatCompletionResponseStreamChoice(BaseModel): 58 | index: int 59 | delta: DeltaMessage 60 | finish_reason: Optional[Finish] = None 61 | 62 | 63 | class ChatCompletionResponseUsage(BaseModel): 64 | prompt_tokens: int 65 | completion_tokens: int 66 | total_tokens: int 67 | 68 | 69 | class ChatCompletionResponse(BaseModel): 70 | id: Optional[str] = "chatcmpl-default" 71 | object: Optional[str] = "chat.completion" 72 | created: Optional[int] = Field(default_factory=lambda: int(time.time())) 73 | model: str 74 | choices: List[ChatCompletionResponseChoice] 75 | usage: ChatCompletionResponseUsage 76 | 77 | 78 | class ChatCompletionStreamResponse(BaseModel): 79 | id: Optional[str] = "chatcmpl-default" 80 | object: Optional[str] = "chat.completion.chunk" 81 | created: Optional[int] = Field(default_factory=lambda: int(time.time())) 82 | model: str 83 | choices: List[ChatCompletionResponseStreamChoice] 84 | 85 | 86 | class ScoreEvaluationRequest(BaseModel): 87 | model: str 88 | messages: List[str] 89 | max_length: Optional[int] = None 90 | 91 | 92 | class ScoreEvaluationResponse(BaseModel): 93 | id: Optional[str] = "scoreeval-default" 94 | object: Optional[str] = "score.evaluation" 95 | model: str 96 | scores: List[float] 97 | -------------------------------------------------------------------------------- /tests/cal_lr.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Calculates the optimal learning rate for 7B/13B models using LLaMA's hyper-parameters. 3 | # Usage: python cal_lr.py --model_name_or_path path_to_model --dataset alpaca_en --cutoff_len 1024 --batch_size 16 4 | # Inspired by: https://github.com/imoneoi/openchat/blob/master/ochat/training_deepspeed/train.py 5 | 6 | import fire 7 | import math 8 | import torch 9 | from tqdm import tqdm 10 | from typing import Optional 11 | from torch.utils.data import DataLoader 12 | from transformers import DataCollatorForSeq2Seq 13 | 14 | from llmtuner.data import get_dataset, preprocess_dataset 15 | from llmtuner.extras.constants import IGNORE_INDEX 16 | from llmtuner.model import get_train_args, load_model_and_tokenizer 17 | 18 | 19 | BASE_LR = 3e-4 # 1.5e-4 for 30B-70B models 20 | BASE_BS = 4_000_000 # from llama paper 21 | 22 | 23 | def calculate_lr( 24 | model_name_or_path: str, 25 | dataset: str, 26 | cutoff_len: int, # i.e. 
maximum input length during training 27 | batch_size: int, # total batch size, namely (batch size * gradient accumulation * world size) 28 | is_mistral: bool, # mistral model uses a smaller learning rate, 29 | dataset_dir: Optional[str] = "../data" 30 | ): 31 | model_args, data_args, training_args, finetuning_args, _ = get_train_args(dict( 32 | stage="sft", 33 | model_name_or_path=model_name_or_path, 34 | dataset=dataset, 35 | dataset_dir=dataset_dir, 36 | template="default", 37 | cutoff_len=cutoff_len, 38 | output_dir="dummy_dir" 39 | )) 40 | trainset = get_dataset(model_args, data_args) 41 | _, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, is_trainable=False, add_valuehead=False) 42 | trainset = preprocess_dataset(trainset, tokenizer, data_args, training_args, stage="sft") 43 | data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, label_pad_token_id=IGNORE_INDEX) 44 | dataloader = DataLoader( 45 | dataset=trainset, batch_size=batch_size, shuffle=True, collate_fn=data_collator, pin_memory=True 46 | ) 47 | valid_tokens, total_tokens = 0, 0 48 | for batch in tqdm(dataloader): 49 | valid_tokens += torch.sum(batch["labels"] != IGNORE_INDEX).item() 50 | total_tokens += torch.numel(batch["labels"]) 51 | 52 | batch_max_len = cutoff_len * batch_size # max tokens in a batch 53 | valid_ratio = valid_tokens / total_tokens 54 | batch_valid_len = batch_max_len * valid_ratio 55 | lr = BASE_LR * math.sqrt(batch_valid_len / BASE_BS) # lr ~ sqrt(batch_size) 56 | lr = lr / 6.0 if is_mistral else lr 57 | print("Optimal learning rate is {:.2e} for valid ratio% {:.2f} and effective batch size {:.2f}".format( 58 | lr, valid_ratio * 100, batch_valid_len 59 | )) 60 | 61 | 62 | if __name__ == "__main__": 63 | fire.Fire(calculate_lr) 64 | -------------------------------------------------------------------------------- /src/llmtuner/webui/engine.py: -------------------------------------------------------------------------------- 1 | import gradio as gr 2 | from gradio.components import Component # cannot use TYPE_CHECKING here 3 | from typing import Any, Dict, Generator, Optional 4 | 5 | from llmtuner.webui.chatter import WebChatModel 6 | from llmtuner.webui.common import get_model_path, list_dataset, load_config 7 | from llmtuner.webui.locales import LOCALES 8 | from llmtuner.webui.manager import Manager 9 | from llmtuner.webui.runner import Runner 10 | from llmtuner.webui.utils import get_time 11 | 12 | 13 | class Engine: 14 | 15 | def __init__(self, demo_mode: Optional[bool] = False, pure_chat: Optional[bool] = False) -> None: 16 | self.demo_mode = demo_mode 17 | self.pure_chat = pure_chat 18 | self.manager = Manager() 19 | self.runner = Runner(self.manager, demo_mode=demo_mode) 20 | self.chatter = WebChatModel(manager=self.manager, demo_mode=demo_mode, lazy_init=(not pure_chat)) 21 | 22 | def _form_dict(self, resume_dict: Dict[str, Dict[str, Any]]): 23 | return {self.manager.get_elem_by_name(k): gr.update(**v) for k, v in resume_dict.items()} 24 | 25 | def resume(self) -> Generator[Dict[Component, Dict[str, Any]], None, None]: 26 | user_config = load_config() if not self.demo_mode else {} 27 | lang = user_config.get("lang", None) or "en" 28 | 29 | init_dict = { 30 | "top.lang": {"value": lang}, 31 | "infer.chat_box": {"visible": self.chatter.loaded} 32 | } 33 | 34 | if not self.pure_chat: 35 | init_dict["train.dataset"] = {"choices": list_dataset()["choices"]} 36 | init_dict["eval.dataset"] = {"choices": list_dataset()["choices"]} 37 | 38 | if user_config.get("last_model", None): 39 | 
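# Restore the last-used model name and its local path from the saved user config.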
init_dict["top.model_name"] = {"value": user_config["last_model"]} 40 | init_dict["top.model_path"] = {"value": get_model_path(user_config["last_model"])} 41 | 42 | yield self._form_dict(init_dict) 43 | 44 | if not self.pure_chat: 45 | if self.runner.alive: 46 | yield {elem: gr.update(value=value) for elem, value in self.runner.running_data.items()} 47 | if self.runner.do_train: 48 | yield self._form_dict({"train.resume_btn": {"value": True}}) 49 | else: 50 | yield self._form_dict({"eval.resume_btn": {"value": True}}) 51 | else: 52 | yield self._form_dict({ 53 | "train.output_dir": {"value": "train_" + get_time()}, 54 | "eval.output_dir": {"value": "eval_" + get_time()}, 55 | }) 56 | 57 | def change_lang(self, lang: str) -> Dict[Component, Dict[str, Any]]: 58 | return { 59 | component: gr.update(**LOCALES[name][lang]) 60 | for elems in self.manager.all_elems.values() for name, component in elems.items() if name in LOCALES 61 | } 62 | -------------------------------------------------------------------------------- /src/llmtuner/webui/components/eval.py: -------------------------------------------------------------------------------- 1 | import gradio as gr 2 | from typing import TYPE_CHECKING, Dict 3 | 4 | from llmtuner.webui.common import list_dataset, DEFAULT_DATA_DIR 5 | from llmtuner.webui.components.data import create_preview_box 6 | 7 | if TYPE_CHECKING: 8 | from gradio.components import Component 9 | from llmtuner.webui.engine import Engine 10 | 11 | 12 | def create_eval_tab(engine: "Engine") -> Dict[str, "Component"]: 13 | input_elems = engine.manager.get_base_elems() 14 | elem_dict = dict() 15 | 16 | with gr.Row(): 17 | dataset_dir = gr.Textbox(value=DEFAULT_DATA_DIR, scale=2) 18 | dataset = gr.Dropdown(multiselect=True, scale=4) 19 | preview_elems = create_preview_box(dataset_dir, dataset) 20 | 21 | dataset_dir.change(list_dataset, [dataset_dir], [dataset], queue=False) 22 | 23 | input_elems.update({dataset_dir, dataset}) 24 | elem_dict.update(dict(dataset_dir=dataset_dir, dataset=dataset, **preview_elems)) 25 | 26 | with gr.Row(): 27 | cutoff_len = gr.Slider(value=1024, minimum=4, maximum=8192, step=1) 28 | max_samples = gr.Textbox(value="100000") 29 | batch_size = gr.Slider(value=8, minimum=1, maximum=512, step=1) 30 | predict = gr.Checkbox(value=True) 31 | 32 | input_elems.update({cutoff_len, max_samples, batch_size, predict}) 33 | elem_dict.update(dict( 34 | cutoff_len=cutoff_len, max_samples=max_samples, batch_size=batch_size, predict=predict 35 | )) 36 | 37 | with gr.Row(): 38 | max_new_tokens = gr.Slider(10, 2048, value=128, step=1) 39 | top_p = gr.Slider(0.01, 1, value=0.7, step=0.01) 40 | temperature = gr.Slider(0.01, 1.5, value=0.95, step=0.01) 41 | output_dir = gr.Textbox() 42 | 43 | input_elems.update({max_new_tokens, top_p, temperature, output_dir}) 44 | elem_dict.update(dict( 45 | max_new_tokens=max_new_tokens, top_p=top_p, temperature=temperature, output_dir=output_dir 46 | )) 47 | 48 | with gr.Row(): 49 | cmd_preview_btn = gr.Button() 50 | start_btn = gr.Button() 51 | stop_btn = gr.Button() 52 | 53 | with gr.Row(): 54 | resume_btn = gr.Checkbox(visible=False, interactive=False, value=False) 55 | process_bar = gr.Slider(visible=False, interactive=False) 56 | 57 | with gr.Box(): 58 | output_box = gr.Markdown() 59 | 60 | output_elems = [output_box, process_bar] 61 | elem_dict.update(dict( 62 | cmd_preview_btn=cmd_preview_btn, start_btn=start_btn, stop_btn=stop_btn, 63 | resume_btn=resume_btn, process_bar=process_bar, output_box=output_box 64 | )) 65 | 66 | 
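# Event wiring: the preview/start buttons hand the collected input components to the shared Runner, the stop button aborts it, and the hidden resume_btn re-attaches the monitor to a still-running job after a page reload.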
cmd_preview_btn.click(engine.runner.preview_eval, input_elems, output_elems) 67 | start_btn.click(engine.runner.run_eval, input_elems, output_elems) 68 | stop_btn.click(engine.runner.set_abort, queue=False) 69 | resume_btn.change(engine.runner.monitor, outputs=output_elems) 70 | 71 | return elem_dict 72 | -------------------------------------------------------------------------------- /src/llmtuner/webui/interface.py: -------------------------------------------------------------------------------- 1 | import gradio as gr 2 | from typing import Optional 3 | from transformers.utils.versions import require_version 4 | 5 | from llmtuner.webui.components import ( 6 | create_top, 7 | create_train_tab, 8 | create_eval_tab, 9 | create_infer_tab, 10 | create_export_tab, 11 | create_chat_box 12 | ) 13 | from llmtuner.webui.common import save_config 14 | from llmtuner.webui.css import CSS 15 | from llmtuner.webui.engine import Engine 16 | 17 | 18 | require_version("gradio>=3.38.0,<4.0.0", "To fix: pip install \"gradio>=3.38.0,<4.0.0\"") 19 | 20 | 21 | def create_ui(demo_mode: Optional[bool] = False) -> gr.Blocks: 22 | engine = Engine(demo_mode=demo_mode, pure_chat=False) 23 | 24 | with gr.Blocks(title="LLaMA Board", css=CSS) as demo: 25 | if demo_mode: 26 | gr.HTML( 27 | "

<h1><center>LLaMA Board: A One-stop Web UI for Getting Started with LLaMA Factory</center></h1>" 28 | ) 29 | gr.HTML( 30 | "<h3><center>Visit <a href=\"https://github.com/hiyouga/LLaMA-Factory\" target=\"_blank\">" 31 | "LLaMA Factory</a> for details.</center></h3>
" 32 | ) 33 | gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button") 34 | 35 | engine.manager.all_elems["top"] = create_top() 36 | lang: "gr.Dropdown" = engine.manager.get_elem_by_name("top.lang") 37 | 38 | with gr.Tab("Train"): 39 | engine.manager.all_elems["train"] = create_train_tab(engine) 40 | 41 | with gr.Tab("Evaluate & Predict"): 42 | engine.manager.all_elems["eval"] = create_eval_tab(engine) 43 | 44 | with gr.Tab("Chat"): 45 | engine.manager.all_elems["infer"] = create_infer_tab(engine) 46 | 47 | if not demo_mode: 48 | with gr.Tab("Export"): 49 | engine.manager.all_elems["export"] = create_export_tab(engine) 50 | 51 | demo.load(engine.resume, outputs=engine.manager.list_elems()) 52 | lang.change(engine.change_lang, [lang], engine.manager.list_elems(), queue=False) 53 | lang.input(save_config, inputs=[lang], queue=False) 54 | 55 | return demo 56 | 57 | 58 | def create_web_demo() -> gr.Blocks: 59 | engine = Engine(pure_chat=True) 60 | 61 | with gr.Blocks(title="Web Demo", css=CSS) as demo: 62 | lang = gr.Dropdown(choices=["en", "zh"]) 63 | engine.manager.all_elems["top"] = dict(lang=lang) 64 | 65 | chat_box, _, _, chat_elems = create_chat_box(engine, visible=True) 66 | engine.manager.all_elems["infer"] = dict(chat_box=chat_box, **chat_elems) 67 | 68 | demo.load(engine.resume, outputs=engine.manager.list_elems()) 69 | lang.change(engine.change_lang, [lang], engine.manager.list_elems(), queue=False) 70 | lang.input(save_config, inputs=[lang], queue=False) 71 | 72 | return demo 73 | 74 | 75 | if __name__ == "__main__": 76 | demo = create_ui() 77 | demo.queue() 78 | demo.launch(server_name="0.0.0.0", share=False, inbrowser=True) 79 | -------------------------------------------------------------------------------- /src/llmtuner/train/tuner.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from typing import TYPE_CHECKING, Any, Dict, List, Optional 3 | 4 | from llmtuner.extras.callbacks import LogCallback 5 | from llmtuner.extras.logging import get_logger 6 | from llmtuner.model import get_train_args, get_infer_args, load_model_and_tokenizer 7 | from llmtuner.train.pt import run_pt 8 | from llmtuner.train.sft import run_sft 9 | from llmtuner.train.rm import run_rm 10 | from llmtuner.train.ppo import run_ppo 11 | from llmtuner.train.dpo import run_dpo 12 | 13 | if TYPE_CHECKING: 14 | from transformers import TrainerCallback 15 | 16 | 17 | logger = get_logger(__name__) 18 | 19 | 20 | def run_exp(args: Optional[Dict[str, Any]] = None, callbacks: Optional[List["TrainerCallback"]] = None): 21 | model_args, data_args, training_args, finetuning_args, generating_args = get_train_args(args) 22 | callbacks = [LogCallback()] if callbacks is None else callbacks 23 | 24 | if finetuning_args.stage == "pt": 25 | run_pt(model_args, data_args, training_args, finetuning_args, callbacks) 26 | elif finetuning_args.stage == "sft": 27 | run_sft(model_args, data_args, training_args, finetuning_args, generating_args, callbacks) 28 | elif finetuning_args.stage == "rm": 29 | run_rm(model_args, data_args, training_args, finetuning_args, callbacks) 30 | elif finetuning_args.stage == "ppo": 31 | run_ppo(model_args, data_args, training_args, finetuning_args, generating_args, callbacks) 32 | elif finetuning_args.stage == "dpo": 33 | run_dpo(model_args, data_args, training_args, finetuning_args, callbacks) 34 | else: 35 | raise ValueError("Unknown task.") 36 | 37 | 38 | def export_model(args: Optional[Dict[str, Any]] = None): 
39 | model_args, _, finetuning_args, _ = get_infer_args(args) 40 | 41 | if model_args.adapter_name_or_path is not None and model_args.export_quantization_bit is not None: 42 | raise ValueError("Please merge adapters before quantizing the model.") 43 | 44 | model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args) 45 | 46 | if getattr(model, "quantization_method", None) and model_args.adapter_name_or_path is not None: 47 | logger.warning("Cannot merge adapters to a quantized model.") 48 | 49 | model.config.use_cache = True 50 | if getattr(model.config, "torch_dtype", None) == "bfloat16": 51 | model = model.to(torch.bfloat16).to("cpu") 52 | else: 53 | model = model.to(torch.float16).to("cpu") 54 | setattr(model.config, "torch_dtype", "float16") 55 | 56 | model.save_pretrained( 57 | save_directory=model_args.export_dir, 58 | max_shard_size="{}GB".format(model_args.export_size), 59 | safe_serialization=(not model_args.export_legacy_format) 60 | ) 61 | 62 | try: 63 | tokenizer.padding_side = "left" # restore padding side 64 | tokenizer.init_kwargs["padding_side"] = "left" 65 | tokenizer.save_pretrained(model_args.export_dir) 66 | except: 67 | logger.warning("Cannot save tokenizer, please copy the files manually.") 68 | 69 | 70 | if __name__ == "__main__": 71 | run_exp() 72 | -------------------------------------------------------------------------------- /src/llmtuner/webui/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import gradio as gr 4 | from typing import TYPE_CHECKING, Any, Dict 5 | from datetime import datetime 6 | 7 | from llmtuner.extras.packages import is_matplotlib_available 8 | from llmtuner.extras.ploting import smooth 9 | from llmtuner.webui.common import get_save_dir 10 | 11 | if TYPE_CHECKING: 12 | from llmtuner.extras.callbacks import LogCallback 13 | 14 | if is_matplotlib_available(): 15 | import matplotlib.figure 16 | import matplotlib.pyplot as plt 17 | 18 | 19 | def update_process_bar(callback: "LogCallback") -> Dict[str, Any]: 20 | if not callback.max_steps: 21 | return gr.update(visible=False) 22 | 23 | percentage = round(100 * callback.cur_steps / callback.max_steps, 0) if callback.max_steps != 0 else 100.0 24 | label = "Running {:d}/{:d}: {} < {}".format( 25 | callback.cur_steps, 26 | callback.max_steps, 27 | callback.elapsed_time, 28 | callback.remaining_time 29 | ) 30 | return gr.update(label=label, value=percentage, visible=True) 31 | 32 | 33 | def get_time() -> str: 34 | return datetime.now().strftime('%Y-%m-%d-%H-%M-%S') 35 | 36 | 37 | def can_quantize(finetuning_type: str) -> Dict[str, Any]: 38 | if finetuning_type != "lora": 39 | return gr.update(value="None", interactive=False) 40 | else: 41 | return gr.update(interactive=True) 42 | 43 | 44 | def gen_cmd(args: Dict[str, Any]) -> str: 45 | args.pop("disable_tqdm", None) 46 | args["plot_loss"] = args.get("do_train", None) 47 | current_devices = os.environ.get("CUDA_VISIBLE_DEVICES", "0") 48 | cmd_lines = ["CUDA_VISIBLE_DEVICES={} python src/train_bash.py ".format(current_devices)] 49 | for k, v in args.items(): 50 | if v is not None and v is not False and v != "": 51 | cmd_lines.append(" --{} {} ".format(k, str(v))) 52 | cmd_text = "\\\n".join(cmd_lines) 53 | cmd_text = "```bash\n{}\n```".format(cmd_text) 54 | return cmd_text 55 | 56 | 57 | def get_eval_results(path: os.PathLike) -> str: 58 | with open(path, "r", encoding="utf-8") as f: 59 | result = json.dumps(json.load(f), indent=4) 60 | return 
"```json\n{}\n```\n".format(result) 61 | 62 | 63 | def gen_plot(base_model: str, finetuning_type: str, output_dir: str) -> "matplotlib.figure.Figure": 64 | if not base_model: 65 | return 66 | log_file = get_save_dir(base_model, finetuning_type, output_dir, "trainer_log.jsonl") 67 | if not os.path.isfile(log_file): 68 | return 69 | 70 | plt.close("all") 71 | fig = plt.figure() 72 | ax = fig.add_subplot(111) 73 | steps, losses = [], [] 74 | with open(log_file, "r", encoding="utf-8") as f: 75 | for line in f: 76 | log_info = json.loads(line) 77 | if log_info.get("loss", None): 78 | steps.append(log_info["current_steps"]) 79 | losses.append(log_info["loss"]) 80 | 81 | if len(losses) == 0: 82 | return None 83 | 84 | ax.plot(steps, losses, alpha=0.4, label="original") 85 | ax.plot(steps, smooth(losses), label="smoothed") 86 | ax.legend() 87 | ax.set_xlabel("step") 88 | ax.set_ylabel("loss") 89 | return fig 90 | -------------------------------------------------------------------------------- /tests/loftq_init.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Initializes LoRA weights with LoRA-fine-tuning-aware Quantization (LoftQ) 3 | # Usage: python loftq_init.py --model_name_or_path path_to_model --save_dir output_dir 4 | # Inspired by: https://github.com/huggingface/peft/blob/main/examples/loftq_finetuning/quantize_save_load.py 5 | 6 | import os 7 | import fire 8 | import torch 9 | import torch.nn as nn 10 | from typing import Optional 11 | from transformers import AutoModelForCausalLM, AutoTokenizer 12 | from peft import LoftQConfig, LoraConfig, TaskType, get_peft_model 13 | 14 | 15 | class Shell(nn.Module): 16 | 17 | def __init__(self, weight: torch.Tensor, bias: Optional[torch.Tensor] = None): 18 | super().__init__() 19 | self.weight = nn.Parameter(weight, requires_grad=False) 20 | if bias is not None: 21 | self.bias = nn.Parameter(bias, requires_grad=False) 22 | 23 | 24 | def unwrap_model(model: nn.Module, pattern=".base_layer") -> None: 25 | for name in set([k.split(pattern)[0] for k, _ in model.named_modules() if pattern in k]): 26 | parent_name = ".".join(name.split(".")[:-1]) 27 | child_name = name.split(".")[-1] 28 | parent_module = model.get_submodule(parent_name) 29 | child_module = getattr(parent_module, child_name) 30 | base_layer = getattr(child_module, "base_layer") 31 | weight = getattr(base_layer, "weight", None) 32 | bias = getattr(base_layer, "bias", None) 33 | setattr(parent_module, child_name, Shell(weight, bias)) 34 | 35 | print("Model unwrapped.") 36 | 37 | 38 | def quantize_loftq( 39 | model_name_or_path: str, 40 | save_dir: str, 41 | loftq_bits: Optional[int] = 4, 42 | loftq_iter: Optional[int] = 1, 43 | lora_alpha: Optional[int] = None, 44 | lora_rank: Optional[int] = 16, 45 | lora_target: Optional[str] = "q_proj,v_proj" 46 | ): 47 | tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True) 48 | model = AutoModelForCausalLM.from_pretrained(model_name_or_path, trust_remote_code=True, torch_dtype="auto") 49 | loftq_config = LoftQConfig(loftq_bits=loftq_bits, loftq_iter=loftq_iter) 50 | lora_config = LoraConfig( 51 | task_type=TaskType.CAUSAL_LM, 52 | inference_mode=True, 53 | r=lora_rank, 54 | lora_alpha=lora_alpha if lora_alpha is not None else lora_rank * 2, 55 | lora_dropout=0.1, 56 | target_modules=[name.strip() for name in lora_target.split(",")], 57 | init_lora_weights="loftq", 58 | loftq_config=loftq_config 59 | ) 60 | 61 | # Init LoftQ model 62 | lora_model = 
get_peft_model(model, lora_config) 63 | base_model = lora_model.get_base_model() 64 | 65 | # Save LoftQ model 66 | setattr(lora_model.base_model.peft_config["default"], "base_model_name_or_path", save_dir) 67 | setattr(lora_model.base_model.peft_config["default"], "init_lora_weights", True) 68 | lora_model.save_pretrained(os.path.join(save_dir, "adapters")) 69 | 70 | # Save base model 71 | unwrap_model(base_model) 72 | base_model.save_pretrained(save_dir) 73 | tokenizer.save_pretrained(save_dir) 74 | 75 | 76 | if __name__ == "__main__": 77 | fire.Fire(quantize_loftq) 78 | -------------------------------------------------------------------------------- /src/llmtuner/train/rm/workflow.py: -------------------------------------------------------------------------------- 1 | # Inspired by: https://github.com/CarperAI/trlx/blob/main/examples/summarize_rlhf/reward_model/train_reward_model_gptj.py 2 | 3 | from typing import TYPE_CHECKING, Optional, List 4 | from transformers import Seq2SeqTrainingArguments 5 | 6 | from llmtuner.data import get_dataset, preprocess_dataset, split_dataset 7 | from llmtuner.extras.callbacks import SavePeftModelCallback 8 | from llmtuner.extras.ploting import plot_loss 9 | from llmtuner.model import load_model_and_tokenizer 10 | from llmtuner.train.rm.collator import PairwiseDataCollatorWithPadding 11 | from llmtuner.train.rm.metric import compute_accuracy 12 | from llmtuner.train.rm.trainer import PairwiseTrainer 13 | from llmtuner.train.utils import create_modelcard_and_push 14 | 15 | if TYPE_CHECKING: 16 | from transformers import TrainerCallback 17 | from llmtuner.hparams import ModelArguments, DataArguments, FinetuningArguments 18 | 19 | 20 | def run_rm( 21 | model_args: "ModelArguments", 22 | data_args: "DataArguments", 23 | training_args: "Seq2SeqTrainingArguments", 24 | finetuning_args: "FinetuningArguments", 25 | callbacks: Optional[List["TrainerCallback"]] = None 26 | ): 27 | dataset = get_dataset(model_args, data_args) 28 | model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, training_args.do_train, add_valuehead=True) 29 | dataset = preprocess_dataset(dataset, tokenizer, data_args, training_args, stage="rm") 30 | data_collator = PairwiseDataCollatorWithPadding(tokenizer, pad_to_multiple_of=8) 31 | 32 | # Update arguments 33 | training_args_dict = training_args.to_dict() 34 | training_args_dict.update(dict(remove_unused_columns=False)) # important for pairwise dataset 35 | training_args = Seq2SeqTrainingArguments(**training_args_dict) 36 | 37 | # Initialize our Trainer 38 | trainer = PairwiseTrainer( 39 | model=model, 40 | args=training_args, 41 | tokenizer=tokenizer, 42 | data_collator=data_collator, 43 | callbacks=callbacks + [SavePeftModelCallback()], 44 | compute_metrics=compute_accuracy, 45 | **split_dataset(dataset, data_args, training_args) 46 | ) 47 | 48 | # Training 49 | if training_args.do_train: 50 | train_result = trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint) 51 | trainer.save_model() 52 | trainer.log_metrics("train", train_result.metrics) 53 | trainer.save_metrics("train", train_result.metrics) 54 | trainer.save_state() 55 | if trainer.is_world_process_zero() and finetuning_args.plot_loss: 56 | plot_loss(training_args.output_dir, keys=["loss", "eval_loss"]) 57 | 58 | # Evaluation 59 | if training_args.do_eval: 60 | metrics = trainer.evaluate(metric_key_prefix="eval") 61 | trainer.log_metrics("eval", metrics) 62 | trainer.save_metrics("eval", metrics) 63 | 64 | # Predict 65 | if 
training_args.do_predict: 66 | predict_results = trainer.predict(dataset, metric_key_prefix="predict") 67 | trainer.log_metrics("predict", predict_results.metrics) 68 | trainer.save_metrics("predict", predict_results.metrics) 69 | trainer.save_predictions(predict_results) 70 | 71 | # Create model card 72 | create_modelcard_and_push(trainer, model_args, data_args, training_args, finetuning_args) 73 | -------------------------------------------------------------------------------- /src/llmtuner/webui/components/export.py: -------------------------------------------------------------------------------- 1 | import gradio as gr 2 | from typing import TYPE_CHECKING, Dict, Generator, List 3 | 4 | from llmtuner.train import export_model 5 | from llmtuner.webui.common import get_save_dir 6 | from llmtuner.webui.locales import ALERTS 7 | 8 | if TYPE_CHECKING: 9 | from gradio.components import Component 10 | from llmtuner.webui.engine import Engine 11 | 12 | 13 | GPTQ_BITS = ["8", "4", "3", "2"] 14 | 15 | 16 | def save_model( 17 | lang: str, 18 | model_name: str, 19 | model_path: str, 20 | adapter_path: List[str], 21 | finetuning_type: str, 22 | template: str, 23 | max_shard_size: int, 24 | export_quantization_bit: int, 25 | export_quantization_dataset: str, 26 | export_dir: str 27 | ) -> Generator[str, None, None]: 28 | error = "" 29 | if not model_name: 30 | error = ALERTS["err_no_model"][lang] 31 | elif not model_path: 32 | error = ALERTS["err_no_path"][lang] 33 | elif not export_dir: 34 | error = ALERTS["err_no_export_dir"][lang] 35 | elif export_quantization_bit in GPTQ_BITS and not export_quantization_dataset: 36 | error = ALERTS["err_no_dataset"][lang] 37 | elif export_quantization_bit not in GPTQ_BITS and not adapter_path: 38 | error = ALERTS["err_no_adapter"][lang] 39 | 40 | if error: 41 | gr.Warning(error) 42 | yield error 43 | return 44 | 45 | if adapter_path: 46 | adapter_name_or_path = ",".join([get_save_dir(model_name, finetuning_type, adapter) for adapter in adapter_path]) 47 | else: 48 | adapter_name_or_path = None 49 | 50 | args = dict( 51 | model_name_or_path=model_path, 52 | adapter_name_or_path=adapter_name_or_path, 53 | finetuning_type=finetuning_type, 54 | template=template, 55 | export_dir=export_dir, 56 | export_size=max_shard_size, 57 | export_quantization_bit=int(export_quantization_bit) if export_quantization_bit in GPTQ_BITS else None, 58 | export_quantization_dataset=export_quantization_dataset 59 | ) 60 | 61 | yield ALERTS["info_exporting"][lang] 62 | export_model(args) 63 | yield ALERTS["info_exported"][lang] 64 | 65 | 66 | def create_export_tab(engine: "Engine") -> Dict[str, "Component"]: 67 | with gr.Row(): 68 | max_shard_size = gr.Slider(value=1, minimum=1, maximum=100) 69 | export_quantization_bit = gr.Dropdown(choices=["none", "8", "4", "3", "2"], value="none") 70 | export_quantization_dataset = gr.Textbox(value="data/c4_demo.json") 71 | 72 | export_dir = gr.Textbox() 73 | export_btn = gr.Button() 74 | info_box = gr.Textbox(show_label=False, interactive=False) 75 | 76 | export_btn.click( 77 | save_model, 78 | [ 79 | engine.manager.get_elem_by_name("top.lang"), 80 | engine.manager.get_elem_by_name("top.model_name"), 81 | engine.manager.get_elem_by_name("top.model_path"), 82 | engine.manager.get_elem_by_name("top.adapter_path"), 83 | engine.manager.get_elem_by_name("top.finetuning_type"), 84 | engine.manager.get_elem_by_name("top.template"), 85 | max_shard_size, 86 | export_quantization_bit, 87 | export_quantization_dataset, 88 | export_dir 89 | ], 90 | [info_box] 91 
| ) 92 | 93 | return dict( 94 | max_shard_size=max_shard_size, 95 | export_quantization_bit=export_quantization_bit, 96 | export_quantization_dataset=export_quantization_dataset, 97 | export_dir=export_dir, 98 | export_btn=export_btn, 99 | info_box=info_box 100 | ) 101 | -------------------------------------------------------------------------------- /src/llmtuner/train/dpo/workflow.py: -------------------------------------------------------------------------------- 1 | # Inspired by: https://github.com/huggingface/trl/blob/main/examples/research_projects/stack_llama_2/scripts/dpo_llama2.py 2 | 3 | from typing import TYPE_CHECKING, Optional, List 4 | from transformers import Seq2SeqTrainingArguments 5 | 6 | from llmtuner.data import get_dataset, preprocess_dataset, split_dataset 7 | from llmtuner.extras.constants import IGNORE_INDEX 8 | from llmtuner.extras.ploting import plot_loss 9 | from llmtuner.hparams import ModelArguments 10 | from llmtuner.model import load_model_and_tokenizer 11 | from llmtuner.train.dpo.collator import DPODataCollatorWithPadding 12 | from llmtuner.train.dpo.trainer import CustomDPOTrainer 13 | from llmtuner.train.utils import create_modelcard_and_push, create_ref_model 14 | 15 | if TYPE_CHECKING: 16 | from transformers import TrainerCallback 17 | from llmtuner.hparams import DataArguments, FinetuningArguments 18 | 19 | 20 | def run_dpo( 21 | model_args: "ModelArguments", 22 | data_args: "DataArguments", 23 | training_args: "Seq2SeqTrainingArguments", 24 | finetuning_args: "FinetuningArguments", 25 | callbacks: Optional[List["TrainerCallback"]] = None 26 | ): 27 | dataset = get_dataset(model_args, data_args) 28 | model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, training_args.do_train) 29 | dataset = preprocess_dataset(dataset, tokenizer, data_args, training_args, stage="rm") 30 | data_collator = DPODataCollatorWithPadding( 31 | tokenizer=tokenizer, 32 | pad_to_multiple_of=8, 33 | label_pad_token_id=IGNORE_INDEX if data_args.ignore_pad_token_for_loss else tokenizer.pad_token_id 34 | ) 35 | 36 | # Create reference model 37 | if finetuning_args.ref_model is None and (not training_args.do_train): # use the model itself 38 | ref_model = model 39 | else: 40 | ref_model = create_ref_model(model_args, finetuning_args) 41 | 42 | # Update arguments 43 | training_args_dict = training_args.to_dict() 44 | training_args_dict.update(dict(remove_unused_columns=False)) # important for pairwise dataset 45 | training_args = Seq2SeqTrainingArguments(**training_args_dict) 46 | 47 | # Initialize our Trainer 48 | trainer = CustomDPOTrainer( 49 | beta=finetuning_args.dpo_beta, 50 | loss_type=finetuning_args.dpo_loss, 51 | ftx_gamma=finetuning_args.dpo_ftx, 52 | model=model, 53 | ref_model=ref_model, 54 | args=training_args, 55 | tokenizer=tokenizer, 56 | data_collator=data_collator, 57 | callbacks=callbacks, 58 | **split_dataset(dataset, data_args, training_args) 59 | ) 60 | 61 | # Training 62 | if training_args.do_train: 63 | train_result = trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint) 64 | trainer.save_model() 65 | trainer.log_metrics("train", train_result.metrics) 66 | trainer.save_metrics("train", train_result.metrics) 67 | trainer.save_state() 68 | if trainer.is_world_process_zero() and finetuning_args.plot_loss: 69 | plot_loss(training_args.output_dir, keys=["loss", "eval_loss"]) 70 | 71 | # Evaluation 72 | if training_args.do_eval: 73 | metrics = trainer.evaluate(metric_key_prefix="eval") 74 | if id(model) == id(ref_model): # 
unable to compute rewards without a reference model 75 | remove_keys = [key for key in metrics.keys() if "rewards" in key] 76 | for key in remove_keys: 77 | metrics.pop(key) 78 | trainer.log_metrics("eval", metrics) 79 | trainer.save_metrics("eval", metrics) 80 | 81 | # Create model card 82 | create_modelcard_and_push(trainer, model_args, data_args, training_args, finetuning_args) 83 | -------------------------------------------------------------------------------- /src/llmtuner/webui/components/data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import gradio as gr 4 | from typing import TYPE_CHECKING, Any, Dict, Tuple 5 | 6 | from llmtuner.webui.common import DATA_CONFIG 7 | 8 | if TYPE_CHECKING: 9 | from gradio.components import Component 10 | 11 | 12 | PAGE_SIZE = 2 13 | 14 | 15 | def prev_page(page_index: int) -> int: 16 | return page_index - 1 if page_index > 0 else page_index 17 | 18 | 19 | def next_page(page_index: int, total_num: int) -> int: 20 | return page_index + 1 if (page_index + 1) * PAGE_SIZE < total_num else page_index 21 | 22 | 23 | def can_preview(dataset_dir: str, dataset: list) -> Dict[str, Any]: 24 | try: 25 | with open(os.path.join(dataset_dir, DATA_CONFIG), "r", encoding="utf-8") as f: 26 | dataset_info = json.load(f) 27 | except: 28 | return gr.update(interactive=False) 29 | 30 | if ( 31 | len(dataset) > 0 32 | and "file_name" in dataset_info[dataset[0]] 33 | and os.path.isfile(os.path.join(dataset_dir, dataset_info[dataset[0]]["file_name"])) 34 | ): 35 | return gr.update(interactive=True) 36 | else: 37 | return gr.update(interactive=False) 38 | 39 | 40 | def get_preview(dataset_dir: str, dataset: list, page_index: int) -> Tuple[int, list, Dict[str, Any]]: 41 | with open(os.path.join(dataset_dir, DATA_CONFIG), "r", encoding="utf-8") as f: 42 | dataset_info = json.load(f) 43 | 44 | data_file: str = dataset_info[dataset[0]]["file_name"] 45 | with open(os.path.join(dataset_dir, data_file), "r", encoding="utf-8") as f: 46 | if data_file.endswith(".json"): 47 | data = json.load(f) 48 | elif data_file.endswith(".jsonl"): 49 | data = [json.loads(line) for line in f] 50 | else: 51 | data = [line for line in f] 52 | return len(data), data[PAGE_SIZE * page_index : PAGE_SIZE * (page_index + 1)], gr.update(visible=True) 53 | 54 | 55 | def create_preview_box(dataset_dir: "gr.Textbox", dataset: "gr.Dropdown") -> Dict[str, "Component"]: 56 | data_preview_btn = gr.Button(interactive=False, scale=1) 57 | with gr.Column(visible=False, elem_classes="modal-box") as preview_box: 58 | with gr.Row(): 59 | preview_count = gr.Number(value=0, interactive=False, precision=0) 60 | page_index = gr.Number(value=0, interactive=False, precision=0) 61 | 62 | with gr.Row(): 63 | prev_btn = gr.Button() 64 | next_btn = gr.Button() 65 | close_btn = gr.Button() 66 | 67 | with gr.Row(): 68 | preview_samples = gr.JSON(interactive=False) 69 | 70 | dataset.change( 71 | can_preview, [dataset_dir, dataset], [data_preview_btn], queue=False 72 | ).then( 73 | lambda: 0, outputs=[page_index], queue=False 74 | ) 75 | data_preview_btn.click( 76 | get_preview, 77 | [dataset_dir, dataset, page_index], 78 | [preview_count, preview_samples, preview_box], 79 | queue=False 80 | ) 81 | prev_btn.click( 82 | prev_page, [page_index], [page_index], queue=False 83 | ).then( 84 | get_preview, 85 | [dataset_dir, dataset, page_index], 86 | [preview_count, preview_samples, preview_box], 87 | queue=False 88 | ) 89 | next_btn.click( 90 | next_page, 
[page_index, preview_count], [page_index], queue=False 91 | ).then( 92 | get_preview, 93 | [dataset_dir, dataset, page_index], 94 | [preview_count, preview_samples, preview_box], 95 | queue=False 96 | ) 97 | close_btn.click(lambda: gr.update(visible=False), outputs=[preview_box], queue=False) 98 | return dict( 99 | data_preview_btn=data_preview_btn, 100 | preview_count=preview_count, 101 | page_index=page_index, 102 | prev_btn=prev_btn, 103 | next_btn=next_btn, 104 | close_btn=close_btn, 105 | preview_samples=preview_samples 106 | ) 107 | -------------------------------------------------------------------------------- /tests/llamafy_baichuan2.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Converts the Baichuan2-7B model in the same format as LLaMA2-7B. 3 | # Usage: python llamafy_baichuan2.py --input_dir input --output_dir output --shard_size 10GB 4 | # Inspired by: https://huggingface.co/fireballoon/baichuan-llama-7b/blob/main/convert_baichuan_to_llama.py 5 | # Converted model: https://huggingface.co/hiyouga/Baichuan2-7B-Base-LLaMAfied 6 | 7 | import os 8 | import fire 9 | import json 10 | import torch 11 | from tqdm import tqdm 12 | from collections import OrderedDict 13 | from safetensors.torch import save_file 14 | from transformers.modeling_utils import ( 15 | shard_checkpoint, 16 | SAFE_WEIGHTS_NAME, 17 | SAFE_WEIGHTS_INDEX_NAME, 18 | WEIGHTS_NAME, 19 | WEIGHTS_INDEX_NAME 20 | ) 21 | from typing import Any, Dict, Optional 22 | 23 | 24 | CONFIG_NAME = "config.json" 25 | 26 | 27 | def save_weight( 28 | input_dir: str, 29 | output_dir: str, 30 | shard_size: str, 31 | save_safetensors: bool 32 | ): 33 | baichuan2_state_dict: Dict[str, torch.Tensor] = OrderedDict() 34 | for filepath in os.listdir(input_dir): 35 | if os.path.isfile(os.path.join(input_dir, filepath)) and filepath.endswith(".bin"): 36 | shard_weight = torch.load(os.path.join(input_dir, filepath), map_location="cpu") 37 | baichuan2_state_dict.update(shard_weight) 38 | 39 | llama2_state_dict: Dict[str, torch.Tensor] = OrderedDict() 40 | for key, value in tqdm(baichuan2_state_dict.items(), desc="Convert format"): 41 | if "W_pack" in key: 42 | proj_size = value.size(0) // 3 43 | llama2_state_dict[key.replace("W_pack", "q_proj")] = value[:proj_size, :] 44 | llama2_state_dict[key.replace("W_pack", "k_proj")] = value[proj_size:2*proj_size, :] 45 | llama2_state_dict[key.replace("W_pack", "v_proj")] = value[2*proj_size:, :] 46 | elif "lm_head" in key: 47 | llama2_state_dict[key] = torch.nn.functional.normalize(value) 48 | else: 49 | llama2_state_dict[key] = value 50 | 51 | weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME 52 | shards, index = shard_checkpoint(llama2_state_dict, max_shard_size=shard_size, weights_name=weights_name) 53 | 54 | for shard_file, shard in tqdm(shards.items(), desc="Save weights"): 55 | if save_safetensors: 56 | save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"}) 57 | else: 58 | torch.save(shard, os.path.join(output_dir, shard_file)) 59 | 60 | if index is None: 61 | print("Model weights saved in {}".format(os.path.join(output_dir, WEIGHTS_NAME))) 62 | else: 63 | index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME 64 | with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f: 65 | json.dump(index, f, indent=2, sort_keys=True) 66 | print("Model weights saved in {}".format(output_dir)) 67 | 68 | 69 | def save_config( 70 | input_dir: str, 71 | 
output_dir: str 72 | ): 73 | with open(os.path.join(input_dir, CONFIG_NAME), "r", encoding="utf-8") as f: 74 | llama2_config_dict: Dict[str, Any] = json.load(f) 75 | 76 | llama2_config_dict["architectures"] = ["LlamaForCausalLM"] 77 | llama2_config_dict.pop("auto_map", None) 78 | llama2_config_dict.pop("tokenizer_class", None) 79 | llama2_config_dict["model_type"] = "llama" 80 | 81 | with open(os.path.join(output_dir, CONFIG_NAME), "w", encoding="utf-8") as f: 82 | json.dump(llama2_config_dict, f, indent=2) 83 | print("Model config saved in {}".format(os.path.join(output_dir, CONFIG_NAME))) 84 | 85 | 86 | def llamafy_baichuan2( 87 | input_dir: str, 88 | output_dir: str, 89 | shard_size: str, 90 | save_safetensors: Optional[bool] = False 91 | ): 92 | try: 93 | os.makedirs(output_dir, exist_ok=False) 94 | except Exception as e: 95 | raise RuntimeError("Output dir already exists.") from e 96 | 97 | save_weight(input_dir, output_dir, shard_size, save_safetensors) 98 | save_config(input_dir, output_dir) 99 | 100 | 101 | if __name__ == "__main__": 102 | fire.Fire(llamafy_baichuan2) 103 | -------------------------------------------------------------------------------- /src/llmtuner/extras/misc.py: -------------------------------------------------------------------------------- 1 | import gc 2 | import os 3 | import torch 4 | from typing import TYPE_CHECKING, Tuple 5 | from transformers import InfNanRemoveLogitsProcessor, LogitsProcessorList 6 | from transformers.utils import ( 7 | is_torch_bf16_gpu_available, 8 | is_torch_cuda_available, 9 | is_torch_npu_available, 10 | is_torch_xpu_available 11 | ) 12 | 13 | _is_fp16_available = is_torch_npu_available() or is_torch_cuda_available() 14 | try: 15 | _is_bf16_available = is_torch_bf16_gpu_available() 16 | except: 17 | _is_bf16_available = False 18 | 19 | 20 | if TYPE_CHECKING: 21 | from llmtuner.hparams import ModelArguments 22 | 23 | 24 | class AverageMeter: 25 | r""" 26 | Computes and stores the average and current value. 27 | """ 28 | def __init__(self): 29 | self.reset() 30 | 31 | def reset(self): 32 | self.val = 0 33 | self.avg = 0 34 | self.sum = 0 35 | self.count = 0 36 | 37 | def update(self, val, n=1): 38 | self.val = val 39 | self.sum += val * n 40 | self.count += n 41 | self.avg = self.sum / self.count 42 | 43 | 44 | def count_parameters(model: torch.nn.Module) -> Tuple[int, int]: 45 | r""" 46 | Returns the number of trainable parameters and number of all parameters in the model. 47 | """ 48 | trainable_params, all_param = 0, 0 49 | for param in model.parameters(): 50 | num_params = param.numel() 51 | # if using DS Zero 3 and the weights are initialized empty 52 | if num_params == 0 and hasattr(param, "ds_numel"): 53 | num_params = param.ds_numel 54 | 55 | # Due to the design of 4bit linear layers from bitsandbytes, multiply the number of parameters by 2 56 | if param.__class__.__name__ == "Params4bit": 57 | num_params = num_params * 2 58 | 59 | all_param += num_params 60 | if param.requires_grad: 61 | trainable_params += num_params 62 | 63 | return trainable_params, all_param 64 | 65 | 66 | def get_current_device() -> torch.device: 67 | r""" 68 | Gets the current available device. 
69 | """ 70 | if is_torch_xpu_available(): 71 | device = "xpu:{}".format(os.environ.get("LOCAL_RANK", "0")) 72 | elif is_torch_npu_available(): 73 | device = "npu:{}".format(os.environ.get("LOCAL_RANK", "0")) 74 | elif is_torch_cuda_available(): 75 | device = "cuda:{}".format(os.environ.get("LOCAL_RANK", "0")) 76 | else: 77 | device = "cpu" 78 | 79 | return torch.device(device) 80 | 81 | 82 | def get_logits_processor() -> "LogitsProcessorList": 83 | r""" 84 | Gets logits processor that removes NaN and Inf logits. 85 | """ 86 | logits_processor = LogitsProcessorList() 87 | logits_processor.append(InfNanRemoveLogitsProcessor()) 88 | return logits_processor 89 | 90 | 91 | def infer_optim_dtype(model_dtype: torch.dtype) -> torch.dtype: 92 | r""" 93 | Infers the optimal dtype according to the model_dtype and device compatibility. 94 | """ 95 | if _is_bf16_available and model_dtype == torch.bfloat16: 96 | return torch.bfloat16 97 | elif _is_fp16_available: 98 | return torch.float16 99 | else: 100 | return torch.float32 101 | 102 | 103 | def torch_gc() -> None: 104 | r""" 105 | Collects GPU memory. 106 | """ 107 | gc.collect() 108 | if torch.cuda.is_available(): 109 | torch.cuda.empty_cache() 110 | torch.cuda.ipc_collect() 111 | 112 | 113 | def try_download_model_from_ms(model_args: "ModelArguments") -> None: 114 | if not use_modelscope() or os.path.exists(model_args.model_name_or_path): 115 | return 116 | 117 | try: 118 | from modelscope import snapshot_download 119 | revision = "master" if model_args.model_revision == "main" else model_args.model_revision 120 | model_args.model_name_or_path = snapshot_download( 121 | model_args.model_name_or_path, 122 | revision=revision, 123 | cache_dir=model_args.cache_dir 124 | ) 125 | except ImportError: 126 | raise ImportError("Please install modelscope via `pip install modelscope -U`") 127 | 128 | 129 | def use_modelscope() -> bool: 130 | return bool(int(os.environ.get("USE_MODELSCOPE_HUB", "0"))) 131 | -------------------------------------------------------------------------------- /src/llmtuner/train/sft/trainer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import torch 4 | import numpy as np 5 | import torch.nn as nn 6 | from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union 7 | from transformers import Seq2SeqTrainer 8 | 9 | from llmtuner.extras.constants import IGNORE_INDEX 10 | from llmtuner.extras.logging import get_logger 11 | 12 | if TYPE_CHECKING: 13 | from transformers.trainer import PredictionOutput 14 | 15 | 16 | logger = get_logger(__name__) 17 | 18 | 19 | class CustomSeq2SeqTrainer(Seq2SeqTrainer): 20 | r""" 21 | Inherits PeftTrainer to compute generative metrics such as BLEU and ROUGE. 22 | """ 23 | 24 | def prediction_step( 25 | self, 26 | model: nn.Module, 27 | inputs: Dict[str, Union[torch.Tensor, Any]], 28 | prediction_loss_only: bool, 29 | ignore_keys: Optional[List[str]] = None, 30 | ) -> Tuple[Optional[float], Optional[torch.Tensor], Optional[torch.Tensor]]: 31 | r""" 32 | Removes the prompt part in the generated tokens. 33 | 34 | Subclass and override to inject custom behavior. 35 | """ 36 | labels = inputs["labels"].detach().clone() if "labels" in inputs else None # backup labels 37 | if self.args.predict_with_generate: 38 | assert self.tokenizer.padding_side == "left", "This method only accepts left-padded tensor." 
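# Generation uses left-padding, so the prompt and label tensors may differ in length; the code below pads or truncates the labels to the prompt length before delegating to the parent prediction_step.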
39 | prompt_len, label_len = inputs["input_ids"].size(-1), inputs["labels"].size(-1) 40 | if prompt_len > label_len: 41 | inputs["labels"] = self._pad_tensors_to_target_len(inputs["labels"], inputs["input_ids"]) 42 | if label_len > prompt_len: # truncate the labels instead of padding the inputs (llama2 fp16 compatibility) 43 | inputs["labels"] = inputs["labels"][:, :prompt_len] 44 | 45 | loss, generated_tokens, _ = super().prediction_step( # ignore the returned labels (may be truncated) 46 | model, inputs, prediction_loss_only=prediction_loss_only, ignore_keys=ignore_keys 47 | ) 48 | if generated_tokens is not None and self.args.predict_with_generate: 49 | generated_tokens[:, :prompt_len] = self.tokenizer.pad_token_id 50 | generated_tokens = generated_tokens.contiguous() 51 | 52 | return loss, generated_tokens, labels 53 | 54 | def _pad_tensors_to_target_len( 55 | self, 56 | src_tensor: torch.Tensor, 57 | tgt_tensor: torch.Tensor 58 | ) -> torch.Tensor: 59 | r""" 60 | Pads the tensor to the same length as the target tensor. 61 | """ 62 | assert self.tokenizer.pad_token_id is not None, "Pad token is required." 63 | padded_tensor = self.tokenizer.pad_token_id * torch.ones_like(tgt_tensor) 64 | padded_tensor[:, -src_tensor.shape[-1]:] = src_tensor # adopt left-padding 65 | return padded_tensor.contiguous() # in contiguous memory 66 | 67 | def save_predictions( 68 | self, 69 | predict_results: "PredictionOutput" 70 | ) -> None: 71 | r""" 72 | Saves model predictions to `output_dir`. 73 | 74 | A custom behavior that not contained in Seq2SeqTrainer. 75 | """ 76 | if not self.is_world_process_zero(): 77 | return 78 | 79 | output_prediction_file = os.path.join(self.args.output_dir, "generated_predictions.jsonl") 80 | logger.info(f"Saving prediction results to {output_prediction_file}") 81 | 82 | labels = np.where(predict_results.label_ids != IGNORE_INDEX, predict_results.label_ids, self.tokenizer.pad_token_id) 83 | preds = np.where(predict_results.predictions != IGNORE_INDEX, predict_results.predictions, self.tokenizer.pad_token_id) 84 | 85 | for i in range(len(preds)): 86 | pad_len = np.nonzero(preds[i] != self.tokenizer.pad_token_id)[0] 87 | if len(pad_len): 88 | preds[i] = np.concatenate((preds[i][pad_len[0]:], preds[i][:pad_len[0]]), axis=-1) # move pad token to last 89 | 90 | decoded_labels = self.tokenizer.batch_decode(labels, skip_special_tokens=True, clean_up_tokenization_spaces=False) 91 | decoded_preds = self.tokenizer.batch_decode(preds, skip_special_tokens=True, clean_up_tokenization_spaces=True) 92 | 93 | with open(output_prediction_file, "w", encoding="utf-8") as writer: 94 | res: List[str] = [] 95 | for label, pred in zip(decoded_labels, decoded_preds): 96 | res.append(json.dumps({"label": label, "predict": pred}, ensure_ascii=False)) 97 | writer.write("\n".join(res)) 98 | -------------------------------------------------------------------------------- /src/llmtuner/webui/common.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import gradio as gr 4 | from collections import defaultdict 5 | from typing import Any, Dict, Optional 6 | from peft.utils import WEIGHTS_NAME, SAFETENSORS_WEIGHTS_NAME 7 | 8 | from llmtuner.extras.constants import ( 9 | DEFAULT_MODULE, 10 | DEFAULT_TEMPLATE, 11 | PEFT_METHODS, 12 | SUPPORTED_MODELS, 13 | TRAINING_STAGES, 14 | DownloadSource 15 | ) 16 | from llmtuner.extras.misc import use_modelscope 17 | from llmtuner.hparams.data_args import DATA_CONFIG 18 | 19 | 20 | ADAPTER_NAMES = 
{WEIGHTS_NAME, SAFETENSORS_WEIGHTS_NAME} 21 | DEFAULT_CACHE_DIR = "cache" 22 | DEFAULT_DATA_DIR = "data" 23 | DEFAULT_SAVE_DIR = "saves" 24 | USER_CONFIG = "user.config" 25 | 26 | 27 | def get_save_dir(*args) -> os.PathLike: 28 | return os.path.join(DEFAULT_SAVE_DIR, *args) 29 | 30 | 31 | def get_config_path() -> os.PathLike: 32 | return os.path.join(DEFAULT_CACHE_DIR, USER_CONFIG) 33 | 34 | 35 | def load_config() -> Dict[str, Any]: 36 | try: 37 | with open(get_config_path(), "r", encoding="utf-8") as f: 38 | return json.load(f) 39 | except: 40 | return {"lang": None, "last_model": None, "path_dict": {}, "cache_dir": None} 41 | 42 | 43 | def save_config(lang: str, model_name: Optional[str] = None, model_path: Optional[str] = None) -> None: 44 | os.makedirs(DEFAULT_CACHE_DIR, exist_ok=True) 45 | user_config = load_config() 46 | user_config["lang"] = lang or user_config["lang"] 47 | if model_name: 48 | user_config["last_model"] = model_name 49 | user_config["path_dict"][model_name] = model_path 50 | with open(get_config_path(), "w", encoding="utf-8") as f: 51 | json.dump(user_config, f, indent=2, ensure_ascii=False) 52 | 53 | 54 | def get_model_path(model_name: str) -> str: 55 | user_config = load_config() 56 | path_dict: Dict[DownloadSource, str] = SUPPORTED_MODELS.get(model_name, defaultdict(str)) 57 | model_path = user_config["path_dict"].get(model_name, None) or path_dict.get(DownloadSource.DEFAULT, None) 58 | if ( 59 | use_modelscope() 60 | and path_dict.get(DownloadSource.MODELSCOPE) 61 | and model_path == path_dict.get(DownloadSource.DEFAULT) 62 | ): # replace path 63 | model_path = path_dict.get(DownloadSource.MODELSCOPE) 64 | return model_path 65 | 66 | 67 | def get_prefix(model_name: str) -> str: 68 | return model_name.split("-")[0] 69 | 70 | 71 | def get_module(model_name: str) -> str: 72 | return DEFAULT_MODULE.get(get_prefix(model_name), "q_proj,v_proj") 73 | 74 | 75 | def get_template(model_name: str) -> str: 76 | if model_name and model_name.endswith("Chat") and get_prefix(model_name) in DEFAULT_TEMPLATE: 77 | return DEFAULT_TEMPLATE[get_prefix(model_name)] 78 | return "default" 79 | 80 | 81 | def list_adapters(model_name: str, finetuning_type: str) -> Dict[str, Any]: 82 | if finetuning_type not in PEFT_METHODS: 83 | return gr.update(value=[], choices=[], interactive=False) 84 | 85 | adapters = [] 86 | if model_name and finetuning_type == "lora": 87 | save_dir = get_save_dir(model_name, finetuning_type) 88 | if save_dir and os.path.isdir(save_dir): 89 | for adapter in os.listdir(save_dir): 90 | if ( 91 | os.path.isdir(os.path.join(save_dir, adapter)) 92 | and any([os.path.isfile(os.path.join(save_dir, adapter, name)) for name in ADAPTER_NAMES]) 93 | ): 94 | adapters.append(adapter) 95 | return gr.update(value=[], choices=adapters, interactive=True) 96 | 97 | 98 | def load_dataset_info(dataset_dir: str) -> Dict[str, Dict[str, Any]]: 99 | try: 100 | with open(os.path.join(dataset_dir, DATA_CONFIG), "r", encoding="utf-8") as f: 101 | return json.load(f) 102 | except Exception as err: 103 | print("Cannot open {} due to {}.".format(os.path.join(dataset_dir, DATA_CONFIG), str(err))) 104 | return {} 105 | 106 | 107 | def list_dataset( 108 | dataset_dir: Optional[str] = None, training_stage: Optional[str] = list(TRAINING_STAGES.keys())[0] 109 | ) -> Dict[str, Any]: 110 | dataset_info = load_dataset_info(dataset_dir if dataset_dir is not None else DEFAULT_DATA_DIR) 111 | ranking = TRAINING_STAGES[training_stage] in ["rm", "dpo"] 112 | datasets = [k for k, v in dataset_info.items() if 
v.get("ranking", False) == ranking] 113 | return gr.update(value=[], choices=datasets) 114 | -------------------------------------------------------------------------------- /src/llmtuner/train/sft/workflow.py: -------------------------------------------------------------------------------- 1 | # Inspired by: https://github.com/huggingface/transformers/blob/v4.34.1/examples/pytorch/summarization/run_summarization.py 2 | 3 | from typing import TYPE_CHECKING, Optional, List 4 | from transformers import DataCollatorForSeq2Seq, Seq2SeqTrainingArguments 5 | 6 | from llmtuner.data import get_dataset, preprocess_dataset, split_dataset 7 | from llmtuner.extras.constants import IGNORE_INDEX 8 | from llmtuner.extras.misc import get_logits_processor 9 | from llmtuner.extras.ploting import plot_loss 10 | from llmtuner.model import load_model_and_tokenizer 11 | from llmtuner.train.sft.metric import ComputeMetrics 12 | from llmtuner.train.sft.trainer import CustomSeq2SeqTrainer 13 | from llmtuner.train.utils import create_modelcard_and_push 14 | 15 | if TYPE_CHECKING: 16 | from transformers import TrainerCallback 17 | from llmtuner.hparams import ModelArguments, DataArguments, FinetuningArguments, GeneratingArguments 18 | 19 | 20 | def run_sft( 21 | model_args: "ModelArguments", 22 | data_args: "DataArguments", 23 | training_args: "Seq2SeqTrainingArguments", 24 | finetuning_args: "FinetuningArguments", 25 | generating_args: "GeneratingArguments", 26 | callbacks: Optional[List["TrainerCallback"]] = None 27 | ): 28 | dataset = get_dataset(model_args, data_args) 29 | model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, training_args.do_train) 30 | dataset = preprocess_dataset(dataset, tokenizer, data_args, training_args, stage="sft") 31 | 32 | if training_args.predict_with_generate: 33 | tokenizer.padding_side = "left" # use left-padding in generation 34 | 35 | if getattr(model, "is_quantized", False) and not training_args.do_train: 36 | setattr(model, "_hf_peft_config_loaded", True) # hack here: make model compatible with prediction 37 | 38 | data_collator = DataCollatorForSeq2Seq( 39 | tokenizer=tokenizer, 40 | pad_to_multiple_of=8 if tokenizer.padding_side == "right" else None, # for shift short attention 41 | label_pad_token_id=IGNORE_INDEX if data_args.ignore_pad_token_for_loss else tokenizer.pad_token_id 42 | ) 43 | 44 | # Override the decoding parameters of Seq2SeqTrainer 45 | training_args_dict = training_args.to_dict() 46 | training_args_dict.update(dict( 47 | generation_max_length=training_args.generation_max_length or data_args.cutoff_len, 48 | generation_num_beams=data_args.eval_num_beams or training_args.generation_num_beams 49 | )) 50 | training_args = Seq2SeqTrainingArguments(**training_args_dict) 51 | 52 | # Initialize our Trainer 53 | trainer = CustomSeq2SeqTrainer( 54 | model=model, 55 | args=training_args, 56 | tokenizer=tokenizer, 57 | data_collator=data_collator, 58 | callbacks=callbacks, 59 | compute_metrics=ComputeMetrics(tokenizer) if training_args.predict_with_generate else None, 60 | **split_dataset(dataset, data_args, training_args) 61 | ) 62 | 63 | # Keyword arguments for `model.generate` 64 | gen_kwargs = generating_args.to_dict() 65 | gen_kwargs["eos_token_id"] = [tokenizer.eos_token_id] + tokenizer.additional_special_tokens_ids 66 | gen_kwargs["pad_token_id"] = tokenizer.pad_token_id 67 | gen_kwargs["logits_processor"] = get_logits_processor() 68 | 69 | # Training 70 | if training_args.do_train: 71 | train_result = 
trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint) 72 | trainer.save_model() 73 | trainer.log_metrics("train", train_result.metrics) 74 | trainer.save_metrics("train", train_result.metrics) 75 | trainer.save_state() 76 | if trainer.is_world_process_zero() and finetuning_args.plot_loss: 77 | plot_loss(training_args.output_dir, keys=["loss", "eval_loss"]) 78 | 79 | # Evaluation 80 | if training_args.do_eval: 81 | metrics = trainer.evaluate(metric_key_prefix="eval", **gen_kwargs) 82 | if training_args.predict_with_generate: # eval_loss will be wrong if predict_with_generate is enabled 83 | metrics.pop("eval_loss", None) 84 | trainer.log_metrics("eval", metrics) 85 | trainer.save_metrics("eval", metrics) 86 | 87 | # Predict 88 | if training_args.do_predict: 89 | predict_results = trainer.predict(dataset, metric_key_prefix="predict", **gen_kwargs) 90 | if training_args.predict_with_generate: # predict_loss will be wrong if predict_with_generate is enabled 91 | predict_results.metrics.pop("predict_loss", None) 92 | trainer.log_metrics("predict", predict_results.metrics) 93 | trainer.save_metrics("predict", predict_results.metrics) 94 | trainer.save_predictions(predict_results) 95 | 96 | # Create model card 97 | create_modelcard_and_push(trainer, model_args, data_args, training_args, finetuning_args) 98 | -------------------------------------------------------------------------------- /src/llmtuner/train/rm/trainer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import torch 4 | from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union 5 | from transformers import Trainer 6 | 7 | from llmtuner.extras.logging import get_logger 8 | 9 | if TYPE_CHECKING: 10 | from transformers.trainer import PredictionOutput 11 | from transformers.modeling_utils import PreTrainedModel 12 | 13 | 14 | logger = get_logger(__name__) 15 | 16 | 17 | class PairwiseTrainer(Trainer): 18 | r""" 19 | Inherits PeftTrainer to compute pairwise loss. 20 | """ 21 | 22 | def __init__(self, *args, **kwargs): 23 | super().__init__(*args, **kwargs) 24 | self.can_return_loss = True # override property to return eval_loss 25 | 26 | def compute_loss( 27 | self, 28 | model: "PreTrainedModel", 29 | inputs: Dict[str, torch.Tensor], 30 | return_outputs: Optional[bool] = False 31 | ) -> Union[torch.Tensor, Tuple[torch.Tensor, List[torch.Tensor]]]: 32 | r""" 33 | Computes pairwise loss. The first n examples are chosen and the last n examples are rejected. 34 | 35 | Subclass and override to inject custom behavior. 36 | 37 | Note that the first element will be removed from the output tuple. 38 | See: https://github.com/huggingface/transformers/blob/v4.30.2/src/transformers/trainer.py#L3509 39 | """ 40 | # Compute rewards 41 | _, _, values = model(**inputs, output_hidden_states=True, return_dict=True) 42 | 43 | unwrapped_model: "PreTrainedModel" = self.accelerator.unwrap_model(self.model) 44 | if getattr(unwrapped_model.config, "model_type", None) == "chatglm": 45 | values = torch.transpose(values, 0, 1) 46 | 47 | # Split the inputs and rewards into two parts, chosen and rejected 48 | batch_size = inputs["input_ids"].size(0) // 2 49 | chosen_input_ids, rejected_input_ids = inputs["input_ids"][:batch_size], inputs["input_ids"][batch_size:] 50 | chosen_rewards, rejected_rewards = values[:batch_size], values[batch_size:] 51 | chosen_scores, rejected_scores = [], [] 52 | 53 | # Compute pairwise loss. 
Only backprop on the different tokens before padding 54 | # Inspired by: https://github.com/CarperAI/trlx/blob/main/examples/summarize_rlhf/reward_model/reward_model.py 55 | loss = 0 56 | for i in range(batch_size): 57 | chosen_length = (chosen_input_ids[i] != self.tokenizer.pad_token_id).nonzero()[-1] + 1 58 | rejected_length = (rejected_input_ids[i] != self.tokenizer.pad_token_id).nonzero()[-1] + 1 59 | check_divergence = (chosen_input_ids[i] != rejected_input_ids[i]).nonzero() 60 | 61 | if len(check_divergence) == 0: 62 | end_index = chosen_length 63 | div_index = end_index - 1 64 | else: 65 | end_index = max(chosen_length, rejected_length) 66 | div_index = check_divergence[0] 67 | 68 | assert div_index > 0 69 | chosen_trunc_rewards = chosen_rewards[i, div_index:end_index] 70 | rejected_trunc_rewards = rejected_rewards[i, div_index:end_index] 71 | if return_outputs: # use the score on the last token except pad token for inference 72 | chosen_scores.append(chosen_rewards[i, chosen_length-1]) 73 | rejected_scores.append(rejected_rewards[i, rejected_length-1]) 74 | loss += -torch.nn.functional.logsigmoid(chosen_trunc_rewards - rejected_trunc_rewards).mean() 75 | 76 | loss = loss / batch_size 77 | if return_outputs: 78 | chosen_scores, rejected_scores = torch.stack(chosen_scores), torch.stack(rejected_scores) 79 | return loss, [loss, chosen_scores, rejected_scores] 80 | 81 | return loss 82 | 83 | def save_predictions( 84 | self, 85 | predict_results: "PredictionOutput" 86 | ) -> None: 87 | r""" 88 | Saves model predictions to `output_dir`. 89 | 90 | A custom behavior that not contained in Seq2SeqTrainer. 91 | """ 92 | if not self.is_world_process_zero(): 93 | return 94 | 95 | output_prediction_file = os.path.join(self.args.output_dir, "generated_predictions.jsonl") 96 | logger.info(f"Saving prediction results to {output_prediction_file}") 97 | chosen_scores, rejected_scores = predict_results.predictions 98 | 99 | with open(output_prediction_file, "w", encoding="utf-8") as writer: 100 | res: List[str] = [] 101 | for c_score, r_score in zip(chosen_scores, rejected_scores): 102 | res.append(json.dumps({"chosen": round(float(c_score), 2), "rejected": round(float(r_score), 2)})) 103 | writer.write("\n".join(res)) 104 | -------------------------------------------------------------------------------- /src/llmtuner/train/ppo/workflow.py: -------------------------------------------------------------------------------- 1 | # Inspired by: https://github.com/lvwerra/trl/blob/main/examples/research_projects/stack_llama/scripts/rl_training.py 2 | 3 | import math 4 | from trl import PPOConfig 5 | from torch.optim import AdamW 6 | from typing import TYPE_CHECKING, Optional, List 7 | from transformers import DataCollatorWithPadding 8 | from transformers.optimization import get_scheduler 9 | 10 | from llmtuner.data import get_dataset, preprocess_dataset 11 | from llmtuner.extras.callbacks import SavePeftModelCallback 12 | from llmtuner.extras.ploting import plot_loss 13 | from llmtuner.model import load_model_and_tokenizer 14 | from llmtuner.train.utils import create_ref_model, create_reward_model 15 | from llmtuner.train.ppo.trainer import CustomPPOTrainer 16 | 17 | if TYPE_CHECKING: 18 | from transformers import Seq2SeqTrainingArguments, TrainerCallback 19 | from llmtuner.hparams import ModelArguments, DataArguments, FinetuningArguments, GeneratingArguments 20 | 21 | 22 | def run_ppo( 23 | model_args: "ModelArguments", 24 | data_args: "DataArguments", 25 | training_args: "Seq2SeqTrainingArguments", 26 | 
finetuning_args: "FinetuningArguments", 27 | generating_args: "GeneratingArguments", 28 | callbacks: Optional[List["TrainerCallback"]] = None 29 | ): 30 | dataset = get_dataset(model_args, data_args) 31 | model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, training_args.do_train, add_valuehead=True) 32 | dataset = preprocess_dataset(dataset, tokenizer, data_args, training_args, stage="ppo") 33 | 34 | tokenizer.padding_side = "left" # use left-padding in generation while using right-padding in training 35 | data_collator = DataCollatorWithPadding(tokenizer=tokenizer) 36 | 37 | # Create reference model and reward model 38 | ref_model = create_ref_model(model_args, finetuning_args, add_valuehead=True) 39 | reward_model = create_reward_model(model, model_args, finetuning_args) 40 | 41 | # Create ppo config 42 | backward_batch_size = training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps 43 | ppo_config = PPOConfig( 44 | model_name=model_args.model_name_or_path, 45 | learning_rate=training_args.learning_rate, 46 | mini_batch_size=training_args.per_device_train_batch_size, 47 | batch_size=backward_batch_size * finetuning_args.ppo_buffer_size, 48 | gradient_accumulation_steps=training_args.gradient_accumulation_steps, 49 | ppo_epochs=finetuning_args.ppo_epochs, 50 | max_grad_norm=training_args.max_grad_norm, 51 | seed=training_args.seed, 52 | optimize_device_cache=True, 53 | target=finetuning_args.ppo_target, 54 | log_with=finetuning_args.ppo_logger, 55 | use_score_scaling=finetuning_args.ppo_score_norm, 56 | use_score_norm=finetuning_args.ppo_score_norm, 57 | whiten_rewards=finetuning_args.ppo_whiten_rewards, 58 | accelerator_kwargs={"step_scheduler_with_optimizer": False} 59 | ) 60 | 61 | # Create optimizer and scheduler 62 | optimizer = AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=training_args.learning_rate) 63 | if training_args.max_steps > 0: 64 | num_training_steps = training_args.max_steps 65 | else: 66 | total_train_batch_size = backward_batch_size * finetuning_args.ppo_buffer_size * training_args.world_size 67 | num_training_steps = training_args.num_train_epochs * math.ceil(len(dataset) / total_train_batch_size) 68 | 69 | lr_scheduler = get_scheduler( 70 | training_args.lr_scheduler_type, 71 | optimizer=optimizer, 72 | num_warmup_steps=training_args.get_warmup_steps(num_training_steps), 73 | num_training_steps=num_training_steps 74 | ) 75 | 76 | # Initialize our Trainer 77 | ppo_trainer = CustomPPOTrainer( 78 | model_args=model_args, 79 | training_args=training_args, 80 | finetuning_args=finetuning_args, 81 | generating_args=generating_args, 82 | callbacks=callbacks + [SavePeftModelCallback()], 83 | reward_model=reward_model, 84 | config=ppo_config, 85 | model=model, 86 | ref_model=ref_model, 87 | tokenizer=tokenizer, 88 | dataset=dataset, 89 | data_collator=data_collator, 90 | optimizer=optimizer, 91 | lr_scheduler=lr_scheduler 92 | ) 93 | 94 | # Training 95 | if training_args.do_train: 96 | ppo_trainer.ppo_train(resume_from_checkpoint=training_args.resume_from_checkpoint) 97 | ppo_trainer.save_model() 98 | ppo_trainer.save_state() # must be called after save_model to have a folder 99 | if ppo_trainer.is_world_process_zero() and finetuning_args.plot_loss: 100 | plot_loss(training_args.output_dir, keys=["loss", "reward"]) 101 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 
|

6 | 7 |

8 | 9 | **** 10 | 11 | ### 模型简介 12 | 后稷•首个中文开源农业大模型是由山西大学、山西农业大学、[The Fin AI](https://github.com/The-FinAI)联合研发,以[Baichuan](https://huggingface.co/baichuan-inc)为底座,基于海量有监督农业领域相关数据微调,具备广泛的农业知识和智能分析能力,该模型旨在为农业领域提供全面而高效的信息处理和决策支持。 13 | ### 说明 14 | 1. 本项目仍在不断迭代更新,考虑到农业领域覆盖广泛,且存在领域数据集资源严重匮乏的问题,我们目前仅发布了在作物学相关问答数据集上的微调模型,其他领域模型我们会在后续陆续公布; 15 | 2. 本项目所提供的数据与模型仅供科研使用,严禁用于商业用途; 16 | ### 新闻 17 | - [**2024.06.08**] 👋 👋 👋 [稷丰-首个开源中文农业多模态大模型](https://github.com/zhiweihu1103/AgriAgent)发布,欢迎大家关注; 18 | - [**2024.02.22**] 👋 👋 👋 后稷-v1.0版本模型发布,目前模型已支持作物学领域; 19 | ### 更新计划 20 | - [ ] 覆盖更多农业相关领域, 目前仅支持作物学领域,后续将会更新园艺学、农业资源利用、植物保护、畜牧学、兽医学、草业科学、林学以及水产等学科; 21 | - [ ] 基于农业知识图谱、农业教材以及期刊文章预训练模型; 22 | - [ ] 开源预训练数据以及指令微调数据; 23 | - [ ] 构建农业领域评测Benchmark; 24 | - [ ] 发布AgriMa技术评测报告; 25 | ### 部分测试 26 | 问题 | Baichuan | AgriMa 27 | :--: | :--: | :--: 28 | 小麦浇灌越冬水有什么作用?应注意哪些事项?| 浇灌越冬水,又称冬灌,一般在11月中下旬,结合冬前中耕,对小麦进行灌溉。冬灌可以提高小麦的抗寒能力,增强小麦的抗病性,对小麦的生长发育具有积极作用。小麦浇灌越冬水,有什么作用?应注意哪些事项? 1、小麦浇灌越冬水,有什么作用? 浇灌越冬水,可以提高小麦的抗寒能力,增强小麦的抗病性,对小麦的生长发育具有积极作用。小麦浇灌越冬水,可以使小麦根系在土壤中生长时间更长,吸收土壤中的水分,使小麦的根系发育更旺盛,从而提高小麦的抗旱能力。 2、浇灌越冬水,应注意哪些事项? 浇灌越冬水,应注意浇水量,不宜过多,以免降低小麦的抗寒能力。浇灌越冬水,应注意浇灌时间,浇灌时间不宜过早,以免小麦根系遭受冻害。浇灌越冬水,应注意浇灌深度,浇灌深度应根据土壤的实际情况确定,不宜过深,以免影响小麦的生长。 3、小麦浇灌越冬水,应注意哪些事项? 浇灌越冬水,应注意浇灌方式,浇灌方式应根据小麦的生长情况确定,一般采用沟灌或膜下滴灌。浇灌越冬水,应注意浇灌方式,避免造成小麦根系的损伤,影响小麦的生长。浇灌越冬水,应注意浇灌时间,避免在冬季气温过低时浇灌,以免造成小麦根系冻害。 4、小麦浇灌越冬水,应注意哪些事项? 浇灌越冬水,应注意浇灌深度,避免造成小麦根系损伤,影响小麦的生长。浇灌越冬水,应注意浇灌次数,避免造成小麦根系的损伤,影响小麦的生长。浇灌越冬水,应注意浇灌次数,避免造成小麦根系的损伤,影响小麦的生长。浇灌越冬水,应注意浇灌次数,避免造成小麦根系的损伤,影响小麦的生长。浇灌越冬水,应注意浇灌次数,避免造成小麦根系的损伤,影响小麦的生长。| 越冬水是小麦生长过程中不可缺少的环节,其作用主要有以下几点:1.可以提高土壤的保水能力,使土壤更加稳定,有利于小麦安全越冬。2.可以补充土壤中的水分,保证小麦的正常生长,提高小麦的抗旱能力。3.可以改善土壤结构,提高土壤的肥力,促进小麦生长。4.可以调节土壤的酸碱度,保持土壤的平衡状态。小麦浇灌越冬水需要注意以下几点:1.选择合适的灌溉时间。一般来说,越冬水应在小麦生长过程中进行,以保证小麦生长需要的水分。2.灌溉量要适当。过量灌溉会导致土壤过湿,不利于小麦的生长。3.灌溉方式要合理。可以选择滴灌、喷灌等方式,提高灌溉效率。4.灌溉过程中要注意安全,避免出现事故。 29 | 大豆茎叶生长茂盛,但是不结荚、空荚、瘪粒,是什么原因?| 1、大豆营养生长旺盛,生殖生长受到抑制,所以造成空荚、秕粒。2、大豆植株长势过旺,养分大量向茎叶输送,不向籽粒输送,导致空荚。3、大豆营养生长与生殖生长失调,造成空荚、秕粒。4、土壤水分过多,大豆根系吸收能力减弱,养分输送不畅,造成空荚。5、土壤缺硼、缺锌,大豆花芽分化时,花粉管不能正常发育,导致花粉败育,不能受精结荚。6、土壤中氮肥过多,导致大豆徒长,植株瘦弱,抗逆性差,空荚、秕粒多。7、土壤中磷肥过多,导致大豆荚多籽粒少,空荚、秕粒多。8、土壤中缺钙,影响大豆根系吸收,造成空荚、秕粒多。9、土壤中缺硼,影响大豆花芽分化,花粉管不能正常发育,导致花粉败育,不能受精结荚。10、土壤中缺锌,影响大豆花芽分化,花粉管不能正常发育,导致花粉败育,不能受精结荚。11、土壤中缺钾,影响大豆光合作用,导致大豆植株徒长,瘦弱,空荚、秕粒多。12、土壤中缺钼,影响大豆根瘤菌发育,导致大豆生长缓慢,植株矮小,空荚、秕粒多。13、土壤中缺铁,影响大豆叶绿素合成,导致叶片黄化,空荚、秕粒多。14、土壤中缺锰,影响大豆光合作用,导致植株生长缓慢,叶片黄化,空荚、秕粒多。15、土壤中缺钙,影响大豆光合作用,导致植株生长缓慢,叶片黄化,空荚、秕粒多。16、土壤中缺硫,影响大豆叶绿素合成,导致叶片黄化,空荚、秕粒多。17、土壤中缺锌,影响大豆花芽分化,花粉管不能正常发育,导致花粉败育,不能受精结荚。 | 营养供应不足。大豆生长需要充足的养分,如果养分供应不足,就会导致茎叶生长茂盛,但是不结荚、空荚、瘪粒。这种情况可以通过加强施肥、合理施肥等措施来解决。光照不足。大豆生长需要充足的光照,如果光照不足,就会导致茎叶生长茂盛,但是不结荚、空荚、瘪粒。这种情况可以通过加强通风、合理遮阴等措施来解决。病虫害影响。大豆茎叶生长茂盛,但是不结荚、空荚、瘪粒,也可能是受到了病虫害的影响。这种情况可以通过加强病虫害防治等措施来解决。 30 | 大白菜在生长发育过程中如何防止烧心病的发生?| 1、选用抗病品种。2、大白菜施肥:在基肥施足的基础上,追肥宜少施勤施,避免过量施用氮肥,增施钾肥。3、加强大白菜的栽培管理:1大白菜要与其他作物轮作,避免连作。2大白菜播种后及时覆盖地膜,并浇水,促进大白菜的快速生长。3及时中耕除草,提高地温,促进根系发育。4大白菜在生长过程中,发现有病株及时拔除,并带出田外烧毁。5大白菜生长期间,要根据大白菜的长势及时追肥,以保证大白菜的正常生长。6大白菜收获后,及时清理田间残株病叶,集中烧毁,以减少病源。7大白菜收获后,及时浇水,以利于大白菜的越冬。8大白菜在生长期间,遇到干旱天气,要及时浇水,保持土壤湿润,促进大白菜的正常生长。9大白菜收获后,及时清除田间残株病叶,集中烧毁,以减少病源。10大白菜收获后,及时浇水,以利于大白菜的越冬。11大白菜在生长过程中,发现有病株及时拔除,并带出田外烧毁。12大白菜生长期间,要根据大白菜的长势及时追肥,以保证大白菜的正常生长。13大白菜收获后,及时清理田间残株病叶,集中烧毁,以减少病源。14大白菜收获后,及时浇水,以利于大白菜的越冬。15大白菜在生长期间,遇到干旱天气,要及时浇水,保持土壤湿润,促进大白菜的正常生长。16大白菜收获后,及时清除田间残株病叶,集中烧毁,以减少病源。17大白菜收获后,及时浇水,以利于大白菜的越冬。18大白菜在生长期间,发现有病株及时拔除,并带出田外烧毁。19大白菜生长期间,要根据大白菜的长势及时追肥,以保证大白菜的正常生长。| 烧心病是一种由大白菜心腐病菌引起的病害,其病原菌可以在土壤中存活,因此,在大白菜的生长发育过程中,应注意以下几点:1.选择健康的种子。选择健康的种子,可以减少病原菌的数量,从而降低烧心病的发生率。2.科学施肥。科学施肥可以提高大白菜的抗病能力,降低烧心病的发生率。3.合理浇水。合理浇水可以保持土壤的湿度,避免土壤过干或过湿,从而减少病原菌的存活机会。4.及时清除病株。及时清除病株可以减少病原菌的传播,从而降低烧心病的发生率。 31 | ### 模型训练 32 | 1. 
开发环境搭建 33 | ```python 34 | conda create -n agrima python=3.10 35 | conda activate agrima 36 | cd AgriMa 37 | pip install -r requirements.txt 38 | ``` 39 | 2. 指令集构建 40 | * 按照[此说明](https://github.com/hiyouga/LLaMA-Factory/blob/main/data/README_zh.md)进行指令集构建; 41 | * 如需获取AgriMa微调指令集,请发送邮件至zhiweihu@whu.edu.cn与我们联系。 42 | 3. 模型微调 43 | ```python 44 | sh start_train.sh 45 | ``` 46 | * 需要首先下载预训练权重,如[Baichuan](https://huggingface.co/baichuan-inc); 47 | * 需要根据你的服务器地址修改对应的:LOG_PATH, OUTPUT_DIR, MODEL_NAME_OR_PATH地址; 48 | 4. Web页面测试 49 | ```python 50 | sh start_web_demo.sh 51 | ``` 52 | * 需要修改model_name_or_path位置; 53 | 54 | ### 项目参与者 55 | 本项目由山西大学、山西农业大学、[The Fin AI](https://github.com/The-FinAI)联合开发完成 56 | 57 | 项目主要开发人员:[胡志伟](https://github.com/zhiweihu1103)、[闫智超](https://github.com/yzc111)、[马博翔](https://github.com/MattMaBX)、[黄济民 (The Fin AI)](https://github.com/jiminHuang)、[韩玮光 (The Fin AI)](https://github.com/tothemoon96) 58 | 59 | 指导教师:李茹(教授) 60 | 61 | 若有相关使用需求或者相关数据集提供,欢迎与我们取得联系:zhiweihu@whu.edu.cn 62 | ### 致谢 63 | 1. 本项目基于现有开源项目二次开发,在此对相关项目和研发人员表示感谢。 64 | * [LLaMA-Factory](https://github.com/hiyouga/LLaMA-Factory) 65 | ## Star History 66 | 67 | 68 | 69 | Star History Chart 70 | 71 | -------------------------------------------------------------------------------- /src/llmtuner/webui/chatter.py: -------------------------------------------------------------------------------- 1 | import gradio as gr 2 | from gradio.components import Component # cannot use TYPE_CHECKING here 3 | from typing import TYPE_CHECKING, Any, Dict, Generator, List, Optional, Tuple 4 | 5 | from llmtuner.chat import ChatModel 6 | from llmtuner.extras.misc import torch_gc 7 | from llmtuner.hparams import GeneratingArguments 8 | from llmtuner.webui.common import get_save_dir 9 | from llmtuner.webui.locales import ALERTS 10 | 11 | if TYPE_CHECKING: 12 | from llmtuner.webui.manager import Manager 13 | 14 | 15 | class WebChatModel(ChatModel): 16 | 17 | def __init__( 18 | self, 19 | manager: "Manager", 20 | demo_mode: Optional[bool] = False, 21 | lazy_init: Optional[bool] = True 22 | ) -> None: 23 | self.manager = manager 24 | self.demo_mode = demo_mode 25 | self.model = None 26 | self.tokenizer = None 27 | self.generating_args = GeneratingArguments() 28 | 29 | if not lazy_init: # read arguments from command line 30 | super().__init__() 31 | 32 | if demo_mode: # load demo_config.json if exists 33 | import json 34 | try: 35 | with open("demo_config.json", "r", encoding="utf-8") as f: 36 | args = json.load(f) 37 | assert args.get("model_name_or_path", None) and args.get("template", None) 38 | super().__init__(args) 39 | except AssertionError: 40 | print("Please provided model name and template in `demo_config.json`.") 41 | except: 42 | print("Cannot find `demo_config.json` at current directory.") 43 | 44 | @property 45 | def loaded(self) -> bool: 46 | return self.model is not None 47 | 48 | def load_model(self, data: Dict[Component, Any]) -> Generator[str, None, None]: 49 | get = lambda name: data[self.manager.get_elem_by_name(name)] 50 | lang = get("top.lang") 51 | error = "" 52 | if self.loaded: 53 | error = ALERTS["err_exists"][lang] 54 | elif not get("top.model_name"): 55 | error = ALERTS["err_no_model"][lang] 56 | elif not get("top.model_path"): 57 | error = ALERTS["err_no_path"][lang] 58 | elif self.demo_mode: 59 | error = ALERTS["err_demo"][lang] 60 | 61 | if error: 62 | gr.Warning(error) 63 | yield error 64 | return 65 | 66 | if get("top.adapter_path"): 67 | adapter_name_or_path = ",".join([ 68 | 
get_save_dir(get("top.model_name"), get("top.finetuning_type"), adapter) 69 | for adapter in get("top.adapter_path")]) 70 | else: 71 | adapter_name_or_path = None 72 | 73 | yield ALERTS["info_loading"][lang] 74 | args = dict( 75 | model_name_or_path=get("top.model_path"), 76 | adapter_name_or_path=adapter_name_or_path, 77 | finetuning_type=get("top.finetuning_type"), 78 | quantization_bit=int(get("top.quantization_bit")) if get("top.quantization_bit") in ["8", "4"] else None, 79 | template=get("top.template"), 80 | flash_attn=(get("top.booster") == "flash_attn"), 81 | use_unsloth=(get("top.booster") == "unsloth"), 82 | rope_scaling=get("top.rope_scaling") if get("top.rope_scaling") in ["linear", "dynamic"] else None 83 | ) 84 | super().__init__(args) 85 | 86 | yield ALERTS["info_loaded"][lang] 87 | 88 | def unload_model(self, data: Dict[Component, Any]) -> Generator[str, None, None]: 89 | lang = data[self.manager.get_elem_by_name("top.lang")] 90 | 91 | if self.demo_mode: 92 | gr.Warning(ALERTS["err_demo"][lang]) 93 | yield ALERTS["err_demo"][lang] 94 | return 95 | 96 | yield ALERTS["info_unloading"][lang] 97 | self.model = None 98 | self.tokenizer = None 99 | torch_gc() 100 | yield ALERTS["info_unloaded"][lang] 101 | 102 | def predict( 103 | self, 104 | chatbot: List[Tuple[str, str]], 105 | query: str, 106 | history: List[Tuple[str, str]], 107 | system: str, 108 | max_new_tokens: int, 109 | top_p: float, 110 | temperature: float 111 | ) -> Generator[Tuple[List[Tuple[str, str]], List[Tuple[str, str]]], None, None]: 112 | chatbot.append([query, ""]) 113 | response = "" 114 | for new_text in self.stream_chat( 115 | query, history, system, max_new_tokens=max_new_tokens, top_p=top_p, temperature=temperature 116 | ): 117 | response += new_text 118 | new_history = history + [(query, response)] 119 | chatbot[-1] = [query, self.postprocess(response)] 120 | yield chatbot, new_history 121 | 122 | def postprocess(self, response: str) -> str: 123 | blocks = response.split("```") 124 | for i, block in enumerate(blocks): 125 | if i % 2 == 0: 126 | blocks[i] = block.replace("<", "<").replace(">", ">") 127 | return "```".join(blocks) 128 | -------------------------------------------------------------------------------- /evaluation/ceval/mapping.json: -------------------------------------------------------------------------------- 1 | { 2 | "accountant": { 3 | "name": "注册会计师", 4 | "category": "Other" 5 | }, 6 | "advanced_mathematics": { 7 | "name": "高等数学", 8 | "category": "STEM" 9 | }, 10 | "art_studies": { 11 | "name": "艺术学", 12 | "category": "Humanities" 13 | }, 14 | "basic_medicine": { 15 | "name": "基础医学", 16 | "category": "Other" 17 | }, 18 | "business_administration": { 19 | "name": "工商管理", 20 | "category": "Social Sciences" 21 | }, 22 | "chinese_language_and_literature": { 23 | "name": "中国语言文学", 24 | "category": "Humanities" 25 | }, 26 | "civil_servant": { 27 | "name": "公务员", 28 | "category": "Other" 29 | }, 30 | "clinical_medicine": { 31 | "name": "临床医学", 32 | "category": "Other" 33 | }, 34 | "college_chemistry": { 35 | "name": "大学化学", 36 | "category": "STEM" 37 | }, 38 | "college_economics": { 39 | "name": "大学经济学", 40 | "category": "Social Sciences" 41 | }, 42 | "college_physics": { 43 | "name": "大学物理", 44 | "category": "STEM" 45 | }, 46 | "college_programming": { 47 | "name": "大学编程", 48 | "category": "STEM" 49 | }, 50 | "computer_architecture": { 51 | "name": "计算机组成", 52 | "category": "STEM" 53 | }, 54 | "computer_network": { 55 | "name": "计算机网络", 56 | "category": "STEM" 57 | }, 58 | 
"discrete_mathematics": { 59 | "name": "离散数学", 60 | "category": "STEM" 61 | }, 62 | "education_science": { 63 | "name": "教育学", 64 | "category": "Social Sciences" 65 | }, 66 | "electrical_engineer": { 67 | "name": "注册电气工程师", 68 | "category": "STEM" 69 | }, 70 | "environmental_impact_assessment_engineer": { 71 | "name": "环境影响评价工程师", 72 | "category": "Other" 73 | }, 74 | "fire_engineer": { 75 | "name": "注册消防工程师", 76 | "category": "Other" 77 | }, 78 | "high_school_biology": { 79 | "name": "高中生物", 80 | "category": "STEM" 81 | }, 82 | "high_school_chemistry": { 83 | "name": "高中化学", 84 | "category": "STEM" 85 | }, 86 | "high_school_chinese": { 87 | "name": "高中语文", 88 | "category": "Humanities" 89 | }, 90 | "high_school_geography": { 91 | "name": "高中地理", 92 | "category": "Social Sciences" 93 | }, 94 | "high_school_history": { 95 | "name": "高中历史", 96 | "category": "Humanities" 97 | }, 98 | "high_school_mathematics": { 99 | "name": "高中数学", 100 | "category": "STEM" 101 | }, 102 | "high_school_physics": { 103 | "name": "高中物理", 104 | "category": "STEM" 105 | }, 106 | "high_school_politics": { 107 | "name": "高中政治", 108 | "category": "Social Sciences" 109 | }, 110 | "ideological_and_moral_cultivation": { 111 | "name": "思想道德修养与法律基础", 112 | "category": "Humanities" 113 | }, 114 | "law": { 115 | "name": "法学", 116 | "category": "Humanities" 117 | }, 118 | "legal_professional": { 119 | "name": "法律职业资格", 120 | "category": "Humanities" 121 | }, 122 | "logic": { 123 | "name": "逻辑学", 124 | "category": "Humanities" 125 | }, 126 | "mao_zedong_thought": { 127 | "name": "毛泽东思想和中国特色社会主义理论体系概论", 128 | "category": "Social Sciences" 129 | }, 130 | "marxism": { 131 | "name": "马克思主义基本原理", 132 | "category": "Social Sciences" 133 | }, 134 | "metrology_engineer": { 135 | "name": "注册计量师", 136 | "category": "STEM" 137 | }, 138 | "middle_school_biology": { 139 | "name": "初中生物", 140 | "category": "STEM" 141 | }, 142 | "middle_school_chemistry": { 143 | "name": "初中化学", 144 | "category": "STEM" 145 | }, 146 | "middle_school_geography": { 147 | "name": "初中地理", 148 | "category": "Social Sciences" 149 | }, 150 | "middle_school_history": { 151 | "name": "初中历史", 152 | "category": "Humanities" 153 | }, 154 | "middle_school_mathematics": { 155 | "name": "初中数学", 156 | "category": "STEM" 157 | }, 158 | "middle_school_physics": { 159 | "name": "初中物理", 160 | "category": "STEM" 161 | }, 162 | "middle_school_politics": { 163 | "name": "初中政治", 164 | "category": "Social Sciences" 165 | }, 166 | "modern_chinese_history": { 167 | "name": "近代史纲要", 168 | "category": "Humanities" 169 | }, 170 | "operating_system": { 171 | "name": "操作系统", 172 | "category": "STEM" 173 | }, 174 | "physician": { 175 | "name": "医师资格", 176 | "category": "Other" 177 | }, 178 | "plant_protection": { 179 | "name": "植物保护", 180 | "category": "Other" 181 | }, 182 | "probability_and_statistics": { 183 | "name": "概率统计", 184 | "category": "STEM" 185 | }, 186 | "professional_tour_guide": { 187 | "name": "导游资格", 188 | "category": "Humanities" 189 | }, 190 | "sports_science": { 191 | "name": "体育学", 192 | "category": "Other" 193 | }, 194 | "tax_accountant": { 195 | "name": "税务师", 196 | "category": "Other" 197 | }, 198 | "teacher_qualification": { 199 | "name": "教师资格", 200 | "category": "Social Sciences" 201 | }, 202 | "urban_and_rural_planner": { 203 | "name": "注册城乡规划师", 204 | "category": "Other" 205 | }, 206 | "veterinary_medicine": { 207 | "name": "兽医学", 208 | "category": "STEM" 209 | } 210 | } -------------------------------------------------------------------------------- 
/src/llmtuner/train/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from typing import TYPE_CHECKING, Optional, Union 3 | 4 | from llmtuner.extras.logging import get_logger 5 | from llmtuner.hparams import ModelArguments, FinetuningArguments 6 | from llmtuner.model import get_modelcard_args, load_model_and_tokenizer, load_valuehead_params 7 | 8 | if TYPE_CHECKING: 9 | from transformers import Seq2SeqTrainingArguments, Trainer 10 | from transformers.modeling_utils import PreTrainedModel 11 | from trl import AutoModelForCausalLMWithValueHead 12 | from llmtuner.hparams import DataArguments 13 | 14 | 15 | logger = get_logger(__name__) 16 | 17 | 18 | def create_modelcard_and_push( 19 | trainer: "Trainer", 20 | model_args: "ModelArguments", 21 | data_args: "DataArguments", 22 | training_args: "Seq2SeqTrainingArguments", 23 | finetuning_args: "FinetuningArguments" 24 | ) -> None: 25 | if training_args.do_train: 26 | if training_args.push_to_hub: 27 | trainer.push_to_hub(**get_modelcard_args(model_args, data_args, finetuning_args)) 28 | return 29 | try: 30 | trainer.create_model_card(**get_modelcard_args(model_args, data_args, finetuning_args)) 31 | except Exception as err: 32 | logger.warning("Failed to create model card: {}".format(str(err))) 33 | 34 | 35 | def create_ref_model( 36 | model_args: "ModelArguments", 37 | finetuning_args: "FinetuningArguments", 38 | add_valuehead: Optional[bool] = False 39 | ) -> Union["PreTrainedModel", "AutoModelForCausalLMWithValueHead"]: 40 | r""" 41 | Creates reference model for PPO/DPO training. Evaluation mode is not supported. 42 | 43 | The valuehead parameter is randomly initialized since it is useless for PPO training. 44 | """ 45 | if finetuning_args.ref_model is not None: 46 | ref_model_args_dict = model_args.to_dict() 47 | ref_model_args_dict.update(dict( 48 | model_name_or_path=finetuning_args.ref_model, 49 | adapter_name_or_path=finetuning_args.ref_model_adapters, 50 | quantization_bit=finetuning_args.ref_model_quantization_bit 51 | )) 52 | ref_model_args = ModelArguments(**ref_model_args_dict) 53 | ref_finetuning_args = FinetuningArguments(finetuning_type="lora") 54 | ref_model, _ = load_model_and_tokenizer( 55 | ref_model_args, ref_finetuning_args, is_trainable=False, add_valuehead=add_valuehead 56 | ) 57 | logger.info("Created reference model from {}".format(finetuning_args.ref_model)) 58 | else: 59 | if finetuning_args.finetuning_type == "lora": 60 | ref_model = None 61 | else: 62 | ref_model, _ = load_model_and_tokenizer( 63 | model_args, finetuning_args, is_trainable=False, add_valuehead=add_valuehead 64 | ) 65 | logger.info("Created reference model from the model itself.") 66 | 67 | return ref_model 68 | 69 | 70 | def create_reward_model( 71 | model: "AutoModelForCausalLMWithValueHead", 72 | model_args: "ModelArguments", 73 | finetuning_args: "FinetuningArguments" 74 | ) -> "AutoModelForCausalLMWithValueHead": 75 | r""" 76 | Creates reward model for PPO training. 77 | """ 78 | if finetuning_args.reward_model_type == "api": 79 | assert finetuning_args.reward_model.startswith("http"), "Please provide full url." 
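# Note: in this "api" branch the reward model handle is simply the server URL string returned a few lines below; presumably the PPO trainer queries that endpoint for reward scores at runtime.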
80 | logger.info("Use reward server {}".format(finetuning_args.reward_model)) 81 | return finetuning_args.reward_model 82 | elif finetuning_args.reward_model_type == "lora": 83 | model.pretrained_model.load_adapter(finetuning_args.reward_model, "reward") 84 | for name, param in model.named_parameters(): # https://github.com/huggingface/peft/issues/1090 85 | if "default" in name: 86 | param.data = param.data.to(torch.float32) # trainable params should in fp32 87 | vhead_params = load_valuehead_params(finetuning_args.reward_model, model_args) 88 | assert vhead_params is not None, "Reward model is not correctly loaded." 89 | model.register_buffer("reward_head_weight", vhead_params["v_head.summary.weight"], persistent=False) 90 | model.register_buffer("reward_head_bias", vhead_params["v_head.summary.bias"], persistent=False) 91 | model.register_buffer("default_head_weight", torch.zeros_like(vhead_params["v_head.summary.weight"]), persistent=False) 92 | model.register_buffer("default_head_bias", torch.zeros_like(vhead_params["v_head.summary.bias"]), persistent=False) 93 | logger.info("Loaded adapter weights of reward model from {}".format(finetuning_args.reward_model)) 94 | return None 95 | else: 96 | reward_model_args_dict = model_args.to_dict() 97 | reward_model_args_dict.update(dict( 98 | model_name_or_path=finetuning_args.reward_model, 99 | adapter_name_or_path=finetuning_args.reward_model_adapters, 100 | quantization_bit=finetuning_args.reward_model_quantization_bit 101 | )) 102 | reward_model_args = ModelArguments(**reward_model_args_dict) 103 | reward_finetuning_args = FinetuningArguments(finetuning_type="lora") 104 | reward_model, _ = load_model_and_tokenizer( 105 | reward_model_args, reward_finetuning_args, is_trainable=False, add_valuehead=True 106 | ) 107 | logger.info("Loaded full weights of reward model from {}".format(finetuning_args.reward_model)) 108 | logger.warning("Please ensure the ppo model and reward model share SAME tokenizer and vocabulary.") 109 | return reward_model 110 | -------------------------------------------------------------------------------- /evaluation/mmlu/mmlu.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | import os 15 | 16 | import datasets 17 | import pandas as pd 18 | 19 | 20 | _CITATION = """\ 21 | @article{hendryckstest2021, 22 | title={Measuring Massive Multitask Language Understanding}, 23 | author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt}, 24 | journal={Proceedings of the International Conference on Learning Representations (ICLR)}, 25 | year={2021} 26 | } 27 | """ 28 | 29 | _DESCRIPTION = """\ 30 | Measuring Massive Multitask Language Understanding by Dan Hendrycks, Collin Burns, Steven Basart, Andy Zou, Mantas Mazeika, Dawn Song, and Jacob Steinhardt (ICLR 2021). 31 | """ 32 | 33 | _HOMEPAGE = "https://github.com/hendrycks/test" 34 | 35 | _LICENSE = "MIT" 36 | 37 | _URL = "mmlu.zip" 38 | 39 | task_list = [ 40 | "high_school_european_history", 41 | "business_ethics", 42 | "clinical_knowledge", 43 | "medical_genetics", 44 | "high_school_us_history", 45 | "high_school_physics", 46 | "high_school_world_history", 47 | "virology", 48 | "high_school_microeconomics", 49 | "econometrics", 50 | "college_computer_science", 51 | "high_school_biology", 52 | "abstract_algebra", 53 | "professional_accounting", 54 | "philosophy", 55 | "professional_medicine", 56 | "nutrition", 57 | "global_facts", 58 | "machine_learning", 59 | "security_studies", 60 | "public_relations", 61 | "professional_psychology", 62 | "prehistory", 63 | "anatomy", 64 | "human_sexuality", 65 | "college_medicine", 66 | "high_school_government_and_politics", 67 | "college_chemistry", 68 | "logical_fallacies", 69 | "high_school_geography", 70 | "elementary_mathematics", 71 | "human_aging", 72 | "college_mathematics", 73 | "high_school_psychology", 74 | "formal_logic", 75 | "high_school_statistics", 76 | "international_law", 77 | "high_school_mathematics", 78 | "high_school_computer_science", 79 | "conceptual_physics", 80 | "miscellaneous", 81 | "high_school_chemistry", 82 | "marketing", 83 | "professional_law", 84 | "management", 85 | "college_physics", 86 | "jurisprudence", 87 | "world_religions", 88 | "sociology", 89 | "us_foreign_policy", 90 | "high_school_macroeconomics", 91 | "computer_security", 92 | "moral_scenarios", 93 | "moral_disputes", 94 | "electrical_engineering", 95 | "astronomy", 96 | "college_biology", 97 | ] 98 | 99 | 100 | class MMLUConfig(datasets.BuilderConfig): 101 | def __init__(self, **kwargs): 102 | super().__init__(version=datasets.Version("1.0.0"), **kwargs) 103 | 104 | 105 | class MMLU(datasets.GeneratorBasedBuilder): 106 | BUILDER_CONFIGS = [ 107 | MMLUConfig( 108 | name=task_name, 109 | ) 110 | for task_name in task_list 111 | ] 112 | 113 | def _info(self): 114 | features = datasets.Features( 115 | { 116 | "question": datasets.Value("string"), 117 | "A": datasets.Value("string"), 118 | "B": datasets.Value("string"), 119 | "C": datasets.Value("string"), 120 | "D": datasets.Value("string"), 121 | "answer": datasets.Value("string"), 122 | } 123 | ) 124 | return datasets.DatasetInfo( 125 | description=_DESCRIPTION, 126 | features=features, 127 | homepage=_HOMEPAGE, 128 | license=_LICENSE, 129 | citation=_CITATION, 130 | ) 131 | 132 | def _split_generators(self, dl_manager): 133 | data_dir = dl_manager.download_and_extract(_URL) 134 | task_name = self.config.name 135 | return [ 136 | datasets.SplitGenerator( 137 | name=datasets.Split.TEST, 138 | gen_kwargs={ 139 | "filepath": os.path.join( 140 | data_dir, "data", "test", f"{task_name}_test.csv" 141 | ), 142 | }, 143 | ), 144 | datasets.SplitGenerator( 145 | 
name=datasets.Split.VALIDATION, 146 | gen_kwargs={ 147 | "filepath": os.path.join( 148 | data_dir, "data", "val", f"{task_name}_val.csv" 149 | ), 150 | }, 151 | ), 152 | datasets.SplitGenerator( 153 | name=datasets.Split.TRAIN, 154 | gen_kwargs={ 155 | "filepath": os.path.join( 156 | data_dir, "data", "dev", f"{task_name}_dev.csv" 157 | ), 158 | }, 159 | ), 160 | ] 161 | 162 | def _generate_examples(self, filepath): 163 | df = pd.read_csv(filepath) 164 | df.columns = ["question", "A", "B", "C", "D", "answer"] 165 | 166 | for i, instance in enumerate(df.to_dict(orient="records")): 167 | yield i, instance 168 | -------------------------------------------------------------------------------- /src/llmtuner/model/loader.py: -------------------------------------------------------------------------------- 1 | from typing import TYPE_CHECKING, Optional, Tuple 2 | from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer 3 | from transformers.integrations import is_deepspeed_zero3_enabled 4 | from transformers.utils.versions import require_version 5 | from trl import AutoModelForCausalLMWithValueHead 6 | 7 | from llmtuner.extras.logging import get_logger 8 | from llmtuner.extras.misc import count_parameters, get_current_device, try_download_model_from_ms 9 | from llmtuner.model.adapter import init_adapter 10 | from llmtuner.model.patcher import patch_config, patch_tokenizer, patch_model, patch_valuehead_model 11 | from llmtuner.model.utils import load_valuehead_params, register_autoclass 12 | 13 | if TYPE_CHECKING: 14 | from transformers import PreTrainedModel, PreTrainedTokenizer 15 | from llmtuner.hparams import ModelArguments, FinetuningArguments 16 | 17 | 18 | logger = get_logger(__name__) 19 | 20 | 21 | require_version("transformers>=4.36.2", "To fix: pip install transformers>=4.36.2") 22 | require_version("datasets>=2.14.3", "To fix: pip install datasets>=2.14.3") 23 | require_version("accelerate>=0.21.0", "To fix: pip install accelerate>=0.21.0") 24 | require_version("peft>=0.7.0", "To fix: pip install peft>=0.7.0") 25 | require_version("trl>=0.7.6", "To fix: pip install trl>=0.7.6") 26 | 27 | 28 | def load_model_and_tokenizer( 29 | model_args: "ModelArguments", 30 | finetuning_args: "FinetuningArguments", 31 | is_trainable: Optional[bool] = False, 32 | add_valuehead: Optional[bool] = False 33 | ) -> Tuple["PreTrainedModel", "PreTrainedTokenizer"]: 34 | r""" 35 | Loads pretrained model and tokenizer. 36 | 37 | Support both training and inference. 
38 | """ 39 | 40 | try_download_model_from_ms(model_args) 41 | 42 | config_kwargs = { 43 | "trust_remote_code": True, 44 | "cache_dir": model_args.cache_dir, 45 | "revision": model_args.model_revision, 46 | "token": model_args.hf_hub_token 47 | } 48 | 49 | tokenizer = AutoTokenizer.from_pretrained( 50 | model_args.model_name_or_path, 51 | use_fast=model_args.use_fast_tokenizer, 52 | split_special_tokens=model_args.split_special_tokens, 53 | padding_side="right", 54 | **config_kwargs 55 | ) 56 | patch_tokenizer(tokenizer) 57 | 58 | config = AutoConfig.from_pretrained(model_args.model_name_or_path, **config_kwargs) 59 | patch_config(config, tokenizer, model_args, config_kwargs, is_trainable) 60 | 61 | model = None 62 | if is_trainable and model_args.use_unsloth: 63 | require_version("unsloth", "Follow the instructions at: https://github.com/unslothai/unsloth") 64 | from unsloth import FastLlamaModel, FastMistralModel # type: ignore 65 | unsloth_kwargs = { 66 | "model_name": model_args.model_name_or_path, 67 | "max_seq_length": model_args.model_max_length, 68 | "dtype": model_args.compute_dtype, 69 | "load_in_4bit": model_args.quantization_bit == 4, 70 | "token": model_args.hf_hub_token, 71 | "device_map": get_current_device(), 72 | "rope_scaling": getattr(config, "rope_scaling", None) 73 | } 74 | if getattr(config, "model_type", None) == "llama": 75 | model, _ = FastLlamaModel.from_pretrained(**unsloth_kwargs) 76 | elif getattr(config, "model_type", None) == "mistral": 77 | model, _ = FastMistralModel.from_pretrained(**unsloth_kwargs) 78 | else: 79 | logger.warning("Unsloth does not support model type {}.".format(getattr(config, "model_type", None))) 80 | model_args.use_unsloth = False 81 | 82 | if model_args.adapter_name_or_path: 83 | model_args.adapter_name_or_path = None 84 | logger.warning("Unsloth does not support loading adapters.") 85 | 86 | if model is None: 87 | model = AutoModelForCausalLM.from_pretrained( 88 | model_args.model_name_or_path, 89 | config=config, 90 | torch_dtype=model_args.compute_dtype, 91 | low_cpu_mem_usage=(not is_deepspeed_zero3_enabled()), 92 | **config_kwargs 93 | ) 94 | 95 | patch_model(model, tokenizer, model_args, is_trainable) 96 | register_autoclass(config, model, tokenizer) 97 | 98 | model = init_adapter(model, model_args, finetuning_args, is_trainable) 99 | 100 | if add_valuehead: 101 | model: "AutoModelForCausalLMWithValueHead" = AutoModelForCausalLMWithValueHead.from_pretrained(model) 102 | patch_valuehead_model(model) 103 | 104 | if model_args.adapter_name_or_path is not None: 105 | vhead_path = model_args.adapter_name_or_path[-1] 106 | else: 107 | vhead_path = model_args.model_name_or_path 108 | 109 | vhead_params = load_valuehead_params(vhead_path, model_args) 110 | if vhead_params is not None: 111 | model.load_state_dict(vhead_params, strict=False) 112 | logger.info("Loaded valuehead from checkpoint: {}".format(vhead_path)) 113 | 114 | if not is_trainable: 115 | model.requires_grad_(False) 116 | model = model.to(model_args.compute_dtype) if not getattr(model, "quantization_method", None) else model 117 | model.eval() 118 | else: 119 | model.train() 120 | 121 | trainable_params, all_param = count_parameters(model) 122 | logger.info("trainable params: {:d} || all params: {:d} || trainable%: {:.4f}".format( 123 | trainable_params, all_param, 100 * trainable_params / all_param 124 | )) 125 | 126 | if not is_trainable: 127 | logger.info("This IS expected that the trainable params is 0 if you are using model for inference only.") 128 | 129 | return 
model, tokenizer 130 | -------------------------------------------------------------------------------- /evaluation/cmmlu/cmmlu.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import os 15 | 16 | import datasets 17 | import pandas as pd 18 | 19 | 20 | _CITATION = """\ 21 | @article{li2023cmmlu, 22 | title={CMMLU: Measuring massive multitask language understanding in Chinese}, 23 | author={Haonan Li and Yixuan Zhang and Fajri Koto and Yifei Yang and Hai Zhao and Yeyun Gong and Nan Duan and Timothy Baldwin}, 24 | journal={arXiv preprint arXiv:2306.09212}, 25 | year={2023} 26 | } 27 | """ 28 | 29 | _DESCRIPTION = """\ 30 | CMMLU is a comprehensive Chinese assessment suite specifically designed to evaluate the advanced knowledge and reasoning abilities of LLMs within the Chinese language and cultural context. 31 | """ 32 | 33 | _HOMEPAGE = "https://github.com/haonan-li/CMMLU" 34 | 35 | _LICENSE = "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License" 36 | 37 | _URL = "cmmlu.zip" 38 | 39 | task_list = [ 40 | 'agronomy', 41 | 'anatomy', 42 | 'ancient_chinese', 43 | 'arts', 44 | 'astronomy', 45 | 'business_ethics', 46 | 'chinese_civil_service_exam', 47 | 'chinese_driving_rule', 48 | 'chinese_food_culture', 49 | 'chinese_foreign_policy', 50 | 'chinese_history', 51 | 'chinese_literature', 52 | 'chinese_teacher_qualification', 53 | 'clinical_knowledge', 54 | 'college_actuarial_science', 55 | 'college_education', 56 | 'college_engineering_hydrology', 57 | 'college_law', 58 | 'college_mathematics', 59 | 'college_medical_statistics', 60 | 'college_medicine', 61 | 'computer_science', 62 | 'computer_security', 63 | 'conceptual_physics', 64 | 'construction_project_management', 65 | 'economics', 66 | 'education', 67 | 'electrical_engineering', 68 | 'elementary_chinese', 69 | 'elementary_commonsense', 70 | 'elementary_information_and_technology', 71 | 'elementary_mathematics', 72 | 'ethnology', 73 | 'food_science', 74 | 'genetics', 75 | 'global_facts', 76 | 'high_school_biology', 77 | 'high_school_chemistry', 78 | 'high_school_geography', 79 | 'high_school_mathematics', 80 | 'high_school_physics', 81 | 'high_school_politics', 82 | 'human_sexuality', 83 | 'international_law', 84 | 'journalism', 85 | 'jurisprudence', 86 | 'legal_and_moral_basis', 87 | 'logical', 88 | 'machine_learning', 89 | 'management', 90 | 'marketing', 91 | 'marxist_theory', 92 | 'modern_chinese', 93 | 'nutrition', 94 | 'philosophy', 95 | 'professional_accounting', 96 | 'professional_law', 97 | 'professional_medicine', 98 | 'professional_psychology', 99 | 'public_relations', 100 | 'security_study', 101 | 'sociology', 102 | 'sports_science', 103 | 'traditional_chinese_medicine', 104 | 'virology', 105 | 'world_history', 106 | 'world_religions', 107 | ] 108 | 109 | 110 | class 
CMMLUConfig(datasets.BuilderConfig): 111 | def __init__(self, **kwargs): 112 | super().__init__(version=datasets.Version("1.0.1"), **kwargs) 113 | 114 | 115 | class CMMLU(datasets.GeneratorBasedBuilder): 116 | BUILDER_CONFIGS = [ 117 | CMMLUConfig( 118 | name=task_name, 119 | ) 120 | for task_name in task_list 121 | ] 122 | 123 | def _info(self): 124 | features = datasets.Features( 125 | { 126 | "question": datasets.Value("string"), 127 | "A": datasets.Value("string"), 128 | "B": datasets.Value("string"), 129 | "C": datasets.Value("string"), 130 | "D": datasets.Value("string"), 131 | "answer": datasets.Value("string"), 132 | } 133 | ) 134 | return datasets.DatasetInfo( 135 | description=_DESCRIPTION, 136 | features=features, 137 | homepage=_HOMEPAGE, 138 | license=_LICENSE, 139 | citation=_CITATION, 140 | ) 141 | 142 | def _split_generators(self, dl_manager): 143 | data_dir = dl_manager.download_and_extract(_URL) 144 | task_name = self.config.name 145 | return [ 146 | datasets.SplitGenerator( 147 | name=datasets.Split.TEST, 148 | gen_kwargs={ 149 | "filepath": os.path.join(data_dir, f"test/{task_name}.csv"), 150 | }, 151 | ), 152 | datasets.SplitGenerator( 153 | name=datasets.Split.TRAIN, 154 | gen_kwargs={ 155 | "filepath": os.path.join(data_dir, f"dev/{task_name}.csv"), 156 | }, 157 | ), 158 | ] 159 | 160 | def _generate_examples(self, filepath): 161 | df = pd.read_csv(filepath, header=0, index_col=0, encoding="utf-8") 162 | for i, instance in enumerate(df.to_dict(orient="records")): 163 | question = instance.pop("Question", "") 164 | answer = instance.pop("Answer", "") 165 | instance["question"] = question 166 | instance["answer"] = answer 167 | yield i, instance 168 | -------------------------------------------------------------------------------- /src/llmtuner/hparams/model_args.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Literal, Optional 2 | from dataclasses import asdict, dataclass, field 3 | 4 | 5 | @dataclass 6 | class ModelArguments: 7 | r""" 8 | Arguments pertaining to which model/config/tokenizer we are going to fine-tune. 
9 | """ 10 | model_name_or_path: str = field( 11 | metadata={"help": "Path to the model weight or identifier from huggingface.co/models or modelscope.cn/models."} 12 | ) 13 | adapter_name_or_path: Optional[str] = field( 14 | default=None, 15 | metadata={"help": "Path to the adapter weight or identifier from huggingface.co/models."} 16 | ) 17 | cache_dir: Optional[str] = field( 18 | default=None, 19 | metadata={"help": "Where to store the pre-trained models downloaded from huggingface.co or modelscope.cn."} 20 | ) 21 | use_fast_tokenizer: Optional[bool] = field( 22 | default=False, 23 | metadata={"help": "Whether or not to use one of the fast tokenizer (backed by the tokenizers library)."} 24 | ) 25 | resize_vocab: Optional[bool] = field( 26 | default=False, 27 | metadata={"help": "Whether or not to resize the tokenizer vocab and the embedding layers."} 28 | ) 29 | split_special_tokens: Optional[bool] = field( 30 | default=False, 31 | metadata={"help": "Whether or not the special tokens should be split during the tokenization process."} 32 | ) 33 | model_revision: Optional[str] = field( 34 | default="main", 35 | metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."} 36 | ) 37 | quantization_bit: Optional[int] = field( 38 | default=None, 39 | metadata={"help": "The number of bits to quantize the model."} 40 | ) 41 | quantization_type: Optional[Literal["fp4", "nf4"]] = field( 42 | default="nf4", 43 | metadata={"help": "Quantization data type to use in int4 training."} 44 | ) 45 | double_quantization: Optional[bool] = field( 46 | default=True, 47 | metadata={"help": "Whether or not to use double quantization in int4 training."} 48 | ) 49 | rope_scaling: Optional[Literal["linear", "dynamic"]] = field( 50 | default=None, 51 | metadata={"help": "Which scaling strategy should be adopted for the RoPE embeddings."} 52 | ) 53 | flash_attn: Optional[bool] = field( 54 | default=False, 55 | metadata={"help": "Enable FlashAttention-2 for faster training."} 56 | ) 57 | shift_attn: Optional[bool] = field( 58 | default=False, 59 | metadata={"help": "Enable shift short attention (S^2-Attn) proposed by LongLoRA."} 60 | ) 61 | use_unsloth: Optional[bool] = field( 62 | default=False, 63 | metadata={"help": "Whether or not to use unsloth's optimization for the LoRA training."} 64 | ) 65 | disable_gradient_checkpointing: Optional[bool] = field( 66 | default=False, 67 | metadata={"help": "Whether or not to disable gradient checkpointing."} 68 | ) 69 | upcast_layernorm: Optional[bool] = field( 70 | default=False, 71 | metadata={"help": "Whether or not to upcast the layernorm weights in fp32."} 72 | ) 73 | hf_hub_token: Optional[str] = field( 74 | default=None, 75 | metadata={"help": "Auth token to log in with Hugging Face Hub."} 76 | ) 77 | ms_hub_token: Optional[str] = field( 78 | default=None, 79 | metadata={"help": "Auth token to log in with ModelScope Hub."} 80 | ) 81 | export_dir: Optional[str] = field( 82 | default=None, 83 | metadata={"help": "Path to the directory to save the exported model."} 84 | ) 85 | export_size: Optional[int] = field( 86 | default=1, 87 | metadata={"help": "The file shard size (in GB) of the exported model."} 88 | ) 89 | export_quantization_bit: Optional[int] = field( 90 | default=None, 91 | metadata={"help": "The number of bits to quantize the exported model."} 92 | ) 93 | export_quantization_dataset: Optional[str] = field( 94 | default=None, 95 | metadata={"help": "Path to the dataset or dataset name to use in quantizing the exported 
model."} 96 | ) 97 | export_quantization_nsamples: Optional[int] = field( 98 | default=128, 99 | metadata={"help": "The number of samples used for quantization."} 100 | ) 101 | export_quantization_maxlen: Optional[int] = field( 102 | default=1024, 103 | metadata={"help": "The maximum length of the model inputs used for quantization."} 104 | ) 105 | export_legacy_format: Optional[bool] = field( 106 | default=False, 107 | metadata={"help": "Whether or not to save the `.bin` files instead of `.safetensors`."} 108 | ) 109 | 110 | def __post_init__(self): 111 | self.compute_dtype = None 112 | self.model_max_length = None 113 | 114 | if self.split_special_tokens and self.use_fast_tokenizer: 115 | raise ValueError("`split_special_tokens` is only supported for slow tokenizers.") 116 | 117 | if self.adapter_name_or_path is not None: # support merging multiple lora weights 118 | self.adapter_name_or_path = [path.strip() for path in self.adapter_name_or_path.split(",")] 119 | 120 | assert self.quantization_bit in [None, 8, 4], "We only accept 4-bit or 8-bit quantization." 121 | assert self.export_quantization_bit in [None, 8, 4, 3, 2], "We only accept 2/3/4/8-bit quantization." 122 | 123 | if self.export_quantization_bit is not None and self.export_quantization_dataset is None: 124 | raise ValueError("Quantization dataset is necessary for exporting.") 125 | 126 | def to_dict(self) -> Dict[str, Any]: 127 | return asdict(self) 128 | -------------------------------------------------------------------------------- /src/llmtuner/model/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import inspect 3 | from typing import TYPE_CHECKING, Any, Dict, List 4 | from transformers import PreTrainedModel 5 | from transformers.utils import cached_file 6 | from transformers.trainer import WEIGHTS_NAME, SAFE_WEIGHTS_NAME 7 | 8 | from llmtuner.extras.logging import get_logger 9 | from llmtuner.extras.misc import get_current_device 10 | 11 | if TYPE_CHECKING: 12 | from transformers import PretrainedConfig, PreTrainedTokenizer 13 | from llmtuner.hparams import ModelArguments, DataArguments, FinetuningArguments 14 | 15 | 16 | logger = get_logger(__name__) 17 | 18 | 19 | def dispatch_model(model: "PreTrainedModel") -> "PreTrainedModel": 20 | r""" 21 | Dispatches a pre-trained model to GPUs with balanced memory when the GPU is available. 22 | Borrowed from: https://github.com/huggingface/transformers/blob/v4.36.2/src/transformers/modeling_utils.py#L3570 23 | """ 24 | if getattr(model, "quantization_method", None): # already set on current device 25 | return model 26 | 27 | if ( 28 | torch.cuda.device_count() > 1 29 | and isinstance(model, PreTrainedModel) 30 | and getattr(model.config, "model_type", None) != "chatglm" 31 | ): 32 | from accelerate import dispatch_model 33 | from accelerate.utils import infer_auto_device_map, get_balanced_memory 34 | 35 | if getattr(model, "_no_split_modules", None) is None: 36 | raise ValueError("The model class needs to implement the `_no_split_modules` attribute.") 37 | 38 | kwargs = {"dtype": model.dtype, "no_split_module_classes": model._get_no_split_modules("auto")} 39 | max_memory = get_balanced_memory(model, **kwargs) 40 | # Make sure tied weights are tied before creating the device map. 
41 | model.tie_weights() 42 | device_map = infer_auto_device_map(model, max_memory=max_memory, **kwargs) 43 | device_map_kwargs = {"device_map": device_map} 44 | if "skip_keys" in inspect.signature(dispatch_model).parameters: 45 | device_map_kwargs["skip_keys"] = model._skip_keys_device_placement 46 | return dispatch_model(model, **device_map_kwargs) 47 | else: 48 | return model.to(device=get_current_device()) 49 | 50 | 51 | def find_all_linear_modules(model: "PreTrainedModel") -> List[str]: 52 | r""" 53 | Finds all available modules to apply lora. 54 | """ 55 | quantization_method = getattr(model, "quantization_method", None) 56 | if quantization_method is None: 57 | linear_cls = torch.nn.Linear 58 | elif quantization_method == "bitsandbytes": 59 | import bitsandbytes as bnb 60 | linear_cls = bnb.nn.Linear4bit if getattr(model, "is_loaded_in_4bit", False) else bnb.nn.Linear8bitLt 61 | else: 62 | raise ValueError("Finding linear modules for {} models is not supported.".format(quantization_method)) 63 | 64 | output_layer_names = ["lm_head"] 65 | if model.config.model_type == "chatglm": 66 | output_layer_names.append("output_layer") 67 | 68 | module_names = set() 69 | for name, module in model.named_modules(): 70 | if ( 71 | isinstance(module, linear_cls) 72 | and not any([output_layer in name for output_layer in output_layer_names]) 73 | ): 74 | module_names.add(name.split(".")[-1]) 75 | 76 | logger.info("Found linear modules: {}".format(",".join(module_names))) 77 | return list(module_names) 78 | 79 | 80 | def get_modelcard_args( 81 | model_args: "ModelArguments", 82 | data_args: "DataArguments", 83 | finetuning_args: "FinetuningArguments" 84 | ) -> Dict[str, Any]: 85 | return { 86 | "tasks": "text-generation", 87 | "license": "other", 88 | "finetuned_from": model_args.model_name_or_path, 89 | "dataset": [dataset.strip() for dataset in data_args.dataset.split(",")], 90 | "tags": ["llama-factory"] + (["lora"] if finetuning_args.finetuning_type == "lora" else []) 91 | } 92 | 93 | 94 | def load_valuehead_params(path_or_repo_id: str, model_args: "ModelArguments") -> Dict[str, torch.Tensor]: 95 | r""" 96 | Loads value head parameters from Hugging Face Hub or local disk. 97 | 98 | Returns: dict with keys `v_head.summary.weight` and `v_head.summary.bias`. 
99 | """ 100 | kwargs = { 101 | "path_or_repo_id": path_or_repo_id, 102 | "cache_dir": model_args.cache_dir, 103 | "token": model_args.hf_hub_token 104 | } 105 | 106 | try: 107 | from safetensors import safe_open 108 | vhead_file = cached_file(filename=SAFE_WEIGHTS_NAME, **kwargs) 109 | with safe_open(vhead_file, framework="pt", device="cpu") as f: 110 | return { 111 | "v_head.summary.weight": f.get_tensor("v_head.summary.weight"), 112 | "v_head.summary.bias": f.get_tensor("v_head.summary.bias") 113 | } 114 | except Exception as err: 115 | logger.info("Failed to load {}: {}".format(SAFE_WEIGHTS_NAME, str(err))) 116 | 117 | try: 118 | vhead_file = cached_file(filename=WEIGHTS_NAME, **kwargs) 119 | return torch.load(vhead_file, map_location="cpu") 120 | except Exception as err: 121 | logger.info("Failed to load {}: {}".format(WEIGHTS_NAME, str(err))) 122 | 123 | logger.warning("Provided path ({}) does not contain valuehead weights.".format(path_or_repo_id)) 124 | return None 125 | 126 | 127 | def register_autoclass(config: "PretrainedConfig", model: "PreTrainedModel", tokenizer: "PreTrainedTokenizer"): 128 | if "AutoConfig" in getattr(config, "auto_map", {}): 129 | config.__class__.register_for_auto_class() 130 | if "AutoModelForCausalLM" in getattr(config, "auto_map", {}): 131 | model.__class__.register_for_auto_class() 132 | if "AutoTokenizer" in tokenizer.init_kwargs.get("auto_map", {}): 133 | tokenizer.__class__.register_for_auto_class() 134 | -------------------------------------------------------------------------------- /evaluation/ceval/ceval.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import os 15 | 16 | import datasets 17 | import pandas as pd 18 | 19 | 20 | _CITATION = """\ 21 | @article{huang2023ceval, 22 | title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, 23 | author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian}, 24 | journal={arXiv preprint arXiv:2305.08322}, 25 | year={2023} 26 | } 27 | """ 28 | 29 | _DESCRIPTION = """\ 30 | C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels. 
31 | """ 32 | 33 | _HOMEPAGE = "https://cevalbenchmark.com" 34 | 35 | _LICENSE = "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License" 36 | 37 | _URL = "ceval.zip" 38 | 39 | task_list = [ 40 | "computer_network", 41 | "operating_system", 42 | "computer_architecture", 43 | "college_programming", 44 | "college_physics", 45 | "college_chemistry", 46 | "advanced_mathematics", 47 | "probability_and_statistics", 48 | "discrete_mathematics", 49 | "electrical_engineer", 50 | "metrology_engineer", 51 | "high_school_mathematics", 52 | "high_school_physics", 53 | "high_school_chemistry", 54 | "high_school_biology", 55 | "middle_school_mathematics", 56 | "middle_school_biology", 57 | "middle_school_physics", 58 | "middle_school_chemistry", 59 | "veterinary_medicine", 60 | "college_economics", 61 | "business_administration", 62 | "marxism", 63 | "mao_zedong_thought", 64 | "education_science", 65 | "teacher_qualification", 66 | "high_school_politics", 67 | "high_school_geography", 68 | "middle_school_politics", 69 | "middle_school_geography", 70 | "modern_chinese_history", 71 | "ideological_and_moral_cultivation", 72 | "logic", 73 | "law", 74 | "chinese_language_and_literature", 75 | "art_studies", 76 | "professional_tour_guide", 77 | "legal_professional", 78 | "high_school_chinese", 79 | "high_school_history", 80 | "middle_school_history", 81 | "civil_servant", 82 | "sports_science", 83 | "plant_protection", 84 | "basic_medicine", 85 | "clinical_medicine", 86 | "urban_and_rural_planner", 87 | "accountant", 88 | "fire_engineer", 89 | "environmental_impact_assessment_engineer", 90 | "tax_accountant", 91 | "physician", 92 | ] 93 | 94 | 95 | class CevalConfig(datasets.BuilderConfig): 96 | def __init__(self, **kwargs): 97 | super().__init__(version=datasets.Version("1.0.0"), **kwargs) 98 | 99 | 100 | class Ceval(datasets.GeneratorBasedBuilder): 101 | BUILDER_CONFIGS = [ 102 | CevalConfig( 103 | name=task_name, 104 | ) 105 | for task_name in task_list 106 | ] 107 | 108 | def _info(self): 109 | features = datasets.Features( 110 | { 111 | "id": datasets.Value("int32"), 112 | "question": datasets.Value("string"), 113 | "A": datasets.Value("string"), 114 | "B": datasets.Value("string"), 115 | "C": datasets.Value("string"), 116 | "D": datasets.Value("string"), 117 | "answer": datasets.Value("string"), 118 | "explanation": datasets.Value("string"), 119 | } 120 | ) 121 | return datasets.DatasetInfo( 122 | description=_DESCRIPTION, 123 | features=features, 124 | homepage=_HOMEPAGE, 125 | license=_LICENSE, 126 | citation=_CITATION, 127 | ) 128 | 129 | def _split_generators(self, dl_manager): 130 | data_dir = dl_manager.download_and_extract(_URL) 131 | task_name = self.config.name 132 | return [ 133 | datasets.SplitGenerator( 134 | name=datasets.Split.TEST, 135 | gen_kwargs={ 136 | "filepath": os.path.join( 137 | data_dir, "test", f"{task_name}_test.csv" 138 | ), 139 | }, 140 | ), 141 | datasets.SplitGenerator( 142 | name=datasets.Split.VALIDATION, 143 | gen_kwargs={ 144 | "filepath": os.path.join( 145 | data_dir, "val", f"{task_name}_val.csv" 146 | ), 147 | }, 148 | ), 149 | datasets.SplitGenerator( 150 | name=datasets.Split.TRAIN, 151 | gen_kwargs={ 152 | "filepath": os.path.join( 153 | data_dir, "dev", f"{task_name}_dev.csv" 154 | ), 155 | }, 156 | ), 157 | ] 158 | 159 | def _generate_examples(self, filepath): 160 | df = pd.read_csv(filepath, encoding="utf-8") 161 | for i, instance in enumerate(df.to_dict(orient="records")): 162 | if "answer" not in instance.keys(): 163 | 
instance["answer"] = "" 164 | if "explanation" not in instance.keys(): 165 | instance["explanation"] = "" 166 | yield i, instance 167 | -------------------------------------------------------------------------------- /evaluation/mmlu/mapping.json: -------------------------------------------------------------------------------- 1 | { 2 | "abstract_algebra": { 3 | "name": "abstract algebra", 4 | "category": "STEM" 5 | }, 6 | "anatomy": { 7 | "name": "anatomy", 8 | "category": "Other" 9 | }, 10 | "astronomy": { 11 | "name": "astronomy", 12 | "category": "STEM" 13 | }, 14 | "business_ethics": { 15 | "name": "business ethics", 16 | "category": "Other" 17 | }, 18 | "clinical_knowledge": { 19 | "name": "clinical knowledge", 20 | "category": "Other" 21 | }, 22 | "college_biology": { 23 | "name": "college biology", 24 | "category": "STEM" 25 | }, 26 | "college_chemistry": { 27 | "name": "college chemistry", 28 | "category": "STEM" 29 | }, 30 | "college_computer_science": { 31 | "name": "college computer science", 32 | "category": "STEM" 33 | }, 34 | "college_mathematics": { 35 | "name": "college mathematics", 36 | "category": "STEM" 37 | }, 38 | "college_medicine": { 39 | "name": "college medicine", 40 | "category": "Other" 41 | }, 42 | "college_physics": { 43 | "name": "college physics", 44 | "category": "STEM" 45 | }, 46 | "computer_security": { 47 | "name": "computer security", 48 | "category": "STEM" 49 | }, 50 | "conceptual_physics": { 51 | "name": "conceptual physics", 52 | "category": "STEM" 53 | }, 54 | "econometrics": { 55 | "name": "econometrics", 56 | "category": "Social Sciences" 57 | }, 58 | "electrical_engineering": { 59 | "name": "electrical engineering", 60 | "category": "STEM" 61 | }, 62 | "elementary_mathematics": { 63 | "name": "elementary mathematics", 64 | "category": "STEM" 65 | }, 66 | "formal_logic": { 67 | "name": "formal logic", 68 | "category": "Humanities" 69 | }, 70 | "global_facts": { 71 | "name": "global facts", 72 | "category": "Other" 73 | }, 74 | "high_school_biology": { 75 | "name": "high school biology", 76 | "category": "STEM" 77 | }, 78 | "high_school_chemistry": { 79 | "name": "high school chemistry", 80 | "category": "STEM" 81 | }, 82 | "high_school_computer_science": { 83 | "name": "high school computer science", 84 | "category": "STEM" 85 | }, 86 | "high_school_european_history": { 87 | "name": "high school european history", 88 | "category": "Humanities" 89 | }, 90 | "high_school_geography": { 91 | "name": "high school geography", 92 | "category": "Social Sciences" 93 | }, 94 | "high_school_government_and_politics": { 95 | "name": "high school government and politics", 96 | "category": "Social Sciences" 97 | }, 98 | "high_school_macroeconomics": { 99 | "name": "high school macroeconomics", 100 | "category": "Social Sciences" 101 | }, 102 | "high_school_mathematics": { 103 | "name": "high school mathematics", 104 | "category": "STEM" 105 | }, 106 | "high_school_microeconomics": { 107 | "name": "high school microeconomics", 108 | "category": "Social Sciences" 109 | }, 110 | "high_school_physics": { 111 | "name": "high school physics", 112 | "category": "STEM" 113 | }, 114 | "high_school_psychology": { 115 | "name": "high school psychology", 116 | "category": "Social Sciences" 117 | }, 118 | "high_school_statistics": { 119 | "name": "high school statistics", 120 | "category": "STEM" 121 | }, 122 | "high_school_us_history": { 123 | "name": "high school us history", 124 | "category": "Humanities" 125 | }, 126 | "high_school_world_history": { 127 | "name": "high 
school world history", 128 | "category": "Humanities" 129 | }, 130 | "human_aging": { 131 | "name": "human aging", 132 | "category": "Other" 133 | }, 134 | "human_sexuality": { 135 | "name": "human sexuality", 136 | "category": "Social Sciences" 137 | }, 138 | "international_law": { 139 | "name": "international law", 140 | "category": "Humanities" 141 | }, 142 | "jurisprudence": { 143 | "name": "jurisprudence", 144 | "category": "Humanities" 145 | }, 146 | "logical_fallacies": { 147 | "name": "logical fallacies", 148 | "category": "Humanities" 149 | }, 150 | "machine_learning": { 151 | "name": "machine learning", 152 | "category": "STEM" 153 | }, 154 | "management": { 155 | "name": "management", 156 | "category": "Other" 157 | }, 158 | "marketing": { 159 | "name": "marketing", 160 | "category": "Other" 161 | }, 162 | "medical_genetics": { 163 | "name": "medical genetics", 164 | "category": "Other" 165 | }, 166 | "miscellaneous": { 167 | "name": "miscellaneous", 168 | "category": "Other" 169 | }, 170 | "moral_disputes": { 171 | "name": "moral disputes", 172 | "category": "Humanities" 173 | }, 174 | "moral_scenarios": { 175 | "name": "moral scenarios", 176 | "category": "Humanities" 177 | }, 178 | "nutrition": { 179 | "name": "nutrition", 180 | "category": "Other" 181 | }, 182 | "philosophy": { 183 | "name": "philosophy", 184 | "category": "Humanities" 185 | }, 186 | "prehistory": { 187 | "name": "prehistory", 188 | "category": "Humanities" 189 | }, 190 | "professional_accounting": { 191 | "name": "professional accounting", 192 | "category": "Other" 193 | }, 194 | "professional_law": { 195 | "name": "professional law", 196 | "category": "Humanities" 197 | }, 198 | "professional_medicine": { 199 | "name": "professional medicine", 200 | "category": "Other" 201 | }, 202 | "professional_psychology": { 203 | "name": "professional psychology", 204 | "category": "Social Sciences" 205 | }, 206 | "public_relations": { 207 | "name": "public relations", 208 | "category": "Social Sciences" 209 | }, 210 | "security_studies": { 211 | "name": "security studies", 212 | "category": "Social Sciences" 213 | }, 214 | "sociology": { 215 | "name": "sociology", 216 | "category": "Social Sciences" 217 | }, 218 | "us_foreign_policy": { 219 | "name": "us foreign policy", 220 | "category": "Social Sciences" 221 | }, 222 | "virology": { 223 | "name": "virology", 224 | "category": "Other" 225 | }, 226 | "world_religions": { 227 | "name": "world religions", 228 | "category": "Humanities" 229 | } 230 | } -------------------------------------------------------------------------------- /src/llmtuner/model/adapter.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from typing import TYPE_CHECKING 3 | from transformers.integrations import is_deepspeed_zero3_enabled 4 | from peft import PeftModel, TaskType, LoraConfig, get_peft_model 5 | 6 | from llmtuner.extras.logging import get_logger 7 | from llmtuner.model.utils import find_all_linear_modules 8 | 9 | if TYPE_CHECKING: 10 | from transformers.modeling_utils import PreTrainedModel 11 | from llmtuner.hparams import ModelArguments, FinetuningArguments 12 | 13 | 14 | logger = get_logger(__name__) 15 | 16 | 17 | def init_adapter( 18 | model: "PreTrainedModel", 19 | model_args: "ModelArguments", 20 | finetuning_args: "FinetuningArguments", 21 | is_trainable: bool 22 | ) -> "PreTrainedModel": 23 | r""" 24 | Initializes the adapters. 25 | 26 | Support full-parameter, freeze and LoRA training. 
27 | 28 | Note that the trainable parameters must be cast to float32. 29 | """ 30 | 31 | if (not is_trainable) and model_args.adapter_name_or_path is None: 32 | logger.info("Adapter is not found at evaluation, load the base model.") 33 | return model 34 | 35 | if finetuning_args.finetuning_type == "full" and is_trainable: 36 | logger.info("Fine-tuning method: Full") 37 | model = model.float() 38 | 39 | if finetuning_args.finetuning_type == "freeze" and is_trainable: 40 | logger.info("Fine-tuning method: Freeze") 41 | num_layers = ( 42 | getattr(model.config, "num_hidden_layers", None) 43 | or getattr(model.config, "num_layers", None) 44 | or getattr(model.config, "n_layer", None) 45 | ) 46 | if not num_layers: 47 | raise ValueError("Current model does not support freeze tuning.") 48 | 49 | if finetuning_args.num_layer_trainable > 0: # fine-tuning the last n layers if num_layer_trainable > 0 50 | trainable_layer_ids = [num_layers - k - 1 for k in range(finetuning_args.num_layer_trainable)] 51 | else: # fine-tuning the first n layers if num_layer_trainable < 0 52 | trainable_layer_ids = [k for k in range(-finetuning_args.num_layer_trainable)] 53 | 54 | trainable_layers = [] 55 | for module_name in finetuning_args.name_module_trainable: 56 | for idx in trainable_layer_ids: 57 | trainable_layers.append("{:d}.{}".format(idx, module_name)) 58 | 59 | for name, param in model.named_parameters(): 60 | if not any(trainable_layer in name for trainable_layer in trainable_layers): 61 | param.requires_grad_(False) 62 | else: 63 | param.data = param.data.to(torch.float32) 64 | 65 | if finetuning_args.finetuning_type == "lora": 66 | logger.info("Fine-tuning method: LoRA") 67 | adapter_to_resume = None 68 | 69 | if model_args.adapter_name_or_path is not None: 70 | is_mergeable = True 71 | if getattr(model, "quantization_method", None): # merge lora in quantized model is unstable 72 | assert len(model_args.adapter_name_or_path) == 1, "Quantized model only accepts a single adapter." 73 | is_mergeable = False 74 | 75 | if is_deepspeed_zero3_enabled(): 76 | assert len(model_args.adapter_name_or_path) == 1, "Cannot use multiple adapters in DeepSpeed ZeRO-3." 
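# (ZeRO-3 shards the base model parameters across devices, so LoRA weights cannot be safely merged into the base model here; the adapter is kept separate and resumed instead.)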
77 | is_mergeable = False 78 | 79 | if (is_trainable and not finetuning_args.create_new_adapter) or (not is_mergeable): 80 | adapter_to_merge = model_args.adapter_name_or_path[:-1] 81 | adapter_to_resume = model_args.adapter_name_or_path[-1] 82 | else: 83 | adapter_to_merge = model_args.adapter_name_or_path 84 | 85 | for adapter in adapter_to_merge: 86 | model = PeftModel.from_pretrained(model, adapter) 87 | model = model.merge_and_unload() 88 | 89 | if len(adapter_to_merge) > 0: 90 | logger.info("Merged {} adapter(s).".format(len(adapter_to_merge))) 91 | 92 | if adapter_to_resume is not None: # resume lora training 93 | model = PeftModel.from_pretrained(model, adapter_to_resume, is_trainable=is_trainable) 94 | 95 | if is_trainable and adapter_to_resume is None: # create new lora weights while training 96 | if len(finetuning_args.lora_target) == 1 and finetuning_args.lora_target[0] == "all": 97 | target_modules = find_all_linear_modules(model) 98 | else: 99 | target_modules = finetuning_args.lora_target 100 | 101 | peft_kwargs = { 102 | "r": finetuning_args.lora_rank, 103 | "target_modules": target_modules, 104 | "lora_alpha": finetuning_args.lora_alpha, 105 | "lora_dropout": finetuning_args.lora_dropout 106 | } 107 | 108 | if model_args.use_unsloth: 109 | from unsloth import FastLlamaModel, FastMistralModel # type: ignore 110 | unsloth_peft_kwargs = {"model": model, "max_seq_length": model_args.model_max_length} 111 | if getattr(model.config, "model_type", None) == "llama": 112 | model = FastLlamaModel.get_peft_model(**peft_kwargs, **unsloth_peft_kwargs) 113 | elif getattr(model.config, "model_type", None) == "mistral": 114 | model = FastMistralModel.get_peft_model(**peft_kwargs, **unsloth_peft_kwargs) 115 | else: 116 | raise NotImplementedError 117 | 118 | else: 119 | lora_config = LoraConfig( 120 | task_type=TaskType.CAUSAL_LM, 121 | inference_mode=False, 122 | modules_to_save=finetuning_args.additional_target, 123 | **peft_kwargs 124 | ) 125 | model = get_peft_model(model, lora_config) 126 | 127 | for param in filter(lambda p: p.requires_grad, model.parameters()): 128 | param.data = param.data.to(torch.float32) 129 | 130 | if model_args.adapter_name_or_path is not None: 131 | logger.info("Loaded adapter(s): {}".format(",".join(model_args.adapter_name_or_path))) 132 | 133 | return model 134 | -------------------------------------------------------------------------------- /evaluation/cmmlu/mapping.json: -------------------------------------------------------------------------------- 1 | { 2 | "agronomy": { 3 | "name": "农学", 4 | "category": "Other" 5 | }, 6 | "anatomy": { 7 | "name": "解剖学", 8 | "category": "STEM" 9 | }, 10 | "ancient_chinese": { 11 | "name": "古汉语", 12 | "category": "Social Sciences" 13 | }, 14 | "arts": { 15 | "name": "艺术学", 16 | "category": "Humanities" 17 | }, 18 | "astronomy": { 19 | "name": "天文学", 20 | "category": "STEM" 21 | }, 22 | "business_ethics": { 23 | "name": "商业伦理", 24 | "category": "Social Sciences" 25 | }, 26 | "chinese_civil_service_exam": { 27 | "name": "中国公务员考试", 28 | "category": "Social Sciences" 29 | }, 30 | "chinese_driving_rule": { 31 | "name": "中国驾驶规则", 32 | "category": "Other" 33 | }, 34 | "chinese_food_culture": { 35 | "name": "中国饮食文化", 36 | "category": "Social Sciences" 37 | }, 38 | "chinese_foreign_policy": { 39 | "name": "中国外交政策", 40 | "category": "Social Sciences" 41 | }, 42 | "chinese_history": { 43 | "name": "中国历史", 44 | "category": "Humanities" 45 | }, 46 | "chinese_literature": { 47 | "name": "中国文学", 48 | "category": "Humanities" 49 | }, 
50 | "chinese_teacher_qualification": { 51 | "name": "中国教师资格", 52 | "category": "Social Sciences" 53 | }, 54 | "college_actuarial_science": { 55 | "name": "大学精算学", 56 | "category": "STEM" 57 | }, 58 | "college_education": { 59 | "name": "大学教育学", 60 | "category": "Social Sciences" 61 | }, 62 | "college_engineering_hydrology": { 63 | "name": "大学工程水文学", 64 | "category": "STEM" 65 | }, 66 | "college_law": { 67 | "name": "大学法律", 68 | "category": "Humanities" 69 | }, 70 | "college_mathematics": { 71 | "name": "大学数学", 72 | "category": "STEM" 73 | }, 74 | "college_medical_statistics": { 75 | "name": "大学医学统计", 76 | "category": "STEM" 77 | }, 78 | "clinical_knowledge": { 79 | "name": "临床知识", 80 | "category": "Other" 81 | }, 82 | "college_medicine": { 83 | "name": "大学医学", 84 | "category": "Other" 85 | }, 86 | "computer_science": { 87 | "name": "计算机科学", 88 | "category": "STEM" 89 | }, 90 | "computer_security": { 91 | "name": "计算机安全", 92 | "category": "Other" 93 | }, 94 | "conceptual_physics": { 95 | "name": "概念物理学", 96 | "category": "STEM" 97 | }, 98 | "construction_project_management": { 99 | "name": "建设工程管理", 100 | "category": "Other" 101 | }, 102 | "economics": { 103 | "name": "经济学", 104 | "category": "Social Sciences" 105 | }, 106 | "education": { 107 | "name": "教育学", 108 | "category": "Social Sciences" 109 | }, 110 | "elementary_chinese": { 111 | "name": "小学语文", 112 | "category": "Social Sciences" 113 | }, 114 | "elementary_commonsense": { 115 | "name": "小学常识", 116 | "category": "Other" 117 | }, 118 | "elementary_information_and_technology": { 119 | "name": "小学信息技术", 120 | "category": "Other" 121 | }, 122 | "electrical_engineering": { 123 | "name": "电气工程", 124 | "category": "STEM" 125 | }, 126 | "elementary_mathematics": { 127 | "name": "初等数学", 128 | "category": "STEM" 129 | }, 130 | "ethnology": { 131 | "name": "民族学", 132 | "category": "Social Sciences" 133 | }, 134 | "food_science": { 135 | "name": "食品科学", 136 | "category": "Other" 137 | }, 138 | "genetics": { 139 | "name": "遗传学", 140 | "category": "STEM" 141 | }, 142 | "global_facts": { 143 | "name": "全球事实", 144 | "category": "Humanities" 145 | }, 146 | "high_school_biology": { 147 | "name": "高中生物", 148 | "category": "STEM" 149 | }, 150 | "high_school_chemistry": { 151 | "name": "高中化学", 152 | "category": "STEM" 153 | }, 154 | "high_school_geography": { 155 | "name": "高中地理", 156 | "category": "Social Sciences" 157 | }, 158 | "high_school_mathematics": { 159 | "name": "高中数学", 160 | "category": "STEM" 161 | }, 162 | "high_school_physics": { 163 | "name": "高中物理学", 164 | "category": "STEM" 165 | }, 166 | "high_school_politics": { 167 | "name": "高中政治", 168 | "category": "Social Sciences" 169 | }, 170 | "human_sexuality": { 171 | "name": "人类性行为", 172 | "category": "Other" 173 | }, 174 | "international_law": { 175 | "name": "国际法学", 176 | "category": "Humanities" 177 | }, 178 | "journalism": { 179 | "name": "新闻学", 180 | "category": "Social Sciences" 181 | }, 182 | "jurisprudence": { 183 | "name": "法理学", 184 | "category": "Humanities" 185 | }, 186 | "legal_and_moral_basis": { 187 | "name": "法律与道德基础", 188 | "category": "Other" 189 | }, 190 | "logical": { 191 | "name": "逻辑学", 192 | "category": "Humanities" 193 | }, 194 | "machine_learning": { 195 | "name": "机器学习", 196 | "category": "STEM" 197 | }, 198 | "management": { 199 | "name": "管理学", 200 | "category": "Social Sciences" 201 | }, 202 | "marketing": { 203 | "name": "市场营销", 204 | "category": "Social Sciences" 205 | }, 206 | "marxist_theory": { 207 | "name": "马克思主义理论", 208 | "category": "Humanities" 209 
| }, 210 | "modern_chinese": { 211 | "name": "现代汉语", 212 | "category": "Social Sciences" 213 | }, 214 | "nutrition": { 215 | "name": "营养学", 216 | "category": "Other" 217 | }, 218 | "philosophy": { 219 | "name": "哲学", 220 | "category": "Humanities" 221 | }, 222 | "professional_accounting": { 223 | "name": "专业会计", 224 | "category": "Social Sciences" 225 | }, 226 | "professional_law": { 227 | "name": "专业法学", 228 | "category": "Humanities" 229 | }, 230 | "professional_medicine": { 231 | "name": "专业医学", 232 | "category": "Other" 233 | }, 234 | "professional_psychology": { 235 | "name": "专业心理学", 236 | "category": "Social Sciences" 237 | }, 238 | "public_relations": { 239 | "name": "公共关系", 240 | "category": "Social Sciences" 241 | }, 242 | "security_study": { 243 | "name": "安全研究", 244 | "category": "Social Sciences" 245 | }, 246 | "sociology": { 247 | "name": "社会学", 248 | "category": "Social Sciences" 249 | }, 250 | "sports_science": { 251 | "name": "体育学", 252 | "category": "Other" 253 | }, 254 | "traditional_chinese_medicine": { 255 | "name": "中医中药", 256 | "category": "Other" 257 | }, 258 | "virology": { 259 | "name": "病毒学", 260 | "category": "STEM" 261 | }, 262 | "world_history": { 263 | "name": "世界历史", 264 | "category": "Humanities" 265 | }, 266 | "world_religions": { 267 | "name": "世界宗教", 268 | "category": "Humanities" 269 | } 270 | } -------------------------------------------------------------------------------- /src/llmtuner/eval/evaluator.py: -------------------------------------------------------------------------------- 1 | # Inspired by: https://github.com/hendrycks/test/blob/master/evaluate_flan.py 2 | 3 | import os 4 | import json 5 | import torch 6 | import inspect 7 | import tiktoken 8 | import numpy as np 9 | from tqdm import tqdm, trange 10 | from typing import Any, Dict, List, Optional 11 | 12 | from datasets import load_dataset 13 | from transformers.utils import cached_file 14 | 15 | from llmtuner.data.template import get_template_and_fix_tokenizer 16 | from llmtuner.eval.template import get_eval_template 17 | from llmtuner.extras.constants import CHOICES, SUBJECTS 18 | from llmtuner.model import dispatch_model, get_eval_args, load_model_and_tokenizer 19 | 20 | 21 | class Evaluator: 22 | 23 | def __init__(self, args: Optional[Dict[str, Any]] = None) -> None: 24 | self.model_args, self.data_args, self.eval_args, finetuning_args = get_eval_args(args) 25 | self.model, self.tokenizer = load_model_and_tokenizer(self.model_args, finetuning_args) 26 | self.tokenizer.padding_side = "right" # avoid overflow issue in batched inference for llama2 27 | self.model = dispatch_model(self.model) 28 | self.template = get_template_and_fix_tokenizer(self.data_args.template, self.tokenizer) 29 | self.eval_template = get_eval_template(self.eval_args.lang) 30 | self.choice_inputs = self._encode_choices() 31 | 32 | def _encode_choices(self) -> List[int]: 33 | if isinstance(getattr(self.tokenizer, "tokenizer", None), tiktoken.Encoding): # for tiktoken tokenizer (Qwen) 34 | kwargs = dict(allowed_special="all") 35 | else: 36 | kwargs = dict(add_special_tokens=False) 37 | 38 | return [self.tokenizer.encode(self.eval_template.prefix + ch, **kwargs)[-1] for ch in CHOICES] 39 | 40 | @torch.inference_mode() 41 | def batch_inference(self, batch_input: Dict[str, torch.Tensor]) -> List[str]: 42 | logits = self.model(**batch_input).logits 43 | lengths = torch.sum(batch_input["attention_mask"], dim=-1) 44 | word_probs = torch.stack([logits[i, lengths[i] - 1] for i in range(len(lengths))], dim=0) 45 | 
choice_probs = torch.nn.functional.softmax(word_probs[:, self.choice_inputs], dim=-1).detach() 46 | return [chr(ord("A") + offset.item()) for offset in torch.argmax(choice_probs, dim=-1)] 47 | 48 | def eval(self) -> None: 49 | if "token" in inspect.signature(cached_file).parameters: 50 | kwargs = {"token": self.model_args.hf_hub_token} 51 | elif "use_auth_token" in inspect.signature(cached_file).parameters: # for transformers==4.31.0 52 | kwargs = {"use_auth_token": self.model_args.hf_hub_token} 53 | 54 | mapping = cached_file( 55 | path_or_repo_id = os.path.join(self.eval_args.task_dir, self.eval_args.task), 56 | filename="mapping.json", 57 | cache_dir=self.model_args.cache_dir, 58 | **kwargs 59 | ) 60 | 61 | with open(mapping, "r", encoding="utf-8") as f: 62 | categorys: Dict[str, Dict[str, str]] = json.load(f) 63 | 64 | category_corrects = {subj: np.array([], dtype="bool") for subj in SUBJECTS} 65 | pbar = tqdm(categorys.keys(), desc="Processing subjects", position=0) 66 | results = {} 67 | for subject in pbar: 68 | dataset = load_dataset( 69 | path=os.path.join(self.eval_args.task_dir, self.eval_args.task), 70 | name=subject, 71 | cache_dir=self.model_args.cache_dir, 72 | download_mode=self.eval_args.download_mode, 73 | token=self.model_args.hf_hub_token 74 | ) 75 | pbar.set_postfix_str(categorys[subject]["name"]) 76 | inputs, outputs, labels = [], [], [] 77 | for i in trange(len(dataset[self.data_args.split]), desc="Formatting batches", position=1, leave=False): 78 | support_set = dataset["train"].shuffle().select(range(min(self.eval_args.n_shot, len(dataset["train"])))) 79 | query, resp, history = self.eval_template.format_example( 80 | target_data=dataset[self.data_args.split][i], 81 | support_set=support_set, 82 | subject_name=categorys[subject]["name"], 83 | use_history=self.template.use_history 84 | ) 85 | input_ids, _ = self.template.encode_oneturn( 86 | tokenizer=self.tokenizer, query=query, resp=resp, history=history 87 | ) 88 | inputs.append({"input_ids": input_ids, "attention_mask": [1] * len(input_ids)}) 89 | labels.append(resp) 90 | 91 | for i in trange(0, len(inputs), self.eval_args.batch_size, desc="Predicting batches", position=1, leave=False): 92 | batch_input = self.tokenizer.pad( 93 | inputs[i : i + self.eval_args.batch_size], return_attention_mask=True, return_tensors="pt" 94 | ).to(self.model.device) 95 | preds = self.batch_inference(batch_input) 96 | outputs += preds 97 | 98 | corrects = (np.array(outputs) == np.array(labels)) 99 | category_name = categorys[subject]["category"] 100 | category_corrects[category_name] = np.concatenate([category_corrects[category_name], corrects], axis=0) 101 | category_corrects["Average"] = np.concatenate([category_corrects["Average"], corrects], axis=0) 102 | results[subject] = {str(i): outputs[i] for i in range(len(outputs))} 103 | 104 | pbar.close() 105 | self._save_results(category_corrects, results) 106 | 107 | def _save_results(self, category_corrects: Dict[str, np.ndarray], results: Dict[str, Dict[int, str]]) -> None: 108 | score_info = "\n".join([ 109 | "{:>15}: {:.2f}".format(category_name, 100 * np.mean(category_correct)) 110 | for category_name, category_correct in category_corrects.items() if len(category_correct) 111 | ]) 112 | print(score_info) 113 | if self.eval_args.save_dir is not None: 114 | os.makedirs(self.eval_args.save_dir, exist_ok=False) 115 | with open(os.path.join(self.eval_args.save_dir, "results.json"), "w", encoding="utf-8", newline="\n") as f: 116 | json.dump(results, f, indent=2) 117 | 118 | with 
open(os.path.join(self.eval_args.save_dir, "results.log"), "w", encoding="utf-8", newline="\n") as f: 119 | f.write(score_info) 120 | 121 | 122 | if __name__ == "__main__": 123 | evaluator = Evaluator() 124 | evaluator.eval() 125 | -------------------------------------------------------------------------------- /src/llmtuner/train/dpo/trainer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from collections import defaultdict 3 | from typing import TYPE_CHECKING, Dict, Literal, Optional, Tuple, Union 4 | from transformers import BatchEncoding, Trainer 5 | from trl import DPOTrainer 6 | from trl.trainer.utils import disable_dropout_in_model 7 | 8 | from llmtuner.extras.constants import IGNORE_INDEX 9 | 10 | if TYPE_CHECKING: 11 | from transformers import PreTrainedModel 12 | 13 | 14 | class CustomDPOTrainer(DPOTrainer): 15 | 16 | def __init__( 17 | self, 18 | beta: float, 19 | loss_type: Literal["sigmoid", "hinge", "ipo", "kto"], 20 | ftx_gamma: float, 21 | model: Union["PreTrainedModel", torch.nn.Module], 22 | ref_model: Optional[Union["PreTrainedModel", torch.nn.Module]] = None, 23 | disable_dropout: Optional[bool] = True, 24 | **kwargs 25 | ): 26 | if disable_dropout: 27 | disable_dropout_in_model(model) 28 | if ref_model is not None: 29 | disable_dropout_in_model(ref_model) 30 | 31 | self.use_dpo_data_collator = True # hack to avoid warning 32 | self.generate_during_eval = False # disable at evaluation 33 | self.label_pad_token_id = IGNORE_INDEX 34 | self.padding_value = 0 35 | self.is_encoder_decoder = model.config.is_encoder_decoder 36 | self.precompute_ref_log_probs = False 37 | self._precomputed_train_ref_log_probs = False 38 | self._precomputed_eval_ref_log_probs = False 39 | 40 | self.ref_model = ref_model 41 | self.beta = beta 42 | self.label_smoothing = 0 43 | self.loss_type = loss_type 44 | self.ftx_gamma = ftx_gamma 45 | self._stored_metrics = defaultdict(lambda: defaultdict(list)) 46 | 47 | Trainer.__init__(self, model=model, **kwargs) 48 | if not hasattr(self, "accelerator"): 49 | raise AttributeError("Please update `transformers`.") 50 | 51 | if ref_model is not None: 52 | if self.is_deepspeed_enabled: 53 | if not ( 54 | getattr(ref_model, "is_loaded_in_8bit", False) 55 | or getattr(ref_model, "is_loaded_in_4bit", False) 56 | ): # quantized models are already set on the correct device 57 | self.ref_model = self._prepare_deepspeed(self.ref_model) 58 | else: 59 | self.ref_model = self.accelerator.prepare_model(self.ref_model, evaluation_mode=True) 60 | 61 | def sft_loss( 62 | self, 63 | chosen_logits: torch.FloatTensor, 64 | chosen_labels: torch.LongTensor 65 | ) -> torch.Tensor: 66 | r""" 67 | Computes supervised cross-entropy loss of given labels under the given logits. 68 | 69 | Returns: 70 | A tensor of shape (batch_size,) containing the cross-entropy loss of each samples. 
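(Computed as the negative mean log-probability of the chosen response tokens, i.e. `get_batch_logps` with `average_log_prob=True`, negated.)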
71 | """ 72 | all_logps = self.get_batch_logps( 73 | chosen_logits, 74 | chosen_labels, 75 | average_log_prob=True 76 | ) 77 | return -all_logps 78 | 79 | def concatenated_forward( 80 | self, 81 | model: "PreTrainedModel", 82 | batch: Dict[str, torch.Tensor] 83 | ) -> Tuple[torch.FloatTensor, torch.FloatTensor, torch.FloatTensor, torch.FloatTensor]: 84 | batch_copied = BatchEncoding({k: v.detach().clone() for k, v in batch.items()}) # avoid error 85 | 86 | all_logits = model( 87 | input_ids=batch_copied["input_ids"], 88 | attention_mask=batch_copied["attention_mask"], 89 | return_dict=True 90 | ).logits.to(torch.float32) 91 | 92 | all_logps = self.get_batch_logps( 93 | all_logits, 94 | batch["labels"], 95 | average_log_prob=False 96 | ) 97 | batch_size = batch["input_ids"].size(0) // 2 98 | chosen_logps, rejected_logps = all_logps.split(batch_size, dim=0) 99 | chosen_logits, rejected_logits = all_logits.split(batch_size, dim=0) 100 | return chosen_logps, rejected_logps, chosen_logits, rejected_logits 101 | 102 | def get_batch_loss_metrics( 103 | self, 104 | model: "PreTrainedModel", 105 | batch: Dict[str, torch.Tensor], 106 | train_eval: Optional[Literal["train", "eval"]] = "train" 107 | ) -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]: 108 | r""" 109 | Computes the DPO loss and other metrics for the given batch of inputs for train or test. 110 | """ 111 | metrics = {} 112 | ( 113 | policy_chosen_logps, 114 | policy_rejected_logps, 115 | policy_chosen_logits, 116 | policy_rejected_logits, 117 | ) = self.concatenated_forward(model, batch) 118 | with torch.no_grad(): 119 | if self.ref_model is None: 120 | with self.accelerator.unwrap_model(self.model).disable_adapter(): 121 | ( 122 | reference_chosen_logps, 123 | reference_rejected_logps, 124 | _, 125 | _, 126 | ) = self.concatenated_forward(self.model, batch) 127 | else: 128 | ( 129 | reference_chosen_logps, 130 | reference_rejected_logps, 131 | _, 132 | _, 133 | ) = self.concatenated_forward(self.ref_model, batch) 134 | 135 | losses, chosen_rewards, rejected_rewards = self.dpo_loss( 136 | policy_chosen_logps, 137 | policy_rejected_logps, 138 | reference_chosen_logps, 139 | reference_rejected_logps, 140 | ) 141 | if self.ftx_gamma > 1e-6: 142 | batch_size = batch["input_ids"].size(0) // 2 143 | chosen_labels, _ = batch["labels"].split(batch_size, dim=0) 144 | losses += self.ftx_gamma * self.sft_loss(policy_chosen_logits, chosen_labels) 145 | 146 | reward_accuracies = (chosen_rewards > rejected_rewards).float() 147 | 148 | prefix = "eval_" if train_eval == "eval" else "" 149 | metrics[f"{prefix}rewards/chosen"] = chosen_rewards.cpu().mean() 150 | metrics[f"{prefix}rewards/rejected"] = rejected_rewards.cpu().mean() 151 | metrics[f"{prefix}rewards/accuracies"] = reward_accuracies.cpu().mean() 152 | metrics[f"{prefix}rewards/margins"] = (chosen_rewards - rejected_rewards).cpu().mean() 153 | metrics[f"{prefix}logps/rejected"] = policy_rejected_logps.detach().cpu().mean() 154 | metrics[f"{prefix}logps/chosen"] = policy_chosen_logps.detach().cpu().mean() 155 | metrics[f"{prefix}logits/rejected"] = policy_rejected_logits.detach().cpu().mean() 156 | metrics[f"{prefix}logits/chosen"] = policy_chosen_logits.detach().cpu().mean() 157 | 158 | return losses.mean(), metrics 159 | -------------------------------------------------------------------------------- /tests/llamafy_qwen.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Converts the Qwen models in the same format as LLaMA2. 
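# The script remaps Qwen's weight names onto the LLaMA2 module layout (wte -> embed_tokens, attn.c_attn -> self_attn.q/k/v_proj, attn.c_proj -> self_attn.o_proj, mlp.w1/w2/c_proj -> mlp.up/gate/down_proj, ln_1/ln_2/ln_f -> the corresponding norm layers) and rewrites config.json to match.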
3 | # Usage: python llamafy_qwen.py --input_dir input --output_dir output --shard_size 10GB 4 | # Converted model: https://huggingface.co/hiyouga/Qwen-14B-Chat-LLaMAfied 5 | 6 | import os 7 | import fire 8 | import json 9 | import torch 10 | from tqdm import tqdm 11 | from collections import OrderedDict 12 | from safetensors import safe_open 13 | from safetensors.torch import save_file 14 | from transformers.modeling_utils import ( 15 | shard_checkpoint, 16 | SAFE_WEIGHTS_NAME, 17 | SAFE_WEIGHTS_INDEX_NAME, 18 | WEIGHTS_NAME, 19 | WEIGHTS_INDEX_NAME 20 | ) 21 | from transformers.utils import check_min_version 22 | from typing import Any, Dict, Optional 23 | 24 | try: 25 | check_min_version("4.34.0") 26 | except: 27 | raise ValueError("Please upgrade `transformers` to 4.34.0") 28 | 29 | 30 | CONFIG_NAME = "config.json" 31 | 32 | 33 | def save_weight( 34 | input_dir: str, 35 | output_dir: str, 36 | shard_size: str, 37 | save_safetensors: bool 38 | ) -> str: 39 | qwen_state_dict: Dict[str, torch.Tensor] = OrderedDict() 40 | for filepath in os.listdir(input_dir): 41 | if os.path.isfile(os.path.join(input_dir, filepath)) and filepath.endswith(".safetensors"): 42 | with safe_open(os.path.join(input_dir, filepath), framework="pt", device="cpu") as f: 43 | for key in f.keys(): 44 | qwen_state_dict[key] = f.get_tensor(key) 45 | 46 | llama2_state_dict: Dict[str, torch.Tensor] = OrderedDict() 47 | torch_dtype = None 48 | for key, value in tqdm(qwen_state_dict.items(), desc="Convert format"): 49 | if torch_dtype is None: 50 | torch_dtype = value.dtype 51 | if "wte" in key: 52 | llama2_state_dict["model.embed_tokens.weight"] = value 53 | elif "ln_f" in key: 54 | llama2_state_dict["model.norm.weight"] = value 55 | else: 56 | key = key.replace("transformer.h", "model.layers") 57 | if "attn.c_attn" in key: 58 | proj_size = value.size(0) // 3 59 | llama2_state_dict[key.replace("attn.c_attn", "self_attn.q_proj")] = value[:proj_size, ...] 60 | llama2_state_dict[key.replace("attn.c_attn", "self_attn.k_proj")] = value[proj_size:2*proj_size, ...] 61 | llama2_state_dict[key.replace("attn.c_attn", "self_attn.v_proj")] = value[2*proj_size:, ...] 
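# (Qwen stores the query, key and value projections fused in a single `attn.c_attn` weight, so the tensor is split into three equal row blocks above to recover separate q/k/v projections.)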
62 | elif "attn.c_proj" in key: 63 | llama2_state_dict[key.replace("attn.c_proj", "self_attn.o_proj")] = value 64 | llama2_state_dict[key.replace("attn.c_proj.weight", "self_attn.o_proj.bias")] = ( 65 | torch.zeros_like(value[:, 0]).squeeze() 66 | ) 67 | elif "ln_1" in key: 68 | llama2_state_dict[key.replace("ln_1", "input_layernorm")] = value 69 | elif "ln_2" in key: 70 | llama2_state_dict[key.replace("ln_2", "post_attention_layernorm")] = value 71 | elif "mlp.w1" in key: 72 | llama2_state_dict[key.replace("mlp.w1", "mlp.up_proj")] = value 73 | elif "mlp.w2" in key: 74 | llama2_state_dict[key.replace("mlp.w2", "mlp.gate_proj")] = value 75 | elif "mlp.c_proj" in key: 76 | llama2_state_dict[key.replace("mlp.c_proj", "mlp.down_proj")] = value 77 | elif "lm_head" in key: 78 | llama2_state_dict[key] = value 79 | else: 80 | raise KeyError("Unable to process key {}".format(key)) 81 | 82 | weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME 83 | shards, index = shard_checkpoint(llama2_state_dict, max_shard_size=shard_size, weights_name=weights_name) 84 | 85 | for shard_file, shard in tqdm(shards.items(), desc="Save weights"): 86 | if save_safetensors: 87 | save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"}) 88 | else: 89 | torch.save(shard, os.path.join(output_dir, shard_file)) 90 | 91 | if index is None: 92 | print("Model weights saved in {}".format(os.path.join(output_dir, weights_name))) 93 | else: 94 | index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME 95 | with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f: 96 | json.dump(index, f, indent=2, sort_keys=True) 97 | print("Model weights saved in {}".format(output_dir)) 98 | 99 | return str(torch_dtype).replace("torch.", "") 100 | 101 | 102 | def save_config( 103 | input_dir: str, 104 | output_dir: str, 105 | torch_dtype: str 106 | ): 107 | with open(os.path.join(input_dir, CONFIG_NAME), "r", encoding="utf-8") as f: 108 | qwen_config_dict: Dict[str, Any] = json.load(f) 109 | 110 | llama2_config_dict: Dict[str, Any] = OrderedDict() 111 | llama2_config_dict["architectures"] = ["LlamaForCausalLM"] 112 | llama2_config_dict["hidden_act"] = "silu" 113 | llama2_config_dict["hidden_size"] = qwen_config_dict["hidden_size"] 114 | llama2_config_dict["initializer_range"] = qwen_config_dict["initializer_range"] 115 | llama2_config_dict["intermediate_size"] = qwen_config_dict["intermediate_size"] // 2 116 | llama2_config_dict["max_position_embeddings"] = qwen_config_dict["max_position_embeddings"] 117 | llama2_config_dict["model_type"] = "llama" 118 | llama2_config_dict["num_attention_heads"] = qwen_config_dict["num_attention_heads"] 119 | llama2_config_dict["num_hidden_layers"] = qwen_config_dict["num_hidden_layers"] 120 | llama2_config_dict["num_key_value_heads"] = qwen_config_dict["hidden_size"] // qwen_config_dict["kv_channels"] 121 | llama2_config_dict["pretraining_tp"] = 1 122 | llama2_config_dict["rms_norm_eps"] = qwen_config_dict["layer_norm_epsilon"] 123 | llama2_config_dict["rope_scaling"] = None 124 | llama2_config_dict["tie_word_embeddings"] = qwen_config_dict["tie_word_embeddings"] 125 | llama2_config_dict["torch_dtype"] = torch_dtype 126 | llama2_config_dict["transformers_version"] = "4.34.0" 127 | llama2_config_dict["use_cache"] = True 128 | llama2_config_dict["vocab_size"] = qwen_config_dict["vocab_size"] 129 | llama2_config_dict["attention_bias"] = True 130 | 131 | with open(os.path.join(output_dir, CONFIG_NAME), "w", encoding="utf-8") as f: 
132 | json.dump(llama2_config_dict, f, indent=2) 133 | print("Model config saved in {}".format(os.path.join(output_dir, CONFIG_NAME))) 134 | 135 | 136 | def llamafy_qwen( 137 | input_dir: str, 138 | output_dir: str, 139 | shard_size: str, 140 | save_safetensors: Optional[bool] = False 141 | ): 142 | try: 143 | os.makedirs(output_dir, exist_ok=False) 144 | except Exception as e: 145 | raise RuntimeError("Output dir already exists: {}".format(e)) 146 | 147 | torch_dtype = save_weight(input_dir, output_dir, shard_size, save_safetensors) 148 | save_config(input_dir, output_dir, torch_dtype) 149 | 150 | 151 | if __name__ == "__main__": 152 | fire.Fire(llamafy_qwen) 153 | -------------------------------------------------------------------------------- /src/llmtuner/webui/components/train.py: -------------------------------------------------------------------------------- 1 | import gradio as gr 2 | from typing import TYPE_CHECKING, Dict 3 | from transformers.trainer_utils import SchedulerType 4 | 5 | from llmtuner.extras.constants import TRAINING_STAGES 6 | from llmtuner.webui.common import list_adapters, list_dataset, DEFAULT_DATA_DIR 7 | from llmtuner.webui.components.data import create_preview_box 8 | from llmtuner.webui.utils import gen_plot 9 | 10 | if TYPE_CHECKING: 11 | from gradio.components import Component 12 | from llmtuner.webui.engine import Engine 13 | 14 | 15 | def create_train_tab(engine: "Engine") -> Dict[str, "Component"]: 16 | input_elems = engine.manager.get_base_elems() 17 | elem_dict = dict() 18 | 19 | with gr.Row(): 20 | training_stage = gr.Dropdown( 21 | choices=list(TRAINING_STAGES.keys()), value=list(TRAINING_STAGES.keys())[0], scale=2 22 | ) 23 | dataset_dir = gr.Textbox(value=DEFAULT_DATA_DIR, scale=2) 24 | dataset = gr.Dropdown(multiselect=True, scale=4) 25 | preview_elems = create_preview_box(dataset_dir, dataset) 26 | 27 | training_stage.change(list_dataset, [dataset_dir, training_stage], [dataset], queue=False) 28 | dataset_dir.change(list_dataset, [dataset_dir, training_stage], [dataset], queue=False) 29 | 30 | input_elems.update({training_stage, dataset_dir, dataset}) 31 | elem_dict.update(dict( 32 | training_stage=training_stage, dataset_dir=dataset_dir, dataset=dataset, **preview_elems 33 | )) 34 | 35 | with gr.Row(): 36 | cutoff_len = gr.Slider(value=1024, minimum=4, maximum=8192, step=1) 37 | learning_rate = gr.Textbox(value="5e-5") 38 | num_train_epochs = gr.Textbox(value="3.0") 39 | max_samples = gr.Textbox(value="100000") 40 | compute_type = gr.Radio(choices=["fp16", "bf16"], value="fp16") 41 | 42 | input_elems.update({cutoff_len, learning_rate, num_train_epochs, max_samples, compute_type}) 43 | elem_dict.update(dict( 44 | cutoff_len=cutoff_len, learning_rate=learning_rate, num_train_epochs=num_train_epochs, 45 | max_samples=max_samples, compute_type=compute_type 46 | )) 47 | 48 | with gr.Row(): 49 | batch_size = gr.Slider(value=4, minimum=1, maximum=512, step=1) 50 | gradient_accumulation_steps = gr.Slider(value=4, minimum=1, maximum=512, step=1) 51 | lr_scheduler_type = gr.Dropdown( 52 | choices=[scheduler.value for scheduler in SchedulerType], value="cosine" 53 | ) 54 | max_grad_norm = gr.Textbox(value="1.0") 55 | val_size = gr.Slider(value=0, minimum=0, maximum=1, step=0.001) 56 | 57 | input_elems.update({batch_size, gradient_accumulation_steps, lr_scheduler_type, max_grad_norm, val_size}) 58 | elem_dict.update(dict( 59 | batch_size=batch_size, gradient_accumulation_steps=gradient_accumulation_steps, 60 | lr_scheduler_type=lr_scheduler_type,
max_grad_norm=max_grad_norm, val_size=val_size 61 | )) 62 | 63 | with gr.Accordion(label="Extra config", open=False) as extra_tab: 64 | with gr.Row(): 65 | logging_steps = gr.Slider(value=5, minimum=5, maximum=1000, step=5) 66 | save_steps = gr.Slider(value=100, minimum=10, maximum=5000, step=10) 67 | warmup_steps = gr.Slider(value=0, minimum=0, maximum=5000, step=1) 68 | neftune_alpha = gr.Slider(value=0, minimum=0, maximum=10, step=0.1) 69 | 70 | with gr.Column(): 71 | train_on_prompt = gr.Checkbox(value=False) 72 | upcast_layernorm = gr.Checkbox(value=False) 73 | 74 | input_elems.update({logging_steps, save_steps, warmup_steps, neftune_alpha, train_on_prompt, upcast_layernorm}) 75 | elem_dict.update(dict( 76 | extra_tab=extra_tab, logging_steps=logging_steps, save_steps=save_steps, warmup_steps=warmup_steps, 77 | neftune_alpha=neftune_alpha, train_on_prompt=train_on_prompt, upcast_layernorm=upcast_layernorm 78 | )) 79 | 80 | with gr.Accordion(label="LoRA config", open=False) as lora_tab: 81 | with gr.Row(): 82 | lora_rank = gr.Slider(value=8, minimum=1, maximum=1024, step=1, scale=1) 83 | lora_dropout = gr.Slider(value=0.1, minimum=0, maximum=1, step=0.01, scale=1) 84 | lora_target = gr.Textbox(scale=1) 85 | additional_target = gr.Textbox(scale=1) 86 | create_new_adapter = gr.Checkbox(scale=1) 87 | 88 | input_elems.update({lora_rank, lora_dropout, lora_target, additional_target, create_new_adapter}) 89 | elem_dict.update(dict( 90 | lora_tab=lora_tab, lora_rank=lora_rank, lora_dropout=lora_dropout, lora_target=lora_target, 91 | additional_target=additional_target, create_new_adapter=create_new_adapter 92 | )) 93 | 94 | with gr.Accordion(label="RLHF config", open=False) as rlhf_tab: 95 | with gr.Row(): 96 | dpo_beta = gr.Slider(value=0.1, minimum=0, maximum=1, step=0.01, scale=1) 97 | reward_model = gr.Dropdown(scale=3, allow_custom_value=True) 98 | refresh_btn = gr.Button(scale=1) 99 | 100 | refresh_btn.click( 101 | list_adapters, 102 | [engine.manager.get_elem_by_name("top.model_name"), engine.manager.get_elem_by_name("top.finetuning_type")], 103 | [reward_model], 104 | queue=False 105 | ) 106 | 107 | input_elems.update({dpo_beta, reward_model}) 108 | elem_dict.update(dict(rlhf_tab=rlhf_tab, dpo_beta=dpo_beta, reward_model=reward_model, refresh_btn=refresh_btn)) 109 | 110 | with gr.Row(): 111 | cmd_preview_btn = gr.Button() 112 | start_btn = gr.Button() 113 | stop_btn = gr.Button() 114 | 115 | with gr.Row(): 116 | with gr.Column(scale=3): 117 | with gr.Row(): 118 | output_dir = gr.Textbox() 119 | 120 | with gr.Row(): 121 | resume_btn = gr.Checkbox(visible=False, interactive=False, value=False) 122 | process_bar = gr.Slider(visible=False, interactive=False) 123 | 124 | with gr.Box(): 125 | output_box = gr.Markdown() 126 | 127 | with gr.Column(scale=1): 128 | loss_viewer = gr.Plot() 129 | 130 | input_elems.add(output_dir) 131 | output_elems = [output_box, process_bar] 132 | 133 | cmd_preview_btn.click(engine.runner.preview_train, input_elems, output_elems) 134 | start_btn.click(engine.runner.run_train, input_elems, output_elems) 135 | stop_btn.click(engine.runner.set_abort, queue=False) 136 | resume_btn.change(engine.runner.monitor, outputs=output_elems) 137 | 138 | elem_dict.update(dict( 139 | cmd_preview_btn=cmd_preview_btn, start_btn=start_btn, stop_btn=stop_btn, output_dir=output_dir, 140 | resume_btn=resume_btn, process_bar=process_bar, output_box=output_box, loss_viewer=loss_viewer 141 | )) 142 | 143 | output_box.change( 144 | gen_plot, 145 | [ 146 | 
engine.manager.get_elem_by_name("top.model_name"), 147 | engine.manager.get_elem_by_name("top.finetuning_type"), 148 | output_dir 149 | ], 150 | loss_viewer, 151 | queue=False 152 | ) 153 | 154 | return elem_dict 155 | --------------------------------------------------------------------------------