├── README.md
├── eval
    ├── data
    │   ├── aime
    │   │   └── test.jsonl
    │   ├── amc
    │   │   └── test.jsonl
    │   ├── cn_math_2024
    │   │   └── test.jsonl
    │   ├── gaokao
    │   │   └── test.jsonl
    │   ├── gpqa
    │   │   └── test.jsonl
    │   ├── grade_school_math
    │   │   └── test.jsonl
    │   ├── kaoyan
    │   │   └── test.jsonl
    │   ├── math
    │   │   └── test.jsonl
    │   ├── minerva
    │   │   └── test.jsonl
    │   └── olympiadbench
    │   │   └── test.jsonl
    ├── eval.py
    ├── eval.sh
    ├── prompt.txt
    ├── prompts
    │   └── qwen-instruct
    │   │   ├── aime.py
    │   │   ├── amc.py
    │   │   ├── gpqa.py
    │   │   ├── math.py
    │   │   ├── minerva.py
    │   │   └── olympiadbench.py
    ├── readme.md
    ├── requirements.txt
    └── utils
    │   ├── __pycache__
    │       ├── grader.cpython-310.pyc
    │       ├── math_normalization.cpython-310.pyc
    │       └── parser.cpython-310.pyc
    │   ├── data_loader.py
    │   ├── examples.py
    │   ├── grader.py
    │   ├── math_normalization.py
    │   ├── parser.py
    │   └── utils.py
├── images
    └── limo.png
└── train
    ├── CITATION.cff
    ├── LICENSE
    ├── MANIFEST.in
    ├── Makefile
    ├── README.md
    ├── README_zh.md
    ├── assets
        ├── benchmark.svg
        ├── logo.png
        ├── wechat.jpg
        └── wechat_npu.jpg
    ├── data
        ├── README.md
        ├── README_zh.md
        ├── alpaca_en_demo.json
        ├── alpaca_zh_demo.json
        ├── belle_multiturn
        │   └── belle_multiturn.py
        ├── c4_demo.json
        ├── dataset_info.json
        ├── dpo_en_demo.json
        ├── dpo_zh_demo.json
        ├── glaive_toolcall_en_demo.json
        ├── glaive_toolcall_zh_demo.json
        ├── hh_rlhf_en
        │   └── hh_rlhf_en.py
        ├── identity.json
        ├── kto_en_demo.json
        ├── limo.json
        ├── mllm_audio_demo.json
        ├── mllm_demo.json
        ├── mllm_demo_data
        │   ├── 1.jpg
        │   ├── 1.mp3
        │   ├── 1.mp4
        │   ├── 2.avi
        │   ├── 2.jpg
        │   ├── 2.wav
        │   ├── 3.flac
        │   ├── 3.jpg
        │   └── 3.mp4
        ├── mllm_video_demo.json
        ├── ultra_chat
        │   └── ultra_chat.py
        └── wiki_demo.txt
    ├── docker
        ├── docker-cuda
        │   ├── Dockerfile
        │   └── docker-compose.yml
        ├── docker-npu
        │   ├── Dockerfile
        │   └── docker-compose.yml
        └── docker-rocm
        │   ├── Dockerfile
        │   └── docker-compose.yml
    ├── evaluation
        ├── ceval
        │   ├── ceval.py
        │   ├── ceval.zip
        │   └── mapping.json
        ├── cmmlu
        │   ├── cmmlu.py
        │   ├── cmmlu.zip
        │   └── mapping.json
        └── mmlu
        │   ├── mapping.json
        │   ├── mmlu.py
        │   └── mmlu.zip
    ├── examples
        ├── README.md
        ├── README_zh.md
        ├── accelerate
        │   └── fsdp_config.yaml
        ├── deepspeed
        │   ├── ds_z0_config.json
        │   ├── ds_z2_config.json
        │   ├── ds_z2_offload_config.json
        │   ├── ds_z3_config.json
        │   └── ds_z3_offload_config.json
        ├── extras
        │   ├── adam_mini
        │   │   └── qwen2_full_sft.yaml
        │   ├── apollo
        │   │   └── llama3_full_sft.yaml
        │   ├── badam
        │   │   └── llama3_full_sft.yaml
        │   ├── fsdp_qlora
        │   │   ├── llama3_lora_sft.yaml
        │   │   └── train.sh
        │   ├── galore
        │   │   └── llama3_full_sft.yaml
        │   ├── llama_pro
        │   │   ├── expand.sh
        │   │   └── llama3_freeze_sft.yaml
        │   ├── loraplus
        │   │   └── llama3_lora_sft.yaml
        │   ├── mod
        │   │   └── llama3_full_sft.yaml
        │   ├── nlg_eval
        │   │   └── llama3_lora_predict.yaml
        │   └── pissa
        │   │   ├── init.sh
        │   │   └── llama3_lora_sft.yaml
        ├── inference
        │   ├── llama3.yaml
        │   ├── llama3_full_sft.yaml
        │   ├── llama3_lora_sft.yaml
        │   ├── llama3_vllm.yaml
        │   ├── llava1_5.yaml
        │   └── qwen2_vl.yaml
        ├── merge_lora
        │   ├── llama3_gptq.yaml
        │   ├── llama3_lora_sft.yaml
        │   └── qwen2vl_lora_sft.yaml
        ├── train_full
        │   ├── llama3_full_sft.yaml
        │   └── qwen2vl_full_sft.yaml
        ├── train_limo.yaml
        ├── train_lora
        │   ├── llama3_lora_dpo.yaml
        │   ├── llama3_lora_eval.yaml
        │   ├── llama3_lora_kto.yaml
        │   ├── llama3_lora_ppo.yaml
        │   ├── llama3_lora_pretrain.yaml
        │   ├── llama3_lora_reward.yaml
        │   ├── llama3_lora_sft.yaml
        │   ├── llama3_lora_sft_ds3.yaml
        │   ├── llama3_lora_sft_ray.yaml
        │   ├── llama3_preprocess.yaml
        │   ├── llava1_5_lora_sft.yaml
        │   ├── qwen2vl_lora_dpo.yaml
        │   └── qwen2vl_lora_sft.yaml
        └── train_qlora
        │   ├── llama3_lora_sft_aqlm.yaml
        │   ├── llama3_lora_sft_awq.yaml
        │   ├── llama3_lora_sft_bnb_npu.yaml
        │   ├── llama3_lora_sft_gptq.yaml
        │   └── llama3_lora_sft_otfq.yaml
    ├── pyproject.toml
    ├── requirements.txt
    ├── scripts
        ├── api_example
        │   ├── test_image.py
        │   └── test_toolcall.py
        ├── convert_ckpt
        │   ├── llamafy_baichuan2.py
        │   └── llamafy_qwen.py
        ├── llama_pro.py
        ├── loftq_init.py
        ├── pissa_init.py
        ├── stat_utils
        │   ├── cal_flops.py
        │   ├── cal_lr.py
        │   ├── cal_mfu.py
        │   ├── cal_ppl.py
        │   └── length_cdf.py
        └── vllm_infer.py
    ├── setup.py
    ├── src
        ├── api.py
        ├── llamafactory
        │   ├── __init__.py
        │   ├── api
        │   │   ├── __init__.py
        │   │   ├── app.py
        │   │   ├── chat.py
        │   │   ├── common.py
        │   │   └── protocol.py
        │   ├── chat
        │   │   ├── __init__.py
        │   │   ├── base_engine.py
        │   │   ├── chat_model.py
        │   │   ├── hf_engine.py
        │   │   └── vllm_engine.py
        │   ├── cli.py
        │   ├── data
        │   │   ├── __init__.py
        │   │   ├── aligner.py
        │   │   ├── collator.py
        │   │   ├── data_utils.py
        │   │   ├── formatter.py
        │   │   ├── loader.py
        │   │   ├── mm_plugin.py
        │   │   ├── parser.py
        │   │   ├── preprocess.py
        │   │   ├── processors
        │   │   │   ├── __init__.py
        │   │   │   ├── feedback.py
        │   │   │   ├── pairwise.py
        │   │   │   ├── pretrain.py
        │   │   │   ├── processor_utils.py
        │   │   │   ├── supervised.py
        │   │   │   └── unsupervised.py
        │   │   ├── template.py
        │   │   └── tool_utils.py
        │   ├── eval
        │   │   ├── __init__.py
        │   │   ├── evaluator.py
        │   │   └── template.py
        │   ├── extras
        │   │   ├── __init__.py
        │   │   ├── constants.py
        │   │   ├── env.py
        │   │   ├── logging.py
        │   │   ├── misc.py
        │   │   ├── packages.py
        │   │   └── ploting.py
        │   ├── hparams
        │   │   ├── __init__.py
        │   │   ├── data_args.py
        │   │   ├── evaluation_args.py
        │   │   ├── finetuning_args.py
        │   │   ├── generating_args.py
        │   │   ├── model_args.py
        │   │   ├── parser.py
        │   │   └── training_args.py
        │   ├── launcher.py
        │   ├── model
        │   │   ├── __init__.py
        │   │   ├── adapter.py
        │   │   ├── loader.py
        │   │   ├── model_utils
        │   │   │   ├── __init__.py
        │   │   │   ├── attention.py
        │   │   │   ├── checkpointing.py
        │   │   │   ├── embedding.py
        │   │   │   ├── liger_kernel.py
        │   │   │   ├── longlora.py
        │   │   │   ├── misc.py
        │   │   │   ├── mod.py
        │   │   │   ├── moe.py
        │   │   │   ├── packing.py
        │   │   │   ├── quantization.py
        │   │   │   ├── rope.py
        │   │   │   ├── unsloth.py
        │   │   │   ├── valuehead.py
        │   │   │   └── visual.py
        │   │   └── patcher.py
        │   ├── train
        │   │   ├── __init__.py
        │   │   ├── callbacks.py
        │   │   ├── dpo
        │   │   │   ├── __init__.py
        │   │   │   ├── trainer.py
        │   │   │   └── workflow.py
        │   │   ├── kto
        │   │   │   ├── __init__.py
        │   │   │   ├── trainer.py
        │   │   │   └── workflow.py
        │   │   ├── ppo
        │   │   │   ├── __init__.py
        │   │   │   ├── ppo_utils.py
        │   │   │   ├── trainer.py
        │   │   │   └── workflow.py
        │   │   ├── pt
        │   │   │   ├── __init__.py
        │   │   │   ├── trainer.py
        │   │   │   └── workflow.py
        │   │   ├── rm
        │   │   │   ├── __init__.py
        │   │   │   ├── metric.py
        │   │   │   ├── trainer.py
        │   │   │   └── workflow.py
        │   │   ├── sft
        │   │   │   ├── __init__.py
        │   │   │   ├── metric.py
        │   │   │   ├── trainer.py
        │   │   │   └── workflow.py
        │   │   ├── test_utils.py
        │   │   ├── trainer_utils.py
        │   │   └── tuner.py
        │   └── webui
        │   │   ├── __init__.py
        │   │   ├── chatter.py
        │   │   ├── common.py
        │   │   ├── components
        │   │       ├── __init__.py
        │   │       ├── chatbot.py
        │   │       ├── data.py
        │   │       ├── eval.py
        │   │       ├── export.py
        │   │       ├── infer.py
        │   │       ├── top.py
        │   │       └── train.py
        │   │   ├── control.py
        │   │   ├── css.py
        │   │   ├── engine.py
        │   │   ├── interface.py
        │   │   ├── locales.py
        │   │   ├── manager.py
        │   │   └── runner.py
        ├── train.py
        └── webui.py
    └── tests
        ├── data
            ├── processors
            │   ├── test_feedback.py
            │   ├── test_pairwise.py
            │   ├── test_processor_utils.py
            │   ├── test_supervised.py
            │   └── test_unsupervised.py
            ├── test_collator.py
            ├── test_formatter.py
            ├── test_mm_plugin.py
            └── test_template.py
        ├── e2e
            ├── test_chat.py
            └── test_train.py
        ├── eval
            └── test_eval_template.py
        ├── model
            ├── model_utils
            │   ├── test_attention.py
            │   ├── test_checkpointing.py
            │   ├── test_misc.py
            │   ├── test_packing.py
            │   └── test_visual.py
            ├── test_base.py
            ├── test_freeze.py
            ├── test_full.py
            ├── test_lora.py
            └── test_pissa.py
        └── train
            └── test_sft_trainer.py


/eval/eval.sh:
--------------------------------------------------------------------------------
 1 | CUDA_VISIBLE_DEVICES='0,1,2,3' \
 2 | python eval.py \
 3 | --model_name_or_path "Qwen/Qwen2.5-32B-Instruct" \
 4 | --data_name "math" \
 5 | --prompt_type "qwen-instruct" \
 6 | --temperature 0.0 \
 7 | --start_idx 0 \
 8 | --end_idx -1 \
 9 | --n_sampling 1 \
10 | --k 1 \
11 | --split "test" \
12 | --max_tokens 32768 \
13 | --seed 0 \
14 | --top_p 1 \
15 | --surround_with_messages \
16 | 
17 | 


--------------------------------------------------------------------------------
/eval/prompt.txt:
--------------------------------------------------------------------------------
 1 | ## System Prompt
 2 | 
 3 | You are an experienced examiner who evaluates whether a student's answer to a given question is correct. 
 4 | Your task is to determine if the student's final answer matches the standard answer provided, based solely on correctness and the question's specific requirements. 
 5 | Do not perform any additional calculations or reinterpret the question. Simply compare the student's answer to the standard answer to determine if it satisfies the question's requirements.
 6 | 
 7 | Focus strictly on:
 8 | 1. Understanding the exact requirement of the question.
 9 | 2. Comparing the student's final answer directly to the provided standard answer.
10 | 3. Your task is not to solve the problem but to determine whether the student's answer is correct based on the question's requirements. Avoid any unnecessary analysis, assumptions, or re-solving the problem.
11 | 
12 | Note:
13 | - For intervals/ranges: The student's answer must cover the EXACT SAME range as the standard answer, NOT just any single value or subset within that range;
14 | - If the standard answer contains multiple solutions connected by "or"/"and", all of them must be listed in the student's answer;
15 | - You must be deterministic - always declare the answer as either CORRECT or WRONG;
16 | 
17 | Your response must include:
18 | ## Analysis
19 | <Provide a brief and direct analysis that compares the student's answer to the standard answer>
20 | 
21 | ## Correctness
22 | <CORRECT/WRONG>
23 | 
24 | 
25 | ## User Prompt
26 | 
27 | Question: {problem}
28 | 
29 | Standard Answer: {standard_answer}
30 | 
31 | Student's Final Answer: {model pred's final answer}
32 | 


--------------------------------------------------------------------------------
/eval/prompts/qwen-instruct/aime.py:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | system_prompt = "Please reason step by step, and put your final answer within \\boxed{}."
4 | 
5 | few_shot_prompt = ""
6 | 
7 | question_format = """{question}"""


--------------------------------------------------------------------------------
/eval/prompts/qwen-instruct/amc.py:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | system_prompt = "Please reason step by step, and put your final answer within \\boxed{}."
4 | 
5 | few_shot_prompt = ""
6 | 
7 | question_format = """{question}"""


--------------------------------------------------------------------------------
/eval/prompts/qwen-instruct/gpqa.py:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | system_prompt = "Please reason step by step, and put your final answer within \\boxed{}."
4 | 
5 | few_shot_prompt = ""
6 | 
7 | question_format = """{question}"""


--------------------------------------------------------------------------------
/eval/prompts/qwen-instruct/math.py:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | system_prompt = "Please reason step by step, and put your final answer within \\boxed{}."
4 | 
5 | few_shot_prompt = ""
6 | 
7 | question_format = """{question}"""


--------------------------------------------------------------------------------
/eval/prompts/qwen-instruct/minerva.py:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | system_prompt = "Please reason step by step, and put your final answer within \\boxed{}."
4 | 
5 | few_shot_prompt = ""
6 | 
7 | question_format = """{question}"""


--------------------------------------------------------------------------------
/eval/prompts/qwen-instruct/olympiadbench.py:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | system_prompt = "Please reason step by step, and put your final answer within \\boxed{}."
4 | 
5 | few_shot_prompt = ""
6 | 
7 | question_format = """{question}"""


--------------------------------------------------------------------------------
/eval/requirements.txt:
--------------------------------------------------------------------------------
 1 | # common
 2 | vllm<=0.6.1
 3 | tqdm
 4 | datasets
 5 | torch
 6 | transformers
 7 | python_dateutil
 8 | flash_attn
 9 | 
10 | # math_eval
11 | sympy==1.12
12 | antlr4-python3-runtime==4.11.1 # ! The version needs to be compatible with sympy.
13 | word2number
14 | Pebble
15 | timeout-decorator
16 | latex2sympy2==1.9.1


--------------------------------------------------------------------------------
/eval/utils/__pycache__/grader.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GAIR-NLP/LIMO/e9951354af219d8c4d454e25ae348395e6598ab0/eval/utils/__pycache__/grader.cpython-310.pyc


--------------------------------------------------------------------------------
/eval/utils/__pycache__/math_normalization.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GAIR-NLP/LIMO/e9951354af219d8c4d454e25ae348395e6598ab0/eval/utils/__pycache__/math_normalization.cpython-310.pyc


--------------------------------------------------------------------------------
/eval/utils/__pycache__/parser.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GAIR-NLP/LIMO/e9951354af219d8c4d454e25ae348395e6598ab0/eval/utils/__pycache__/parser.cpython-310.pyc


--------------------------------------------------------------------------------
/eval/utils/data_loader.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import json
 3 | import random
 4 | from datasets import load_dataset, Dataset, concatenate_datasets
 5 | from utils.utils import load_jsonl, lower_keys
 6 | 
 7 | def load_data(data_name, split, data_dir='./data'):
 8 |     data_file = f"{data_dir}/{data_name}/{split}.jsonl"
 9 |     if os.path.exists(data_file):
10 |         examples = list(load_jsonl(data_file))
11 |     else:
12 |         if data_name == "math":
13 |             dataset = load_dataset("competition_math", split=split, name="main", cache_dir=f"{data_dir}/temp")
14 |         elif data_name == "theorem-qa":
15 |             dataset = load_dataset("wenhu/TheoremQA", split=split)
16 |         elif data_name == "gsm8k":
17 |             dataset = load_dataset(data_name, split=split)
18 |         elif data_name == "gsm-hard":
19 |             dataset = load_dataset("reasoning-machines/gsm-hard", split="train")
20 |         elif data_name == "svamp":
21 |             # evaluate on training set + test set 
22 |             dataset = load_dataset("ChilleD/SVAMP", split="train")
23 |             dataset = concatenate_datasets([dataset, load_dataset("ChilleD/SVAMP", split="test")])
24 |         elif data_name == "asdiv":
25 |             dataset = load_dataset("EleutherAI/asdiv", split="validation")
26 |             dataset = dataset.filter(lambda x: ";" not in x['answer']) # remove multi-answer examples
27 |         elif data_name == "mawps":
28 |             examples = []
29 |             # four sub-tasks
30 |             for data_name in ["singleeq", "singleop", "addsub", "multiarith"]:
31 |                 sub_examples = list(load_jsonl(f"{data_dir}/mawps/{data_name}.jsonl"))
32 |                 for example in sub_examples:
33 |                     example['type'] = data_name
34 |                 examples.extend(sub_examples)
35 |             dataset = Dataset.from_list(examples)
36 |         elif data_name == "finqa":
37 |             dataset = load_dataset("dreamerdeo/finqa", split=split, name="main")
38 |             dataset = dataset.select(random.sample(range(len(dataset)), 1000))
39 |         elif data_name == "tabmwp":
40 |             examples = []
41 |             with open(f"{data_dir}/tabmwp/tabmwp_{split}.json", "r") as f:
42 |                 data_dict = json.load(f)
43 |                 examples.extend(data_dict.values())
44 |             dataset = Dataset.from_list(examples)
45 |             dataset = dataset.select(random.sample(range(len(dataset)), 1000))
46 |         elif data_name == "bbh":
47 |             examples = []
48 |             for data_name in ["reasoning_about_colored_objects", "penguins_in_a_table",\
49 |                             "date_understanding", "repeat_copy_logic", "object_counting"]:
50 |                 with open(f"{data_dir}/bbh/bbh/{data_name}.json", "r") as f:
51 |                     sub_examples = json.load(f)["examples"]
52 |                     for example in sub_examples:
53 |                         example['type'] = data_name
54 |                     examples.extend(sub_examples)
55 |             dataset = Dataset.from_list(examples)
56 |         else:
57 |             raise NotImplementedError(data_name)
58 | 
59 |         examples = list(dataset)
60 |         examples = [lower_keys(example) for example in examples]
61 |         dataset = Dataset.from_list(examples)
62 |         os.makedirs(f"{data_dir}/{data_name}", exist_ok=True)
63 |         dataset.to_json(data_file)
64 | 
65 |     # add 'idx' in the first column
66 |     if 'idx' not in examples[0]:
67 |         examples = [{'idx': i, **example} for i, example in enumerate(examples)]
68 | 
69 |     # dedepulicate & sort
70 |     examples = sorted(examples, key=lambda x: x['idx'])
71 |     return examples


--------------------------------------------------------------------------------
/images/limo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GAIR-NLP/LIMO/e9951354af219d8c4d454e25ae348395e6598ab0/images/limo.png


--------------------------------------------------------------------------------
/train/CITATION.cff:
--------------------------------------------------------------------------------
 1 | cff-version: 1.2.0
 2 | date-released: 2024-03
 3 | message: "If you use this software, please cite it as below."
 4 | authors:
 5 | - family-names: "Zheng"
 6 |   given-names: "Yaowei"
 7 | - family-names: "Zhang"
 8 |   given-names: "Richong"
 9 | - family-names: "Zhang"
10 |   given-names: "Junhao"
11 | - family-names: "Ye"
12 |   given-names: "Yanhan"
13 | - family-names: "Luo"
14 |   given-names: "Zheyan"
15 | - family-names: "Feng"
16 |   given-names: "Zhangchi"
17 | - family-names: "Ma"
18 |   given-names: "Yongqiang"
19 | title: "LlamaFactory: Unified Efficient Fine-Tuning of 100+ Language Models"
20 | url: "https://arxiv.org/abs/2403.13372"
21 | preferred-citation:
22 |   type: conference-paper
23 |   conference:
24 |     name: "Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations)"
25 |   authors:
26 |     - family-names: "Zheng"
27 |       given-names: "Yaowei"
28 |     - family-names: "Zhang"
29 |       given-names: "Richong"
30 |     - family-names: "Zhang"
31 |       given-names: "Junhao"
32 |     - family-names: "Ye"
33 |       given-names: "Yanhan"
34 |     - family-names: "Luo"
35 |       given-names: "Zheyan"
36 |     - family-names: "Feng"
37 |       given-names: "Zhangchi"
38 |     - family-names: "Ma"
39 |       given-names: "Yongqiang"
40 |   title: "LlamaFactory: Unified Efficient Fine-Tuning of 100+ Language Models"
41 |   url: "https://arxiv.org/abs/2403.13372"
42 |   year: 2024
43 |   publisher: "Association for Computational Linguistics"
44 |   address: "Bangkok, Thailand"
45 | 


--------------------------------------------------------------------------------
/train/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE requirements.txt
2 | 


--------------------------------------------------------------------------------
/train/Makefile:
--------------------------------------------------------------------------------
 1 | .PHONY: build commit quality style test
 2 | 
 3 | check_dirs := scripts src tests setup.py
 4 | 
 5 | build:
 6 | 	pip install build && python -m build
 7 | 
 8 | commit:
 9 | 	pre-commit install
10 | 	pre-commit run --all-files
11 | 
12 | quality:
13 | 	ruff check $(check_dirs)
14 | 	ruff format --check $(check_dirs)
15 | 
16 | style:
17 | 	ruff check $(check_dirs) --fix
18 | 	ruff format $(check_dirs)
19 | 
20 | test:
21 | 	CUDA_VISIBLE_DEVICES= WANDB_DISABLED=true pytest -vv tests/
22 | 


--------------------------------------------------------------------------------
/train/assets/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GAIR-NLP/LIMO/e9951354af219d8c4d454e25ae348395e6598ab0/train/assets/logo.png


--------------------------------------------------------------------------------
/train/assets/wechat.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GAIR-NLP/LIMO/e9951354af219d8c4d454e25ae348395e6598ab0/train/assets/wechat.jpg


--------------------------------------------------------------------------------
/train/assets/wechat_npu.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GAIR-NLP/LIMO/e9951354af219d8c4d454e25ae348395e6598ab0/train/assets/wechat_npu.jpg


--------------------------------------------------------------------------------
/train/data/belle_multiturn/belle_multiturn.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | 
 4 | import datasets
 5 | 
 6 | 
 7 | _HF_ENDPOINT = os.getenv("HF_ENDPOINT", "https://huggingface.co")
 8 | 
 9 | _DESCRIPTION = "BELLE multiturn chat dataset."
10 | 
11 | _CITATION = """\
12 | @article{belle2023exploring,
13 |   title={Exploring the Impact of Instruction Data Scaling on Large Language Models: An Empirical Study on Real-World Use Cases},
14 |   author={Yunjie Ji, Yong Deng, Yan Gong, Yiping Peng, Qiang Niu, Lei Zhang, Baochang Ma, Xiangang Li},
15 |   journal={arXiv preprint arXiv:2303.14742},
16 |   year={2023}
17 | }
18 | """
19 | 
20 | _HOMEPAGE = f"{_HF_ENDPOINT}/datasets/BelleGroup/multiturn_chat_0.8M"
21 | _LICENSE = "gpl-3.0"
22 | _URL = f"{_HF_ENDPOINT}/datasets/BelleGroup/multiturn_chat_0.8M/resolve/main/multiturn_chat_0.8M.json"
23 | 
24 | 
25 | class BelleMultiturn(datasets.GeneratorBasedBuilder):
26 |     VERSION = datasets.Version("0.0.0")
27 | 
28 |     def _info(self):
29 |         features = datasets.Features(
30 |             {"conversations": [{"from": datasets.Value("string"), "value": datasets.Value("string")}]}
31 |         )
32 |         return datasets.DatasetInfo(
33 |             description=_DESCRIPTION, features=features, homepage=_HOMEPAGE, license=_LICENSE, citation=_CITATION
34 |         )
35 | 
36 |     def _split_generators(self, dl_manager: datasets.DownloadManager):
37 |         file_path = dl_manager.download(_URL)
38 |         return [datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"filepath": file_path})]
39 | 
40 |     def _generate_examples(self, filepath: str):
41 |         with open(filepath, encoding="utf-8") as f:
42 |             for key, row in enumerate(f):
43 |                 data = json.loads(row)
44 |                 conversations = []
45 |                 prompt = data["instruction"].strip()
46 |                 response = data["output"].strip()
47 | 
48 |                 assist_idx = prompt.rfind("Assistant:")
49 |                 human_idx = prompt.rfind("Human:")
50 |                 query = prompt[human_idx + 6 : assist_idx].strip()
51 |                 prompt = prompt[:human_idx].strip()
52 |                 conversations.insert(0, {"from": "gpt", "value": response})
53 |                 conversations.insert(0, {"from": "human", "value": query})
54 | 
55 |                 while prompt.rfind("Assistant:") != -1:
56 |                     assist_idx = prompt.rfind("Assistant:")
57 |                     human_idx = prompt.rfind("Human:")
58 |                     if human_idx != -1:
59 |                         old_query = prompt[human_idx + 6 : assist_idx].strip()
60 |                         old_resp = prompt[assist_idx + 10 :].strip()
61 |                         conversations.insert(0, {"from": "gpt", "value": old_resp})
62 |                         conversations.insert(0, {"from": "human", "value": old_query})
63 |                     else:
64 |                         break
65 |                     prompt = prompt[:human_idx].strip()
66 | 
67 |                 yield key, {"conversations": conversations}
68 | 


--------------------------------------------------------------------------------
/train/data/mllm_audio_demo.json:
--------------------------------------------------------------------------------
 1 | [
 2 |   {
 3 |     "messages": [
 4 |       {
 5 |         "content": "<audio>What's that sound?",
 6 |         "role": "user"
 7 |       },
 8 |       {
 9 |         "content": "It is the sound of glass shattering.",
10 |         "role": "assistant"
11 |       }
12 |     ],
13 |     "audios": [
14 |       "mllm_demo_data/1.mp3"
15 |     ]
16 |   },
17 |   {
18 |     "messages": [
19 |       {
20 |         "content": "<audio>What can you hear?",
21 |         "role": "user"
22 |       },
23 |       {
24 |         "content": "A woman is coughing.",
25 |         "role": "assistant"
26 |       }
27 |     ],
28 |     "audios": [
29 |       "mllm_demo_data/2.wav"
30 |     ]
31 |   },
32 |   {
33 |     "messages": [
34 |       {
35 |         "content": "<audio>What does the person say?",
36 |         "role": "user"
37 |       },
38 |       {
39 |         "content": "Mister Quiller is the apostle of the middle classes and we are glad to welcome his gospel.",
40 |         "role": "assistant"
41 |       }
42 |     ],
43 |     "audios": [
44 |       "mllm_demo_data/3.flac"
45 |     ]
46 |   }
47 | ]
48 | 


--------------------------------------------------------------------------------
/train/data/mllm_demo.json:
--------------------------------------------------------------------------------
  1 | [
  2 |   {
  3 |     "messages": [
  4 |       {
  5 |         "content": "<image>Who are they?",
  6 |         "role": "user"
  7 |       },
  8 |       {
  9 |         "content": "They're Kane and Gretzka from Bayern Munich.",
 10 |         "role": "assistant"
 11 |       },
 12 |       {
 13 |         "content": "What are they doing?",
 14 |         "role": "user"
 15 |       },
 16 |       {
 17 |         "content": "They are celebrating on the soccer field.",
 18 |         "role": "assistant"
 19 |       }
 20 |     ],
 21 |     "images": [
 22 |       "mllm_demo_data/1.jpg"
 23 |     ]
 24 |   },
 25 |   {
 26 |     "messages": [
 27 |       {
 28 |         "content": "<image>Who is he?",
 29 |         "role": "user"
 30 |       },
 31 |       {
 32 |         "content": "He's Thomas Muller from Bayern Munich.",
 33 |         "role": "assistant"
 34 |       },
 35 |       {
 36 |         "content": "Why is he on the ground?",
 37 |         "role": "user"
 38 |       },
 39 |       {
 40 |         "content": "Because he's sliding on his knees to celebrate.",
 41 |         "role": "assistant"
 42 |       }
 43 |     ],
 44 |     "images": [
 45 |       "mllm_demo_data/2.jpg"
 46 |     ]
 47 |   },
 48 |   {
 49 |     "messages": [
 50 |       {
 51 |         "content": "<image>Please describe this image",
 52 |         "role": "user"
 53 |       },
 54 |       {
 55 |         "content": "Chinese astronaut Gui Haichao is giving a speech.",
 56 |         "role": "assistant"
 57 |       },
 58 |       {
 59 |         "content": "What has he accomplished?",
 60 |         "role": "user"
 61 |       },
 62 |       {
 63 |         "content": "He was appointed to be a payload specialist on Shenzhou 16 mission in June 2022, thus becoming the first Chinese civilian of Group 3 in space on 30 May 2023. He is responsible for the on-orbit operation of space science experimental payloads.",
 64 |         "role": "assistant"
 65 |       }
 66 |     ],
 67 |     "images": [
 68 |       "mllm_demo_data/3.jpg"
 69 |     ]
 70 |   },
 71 |   {
 72 |     "messages": [
 73 |       {
 74 |         "content": "<image>他们是谁？",
 75 |         "role": "user"
 76 |       },
 77 |       {
 78 |         "content": "他们是拜仁慕尼黑的凯恩和格雷茨卡。",
 79 |         "role": "assistant"
 80 |       },
 81 |       {
 82 |         "content": "他们在做什么？",
 83 |         "role": "user"
 84 |       },
 85 |       {
 86 |         "content": "他们在足球场上庆祝。",
 87 |         "role": "assistant"
 88 |       }
 89 |     ],
 90 |     "images": [
 91 |       "mllm_demo_data/1.jpg"
 92 |     ]
 93 |   },
 94 |   {
 95 |     "messages": [
 96 |       {
 97 |         "content": "<image>他是谁？",
 98 |         "role": "user"
 99 |       },
100 |       {
101 |         "content": "他是来自拜仁慕尼黑的托马斯·穆勒。",
102 |         "role": "assistant"
103 |       },
104 |       {
105 |         "content": "他为什么在地上？",
106 |         "role": "user"
107 |       },
108 |       {
109 |         "content": "因为他正在双膝跪地滑行庆祝。",
110 |         "role": "assistant"
111 |       }
112 |     ],
113 |     "images": [
114 |       "mllm_demo_data/2.jpg"
115 |     ]
116 |   },
117 |   {
118 |     "messages": [
119 |       {
120 |         "content": "<image>请描述这张图片",
121 |         "role": "user"
122 |       },
123 |       {
124 |         "content": "中国宇航员桂海潮正在讲话。",
125 |         "role": "assistant"
126 |       },
127 |       {
128 |         "content": "他取得过哪些成就？",
129 |         "role": "user"
130 |       },
131 |       {
132 |         "content": "他于2022年6月被任命为神舟十六号任务的有效载荷专家，从而成为2023年5月30日进入太空的首位平民宇航员。他负责在轨操作空间科学实验有效载荷。",
133 |         "role": "assistant"
134 |       }
135 |     ],
136 |     "images": [
137 |       "mllm_demo_data/3.jpg"
138 |     ]
139 |   }
140 | ]
141 | 


--------------------------------------------------------------------------------
/train/data/mllm_demo_data/1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GAIR-NLP/LIMO/e9951354af219d8c4d454e25ae348395e6598ab0/train/data/mllm_demo_data/1.jpg


--------------------------------------------------------------------------------
/train/data/mllm_demo_data/1.mp3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GAIR-NLP/LIMO/e9951354af219d8c4d454e25ae348395e6598ab0/train/data/mllm_demo_data/1.mp3


--------------------------------------------------------------------------------
/train/data/mllm_demo_data/1.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GAIR-NLP/LIMO/e9951354af219d8c4d454e25ae348395e6598ab0/train/data/mllm_demo_data/1.mp4


--------------------------------------------------------------------------------
/train/data/mllm_demo_data/2.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GAIR-NLP/LIMO/e9951354af219d8c4d454e25ae348395e6598ab0/train/data/mllm_demo_data/2.avi


--------------------------------------------------------------------------------
/train/data/mllm_demo_data/2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GAIR-NLP/LIMO/e9951354af219d8c4d454e25ae348395e6598ab0/train/data/mllm_demo_data/2.jpg


--------------------------------------------------------------------------------
/train/data/mllm_demo_data/2.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GAIR-NLP/LIMO/e9951354af219d8c4d454e25ae348395e6598ab0/train/data/mllm_demo_data/2.wav


--------------------------------------------------------------------------------
/train/data/mllm_demo_data/3.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GAIR-NLP/LIMO/e9951354af219d8c4d454e25ae348395e6598ab0/train/data/mllm_demo_data/3.flac


--------------------------------------------------------------------------------
/train/data/mllm_demo_data/3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GAIR-NLP/LIMO/e9951354af219d8c4d454e25ae348395e6598ab0/train/data/mllm_demo_data/3.jpg


--------------------------------------------------------------------------------
/train/data/mllm_demo_data/3.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GAIR-NLP/LIMO/e9951354af219d8c4d454e25ae348395e6598ab0/train/data/mllm_demo_data/3.mp4


--------------------------------------------------------------------------------
/train/data/mllm_video_demo.json:
--------------------------------------------------------------------------------
 1 | [
 2 |   {
 3 |     "messages": [
 4 |       {
 5 |         "content": "<video>Why is this video funny?",
 6 |         "role": "user"
 7 |       },
 8 |       {
 9 |         "content": "Because a baby is reading, and he is so cute!",
10 |         "role": "assistant"
11 |       }
12 |     ],
13 |     "videos": [
14 |       "mllm_demo_data/1.mp4"
15 |     ]
16 |   },
17 |   {
18 |     "messages": [
19 |       {
20 |         "content": "<video>What is she doing?",
21 |         "role": "user"
22 |       },
23 |       {
24 |         "content": "She is cooking.",
25 |         "role": "assistant"
26 |       }
27 |     ],
28 |     "videos": [
29 |       "mllm_demo_data/2.avi"
30 |     ]
31 |   },
32 |   {
33 |     "messages": [
34 |       {
35 |         "content": "<video>What's in the video?",
36 |         "role": "user"
37 |       },
38 |       {
39 |         "content": "A baby is playing in the living room.",
40 |         "role": "assistant"
41 |       }
42 |     ],
43 |     "videos": [
44 |       "mllm_demo_data/3.mp4"
45 |     ]
46 |   }
47 | ]
48 | 


--------------------------------------------------------------------------------
/train/data/ultra_chat/ultra_chat.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | from typing import List
 4 | 
 5 | import datasets
 6 | 
 7 | 
 8 | _HF_ENDPOINT = os.getenv("HF_ENDPOINT", "https://huggingface.co")
 9 | 
10 | _DESCRIPTION = "UltraChat: Large-scale, Informative, and Diverse Multi-round Dialogue Data."
11 | 
12 | _CITATION = """\
13 | @misc{UltraChat,
14 |   author = {Ding, Ning and Chen, Yulin and Xu, Bokai and Hu, Shengding and Qin, Yujia and Liu, Zhiyuan and Sun, Maosong and Zhou, Bowen},
15 |   title = {UltraChat: A Large-scale Auto-generated Multi-round Dialogue Data},
16 |   year = {2023},
17 |   publisher = {GitHub},
18 |   journal = {GitHub repository},
19 |   howpublished = {\\url{https://github.com/thunlp/ultrachat}},
20 | }
21 | """
22 | 
23 | _HOMEPAGE = f"{_HF_ENDPOINT}/datasets/stingning/ultrachat"
24 | _LICENSE = "cc-by-nc-4.0"
25 | _BASE_DATA_URL = f"{_HF_ENDPOINT}/datasets/stingning/ultrachat/resolve/main/train_{{idx}}.jsonl"
26 | 
27 | 
28 | class UltraChat(datasets.GeneratorBasedBuilder):
29 |     VERSION = datasets.Version("0.0.0")
30 | 
31 |     def _info(self):
32 |         features = datasets.Features(
33 |             {"conversations": [{"from": datasets.Value("string"), "value": datasets.Value("string")}]}
34 |         )
35 |         return datasets.DatasetInfo(
36 |             description=_DESCRIPTION, features=features, homepage=_HOMEPAGE, license=_LICENSE, citation=_CITATION
37 |         )
38 | 
39 |     def _split_generators(self, dl_manager: datasets.DownloadManager):
40 |         file_paths = [dl_manager.download(_BASE_DATA_URL.format(idx=idx)) for idx in range(10)]  # multiple shards
41 |         return [datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"filepaths": file_paths})]
42 | 
43 |     def _generate_examples(self, filepaths: List[str]):
44 |         for filepath in filepaths:
45 |             with open(filepath, encoding="utf-8") as f:
46 |                 for row in f:
47 |                     try:
48 |                         data = json.loads(row)
49 |                     except Exception:
50 |                         continue
51 |                     key: int = data["id"]
52 |                     content: List[str] = data["data"]
53 |                     if len(content) % 2 == 1:
54 |                         content.pop(-1)
55 |                     if len(content) < 2:
56 |                         continue
57 |                     conversations = [
58 |                         {"from": "human" if i % 2 == 0 else "gpt", "value": content[i]} for i in range(len(content))
59 |                     ]
60 |                     yield key, {"conversations": conversations}
61 | 


--------------------------------------------------------------------------------
/train/docker/docker-cuda/Dockerfile:
--------------------------------------------------------------------------------
  1 | # Default use the NVIDIA official image with PyTorch 2.3.0
  2 | # https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/index.html
  3 | ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:24.02-py3
  4 | FROM ${BASE_IMAGE}
  5 | 
  6 | # Define environments
  7 | ENV MAX_JOBS=4
  8 | ENV FLASH_ATTENTION_FORCE_BUILD=TRUE
  9 | ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
 10 | 
 11 | # Define installation arguments
 12 | ARG INSTALL_BNB=false
 13 | ARG INSTALL_VLLM=false
 14 | ARG INSTALL_DEEPSPEED=false
 15 | ARG INSTALL_FLASHATTN=false
 16 | ARG INSTALL_LIGER_KERNEL=false
 17 | ARG INSTALL_HQQ=false
 18 | ARG INSTALL_EETQ=false
 19 | ARG PIP_INDEX=https://pypi.org/simple
 20 | ARG HTTP_PROXY=
 21 | 
 22 | # Set the working directory
 23 | WORKDIR /app
 24 | 
 25 | # Set http proxy
 26 | RUN if [ -n "$HTTP_PROXY" ]; then \
 27 |         echo "Configuring proxy..."; \
 28 |         export http_proxy=$HTTP_PROXY; \
 29 |         export https_proxy=$HTTP_PROXY; \
 30 |     fi
 31 | 
 32 | # Install the requirements
 33 | COPY requirements.txt /app
 34 | RUN pip config set global.index-url "$PIP_INDEX" && \
 35 |     pip config set global.extra-index-url "$PIP_INDEX" && \
 36 |     python -m pip install --upgrade pip && \
 37 |     if [ -n "$HTTP_PROXY" ]; then \
 38 |         python -m pip install --proxy=$HTTP_PROXY -r requirements.txt; \
 39 |     else \
 40 |         python -m pip install -r requirements.txt; \
 41 |     fi
 42 | 
 43 | # Copy the rest of the application into the image
 44 | COPY . /app
 45 | 
 46 | # Install the LLaMA Factory
 47 | RUN EXTRA_PACKAGES="metrics"; \
 48 |     if [ "$INSTALL_BNB" == "true" ]; then \
 49 |         EXTRA_PACKAGES="${EXTRA_PACKAGES},bitsandbytes"; \
 50 |     fi; \
 51 |     if [ "$INSTALL_VLLM" == "true" ]; then \
 52 |         EXTRA_PACKAGES="${EXTRA_PACKAGES},vllm"; \
 53 |     fi; \
 54 |     if [ "$INSTALL_DEEPSPEED" == "true" ]; then \
 55 |         EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \
 56 |     fi; \
 57 |     if [ "$INSTALL_LIGER_KERNEL" == "true" ]; then \
 58 |         EXTRA_PACKAGES="${EXTRA_PACKAGES},liger-kernel"; \
 59 |     fi; \
 60 |     if [ "$INSTALL_HQQ" == "true" ]; then \
 61 |         EXTRA_PACKAGES="${EXTRA_PACKAGES},hqq"; \
 62 |     fi; \
 63 |     if [ "$INSTALL_EETQ" == "true" ]; then \
 64 |         EXTRA_PACKAGES="${EXTRA_PACKAGES},eetq"; \
 65 |     fi; \
 66 |     if [ -n "$HTTP_PROXY" ]; then \
 67 |         pip install --proxy=$HTTP_PROXY -e ".[$EXTRA_PACKAGES]"; \
 68 |     else \
 69 |         pip install -e ".[$EXTRA_PACKAGES]"; \
 70 |     fi
 71 | 
 72 | # Rebuild flash attention
 73 | RUN pip uninstall -y transformer-engine flash-attn && \
 74 |     if [ "$INSTALL_FLASHATTN" == "true" ]; then \
 75 |         pip uninstall -y ninja && \
 76 |         if [ -n "$HTTP_PROXY" ]; then \
 77 |             pip install --proxy=$HTTP_PROXY ninja && \
 78 |             pip install --proxy=$HTTP_PROXY --no-cache-dir flash-attn --no-build-isolation; \
 79 |         else \
 80 |             pip install ninja && \
 81 |             pip install --no-cache-dir flash-attn --no-build-isolation; \
 82 |         fi; \
 83 |     fi
 84 | 
 85 | 
 86 | # Unset http proxy
 87 | RUN if [ -n "$HTTP_PROXY" ]; then \
 88 |         unset http_proxy; \
 89 |         unset https_proxy; \
 90 |     fi
 91 | 
 92 | # Set up volumes
 93 | VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ]
 94 | 
 95 | # Expose port 7860 for the LLaMA Board
 96 | ENV GRADIO_SERVER_PORT 7860
 97 | EXPOSE 7860
 98 | 
 99 | # Expose port 8000 for the API service
100 | ENV API_PORT 8000
101 | EXPOSE 8000
102 | 


--------------------------------------------------------------------------------
/train/docker/docker-cuda/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | services:
 2 |   llamafactory:
 3 |     build:
 4 |       dockerfile: ./docker/docker-cuda/Dockerfile
 5 |       context: ../..
 6 |       args:
 7 |         INSTALL_BNB: false
 8 |         INSTALL_VLLM: false
 9 |         INSTALL_DEEPSPEED: false
10 |         INSTALL_FLASHATTN: false
11 |         INSTALL_LIGER_KERNEL: false
12 |         INSTALL_HQQ: false
13 |         INSTALL_EETQ: false
14 |         PIP_INDEX: https://pypi.org/simple
15 |     container_name: llamafactory
16 |     volumes:
17 |       - ../../hf_cache:/root/.cache/huggingface
18 |       - ../../ms_cache:/root/.cache/modelscope
19 |       - ../../om_cache:/root/.cache/openmind
20 |       - ../../data:/app/data
21 |       - ../../output:/app/output
22 |     ports:
23 |       - "7860:7860"
24 |       - "8000:8000"
25 |     ipc: host
26 |     tty: true
27 |     shm_size: '16gb'
28 |     stdin_open: true
29 |     command: bash
30 |     deploy:
31 |       resources:
32 |         reservations:
33 |           devices:
34 |           - driver: nvidia
35 |             count: "all"
36 |             capabilities: [gpu]
37 |     restart: unless-stopped
38 | 


--------------------------------------------------------------------------------
/train/docker/docker-npu/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Use the Ubuntu 22.04 image with CANN 8.0.rc1
 2 | # More versions can be found at https://hub.docker.com/r/ascendai/cann/tags
 3 | # FROM ascendai/cann:8.0.rc1-910-ubuntu22.04-py3.8
 4 | FROM ascendai/cann:8.0.rc1-910b-ubuntu22.04-py3.8
 5 | # FROM ascendai/cann:8.0.rc1-910-openeuler22.03-py3.8
 6 | # FROM ascendai/cann:8.0.rc1-910b-openeuler22.03-py3.8
 7 | 
 8 | # Define environments
 9 | ENV DEBIAN_FRONTEND=noninteractive
10 | 
11 | # Define installation arguments
12 | ARG INSTALL_DEEPSPEED=false
13 | ARG PIP_INDEX=https://pypi.org/simple
14 | ARG TORCH_INDEX=https://download.pytorch.org/whl/cpu
15 | ARG HTTP_PROXY=
16 | 
17 | # Set the working directory
18 | WORKDIR /app
19 | 
20 | # Set http proxy
21 | RUN if [ -n "$HTTP_PROXY" ]; then \
22 |         echo "Configuring proxy..."; \
23 |         export http_proxy=$HTTP_PROXY; \
24 |         export https_proxy=$HTTP_PROXY; \
25 |     fi
26 | 
27 | # Install the requirements
28 | COPY requirements.txt /app
29 | RUN pip config set global.index-url "$PIP_INDEX" && \
30 |     pip config set global.extra-index-url "$TORCH_INDEX" && \
31 |     python -m pip install --upgrade pip && \
32 |     if [ -n "$HTTP_PROXY" ]; then \
33 |         python -m pip install --proxy=$HTTP_PROXY -r requirements.txt; \
34 |     else \
35 |         python -m pip install -r requirements.txt; \
36 |     fi
37 | 
38 | # Copy the rest of the application into the image
39 | COPY . /app
40 | 
41 | # Install the LLaMA Factory
42 | RUN EXTRA_PACKAGES="torch-npu,metrics"; \
43 |     if [ "$INSTALL_DEEPSPEED" == "true" ]; then \
44 |         EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \
45 |     fi; \
46 |     if [ -n "$HTTP_PROXY" ]; then \
47 |         pip install --proxy=$HTTP_PROXY -e ".[$EXTRA_PACKAGES]"; \
48 |     else \
49 |         pip install -e ".[$EXTRA_PACKAGES]"; \
50 |     fi
51 | 
52 | # Unset http proxy
53 | RUN if [ -n "$HTTP_PROXY" ]; then \
54 |         unset http_proxy; \
55 |         unset https_proxy; \
56 |     fi
57 | 
58 | # Set up volumes
59 | VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ]
60 | 
61 | # Expose port 7860 for the LLaMA Board
62 | ENV GRADIO_SERVER_PORT 7860
63 | EXPOSE 7860
64 | 
65 | # Expose port 8000 for the API service
66 | ENV API_PORT 8000
67 | EXPOSE 8000
68 | 


--------------------------------------------------------------------------------
/train/docker/docker-npu/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | services:
 2 |   llamafactory:
 3 |     build:
 4 |       dockerfile: ./docker/docker-npu/Dockerfile
 5 |       context: ../..
 6 |       args:
 7 |         INSTALL_DEEPSPEED: false
 8 |         PIP_INDEX: https://pypi.org/simple
 9 |     container_name: llamafactory
10 |     volumes:
11 |       - ../../hf_cache:/root/.cache/huggingface
12 |       - ../../ms_cache:/root/.cache/modelscope
13 |       - ../../om_cache:/root/.cache/openmind
14 |       - ../../data:/app/data
15 |       - ../../output:/app/output
16 |       - /usr/local/dcmi:/usr/local/dcmi
17 |       - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi
18 |       - /usr/local/Ascend/driver:/usr/local/Ascend/driver
19 |       - /etc/ascend_install.info:/etc/ascend_install.info
20 |     ports:
21 |       - "7860:7860"
22 |       - "8000:8000"
23 |     ipc: host
24 |     tty: true
25 |     shm_size: '16gb'
26 |     stdin_open: true
27 |     command: bash
28 |     devices:
29 |       - /dev/davinci0
30 |       - /dev/davinci_manager
31 |       - /dev/devmm_svm
32 |       - /dev/hisi_hdc
33 |     restart: unless-stopped
34 | 


--------------------------------------------------------------------------------
/train/docker/docker-rocm/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM hardandheavy/transformers-rocm:2.2.0
 2 | 
 3 | # Define environments
 4 | ENV MAX_JOBS=4
 5 | ENV FLASH_ATTENTION_FORCE_BUILD=TRUE
 6 | ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
 7 | 
 8 | # Define installation arguments
 9 | ARG INSTALL_BNB=false
10 | ARG INSTALL_VLLM=false
11 | ARG INSTALL_DEEPSPEED=false
12 | ARG INSTALL_FLASHATTN=false
13 | ARG INSTALL_LIGER_KERNEL=false
14 | ARG INSTALL_HQQ=false
15 | ARG PIP_INDEX=https://pypi.org/simple
16 | ARG HTTP_PROXY=
17 | 
18 | # Set the working directory
19 | WORKDIR /app
20 | 
21 | # Set http proxy
22 | RUN if [ -n "$HTTP_PROXY" ]; then \
23 |         echo "Configuring proxy..."; \
24 |         export http_proxy=$HTTP_PROXY; \
25 |         export https_proxy=$HTTP_PROXY; \
26 |     fi
27 | 
28 | # Install the requirements
29 | COPY requirements.txt /app
30 | RUN pip config set global.index-url "$PIP_INDEX" && \
31 |     pip config set global.extra-index-url "$PIP_INDEX" && \
32 |     python -m pip install --upgrade pip && \
33 |     if [ -n "$HTTP_PROXY" ]; then \
34 |         python -m pip install --proxy=$HTTP_PROXY -r requirements.txt; \
35 |     else \
36 |         python -m pip install -r requirements.txt; \
37 |     fi
38 | 
39 | # Copy the rest of the application into the image
40 | COPY . /app
41 | 
42 | # Install the LLaMA Factory
43 | RUN EXTRA_PACKAGES="metrics"; \
44 |     if [ "$INSTALL_BNB" == "true" ]; then \
45 |         EXTRA_PACKAGES="${EXTRA_PACKAGES},bitsandbytes"; \
46 |     fi; \
47 |     if [ "$INSTALL_VLLM" == "true" ]; then \
48 |         EXTRA_PACKAGES="${EXTRA_PACKAGES},vllm"; \
49 |     fi; \
50 |     if [ "$INSTALL_DEEPSPEED" == "true" ]; then \
51 |         EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \
52 |     fi; \
53 |     if [ "$INSTALL_LIGER_KERNEL" == "true" ]; then \
54 |         EXTRA_PACKAGES="${EXTRA_PACKAGES},liger-kernel"; \
55 |     fi; \
56 |     if [ "$INSTALL_HQQ" == "true" ]; then \
57 |         EXTRA_PACKAGES="${EXTRA_PACKAGES},hqq"; \
58 |     fi; \
59 |     if [ -n "$HTTP_PROXY" ]; then \
60 |         pip install --proxy=$HTTP_PROXY -e ".[$EXTRA_PACKAGES]"; \
61 |     else \
62 |         pip install -e ".[$EXTRA_PACKAGES]"; \
63 |     fi
64 | 
65 | # Rebuild flash attention
66 | RUN pip uninstall -y transformer-engine flash-attn && \
67 |     if [ "$INSTALL_FLASHATTN" == "true" ]; then \
68 |         pip uninstall -y ninja && \
69 |         if [ -n "$HTTP_PROXY" ]; then \
70 |             pip install --proxy=$HTTP_PROXY ninja && \
71 |             pip install --proxy=$HTTP_PROXY --no-cache-dir flash-attn --no-build-isolation; \
72 |         else \
73 |             pip install ninja && \
74 |             pip install --no-cache-dir flash-attn --no-build-isolation; \
75 |         fi; \
76 |     fi
77 | 
78 | # Unset http proxy
79 | RUN if [ -n "$HTTP_PROXY" ]; then \
80 |         unset http_proxy; \
81 |         unset https_proxy; \
82 |     fi
83 | 
84 | # Set up volumes
85 | VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ]
86 | 
87 | # Expose port 7860 for the LLaMA Board
88 | ENV GRADIO_SERVER_PORT 7860
89 | EXPOSE 7860
90 | 
91 | # Expose port 8000 for the API service
92 | ENV API_PORT 8000
93 | EXPOSE 8000
94 | 


--------------------------------------------------------------------------------
/train/docker/docker-rocm/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | services:
 2 |   llamafactory:
 3 |     build:
 4 |       dockerfile: ./docker/docker-rocm/Dockerfile
 5 |       context: ../..
 6 |       args:
 7 |         INSTALL_BNB: false
 8 |         INSTALL_VLLM: false
 9 |         INSTALL_DEEPSPEED: false
10 |         INSTALL_FLASHATTN: false
11 |         INSTALL_LIGER_KERNEL: false
12 |         INSTALL_HQQ: false
13 |         PIP_INDEX: https://pypi.org/simple
14 |     container_name: llamafactory
15 |     volumes:
16 |       - ../../hf_cache:/root/.cache/huggingface
17 |       - ../../ms_cache:/root/.cache/modelscope
18 |       - ../../om_cache:/root/.cache/openmind
19 |       - ../../data:/app/data
20 |       - ../../output:/app/output
21 |       - ../../saves:/app/saves
22 |     ports:
23 |       - "7860:7860"
24 |       - "8000:8000"
25 |     ipc: host
26 |     tty: true
27 |     shm_size: '16gb'
28 |     stdin_open: true
29 |     command: bash
30 |     devices:
31 |       - /dev/kfd:/dev/kfd
32 |       - /dev/dri:/dev/dri
33 |     restart: unless-stopped
34 | 


--------------------------------------------------------------------------------
/train/evaluation/ceval/ceval.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GAIR-NLP/LIMO/e9951354af219d8c4d454e25ae348395e6598ab0/train/evaluation/ceval/ceval.zip


--------------------------------------------------------------------------------
/train/evaluation/cmmlu/cmmlu.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GAIR-NLP/LIMO/e9951354af219d8c4d454e25ae348395e6598ab0/train/evaluation/cmmlu/cmmlu.zip


--------------------------------------------------------------------------------
/train/evaluation/mmlu/mmlu.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GAIR-NLP/LIMO/e9951354af219d8c4d454e25ae348395e6598ab0/train/evaluation/mmlu/mmlu.zip


--------------------------------------------------------------------------------
/train/examples/accelerate/fsdp_config.yaml:
--------------------------------------------------------------------------------
 1 | compute_environment: LOCAL_MACHINE
 2 | debug: false
 3 | distributed_type: FSDP
 4 | downcast_bf16: 'no'
 5 | fsdp_config:
 6 |   fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP
 7 |   fsdp_backward_prefetch: BACKWARD_PRE
 8 |   fsdp_forward_prefetch: false
 9 |   fsdp_cpu_ram_efficient_loading: true
10 |   fsdp_offload_params: true # offload may affect training speed
11 |   fsdp_sharding_strategy: FULL_SHARD
12 |   fsdp_state_dict_type: FULL_STATE_DICT
13 |   fsdp_sync_module_states: true
14 |   fsdp_use_orig_params: true
15 | machine_rank: 0
16 | main_training_function: main
17 | mixed_precision: fp16 # or bf16
18 | num_machines: 1 # the number of nodes
19 | num_processes: 2 # the number of GPUs in all nodes
20 | rdzv_backend: static
21 | same_network: true
22 | tpu_env: []
23 | tpu_use_cluster: false
24 | tpu_use_sudo: false
25 | use_cpu: false
26 | 


--------------------------------------------------------------------------------
/train/examples/deepspeed/ds_z0_config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "train_batch_size": "auto",
 3 |   "train_micro_batch_size_per_gpu": "auto",
 4 |   "gradient_accumulation_steps": "auto",
 5 |   "gradient_clipping": "auto",
 6 |   "zero_allow_untested_optimizer": true,
 7 |   "fp16": {
 8 |     "enabled": "auto",
 9 |     "loss_scale": 0,
10 |     "loss_scale_window": 1000,
11 |     "initial_scale_power": 16,
12 |     "hysteresis": 2,
13 |     "min_loss_scale": 1
14 |   },
15 |   "bf16": {
16 |     "enabled": "auto"
17 |   },
18 |   "zero_optimization": {
19 |     "stage": 0,
20 |     "allgather_partitions": true,
21 |     "allgather_bucket_size": 5e8,
22 |     "overlap_comm": true,
23 |     "reduce_scatter": true,
24 |     "reduce_bucket_size": 5e8,
25 |     "contiguous_gradients": true,
26 |     "round_robin_gradients": true
27 |   }
28 | }
29 | 


--------------------------------------------------------------------------------
/train/examples/deepspeed/ds_z2_config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "train_batch_size": "auto",
 3 |   "train_micro_batch_size_per_gpu": "auto",
 4 |   "gradient_accumulation_steps": "auto",
 5 |   "gradient_clipping": "auto",
 6 |   "zero_allow_untested_optimizer": true,
 7 |   "fp16": {
 8 |     "enabled": "auto",
 9 |     "loss_scale": 0,
10 |     "loss_scale_window": 1000,
11 |     "initial_scale_power": 16,
12 |     "hysteresis": 2,
13 |     "min_loss_scale": 1
14 |   },
15 |   "bf16": {
16 |     "enabled": "auto"
17 |   },
18 |   "zero_optimization": {
19 |     "stage": 2,
20 |     "allgather_partitions": true,
21 |     "allgather_bucket_size": 5e8,
22 |     "overlap_comm": true,
23 |     "reduce_scatter": true,
24 |     "reduce_bucket_size": 5e8,
25 |     "contiguous_gradients": true,
26 |     "round_robin_gradients": true
27 |   }
28 | }
29 | 


--------------------------------------------------------------------------------
/train/examples/deepspeed/ds_z2_offload_config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "train_batch_size": "auto",
 3 |   "train_micro_batch_size_per_gpu": "auto",
 4 |   "gradient_accumulation_steps": "auto",
 5 |   "gradient_clipping": "auto",
 6 |   "zero_allow_untested_optimizer": true,
 7 |   "fp16": {
 8 |     "enabled": "auto",
 9 |     "loss_scale": 0,
10 |     "loss_scale_window": 1000,
11 |     "initial_scale_power": 16,
12 |     "hysteresis": 2,
13 |     "min_loss_scale": 1
14 |   },
15 |   "bf16": {
16 |     "enabled": "auto"
17 |   },
18 |   "zero_optimization": {
19 |     "stage": 2,
20 |     "offload_optimizer": {
21 |       "device": "cpu",
22 |       "pin_memory": true
23 |     },
24 |     "allgather_partitions": true,
25 |     "allgather_bucket_size": 5e8,
26 |     "overlap_comm": true,
27 |     "reduce_scatter": true,
28 |     "reduce_bucket_size": 5e8,
29 |     "contiguous_gradients": true,
30 |     "round_robin_gradients": true
31 |   }
32 | }
33 | 


--------------------------------------------------------------------------------
/train/examples/deepspeed/ds_z3_config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "train_batch_size": "auto",
 3 |   "train_micro_batch_size_per_gpu": "auto",
 4 |   "gradient_accumulation_steps": "auto",
 5 |   "gradient_clipping": "auto",
 6 |   "zero_allow_untested_optimizer": true,
 7 |   "fp16": {
 8 |     "enabled": "auto",
 9 |     "loss_scale": 0,
10 |     "loss_scale_window": 1000,
11 |     "initial_scale_power": 16,
12 |     "hysteresis": 2,
13 |     "min_loss_scale": 1
14 |   },
15 |   "bf16": {
16 |     "enabled": "auto"
17 |   },
18 |   "zero_optimization": {
19 |     "stage": 3,
20 |     "overlap_comm": true,
21 |     "contiguous_gradients": true,
22 |     "sub_group_size": 1e9,
23 |     "reduce_bucket_size": "auto",
24 |     "stage3_prefetch_bucket_size": "auto",
25 |     "stage3_param_persistence_threshold": "auto",
26 |     "stage3_max_live_parameters": 1e9,
27 |     "stage3_max_reuse_distance": 1e9,
28 |     "stage3_gather_16bit_weights_on_model_save": true
29 |   }
30 | }
31 | 


--------------------------------------------------------------------------------
/train/examples/deepspeed/ds_z3_offload_config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "train_batch_size": "auto",
 3 |   "train_micro_batch_size_per_gpu": "auto",
 4 |   "gradient_accumulation_steps": "auto",
 5 |   "gradient_clipping": "auto",
 6 |   "zero_allow_untested_optimizer": true,
 7 |   "fp16": {
 8 |     "enabled": "auto",
 9 |     "loss_scale": 0,
10 |     "loss_scale_window": 1000,
11 |     "initial_scale_power": 16,
12 |     "hysteresis": 2,
13 |     "min_loss_scale": 1
14 |   },
15 |   "bf16": {
16 |     "enabled": "auto"
17 |   },
18 |   "zero_optimization": {
19 |     "stage": 3,
20 |     "offload_optimizer": {
21 |       "device": "cpu",
22 |       "pin_memory": true
23 |     },
24 |     "offload_param": {
25 |       "device": "cpu",
26 |       "pin_memory": true
27 |     },
28 |     "overlap_comm": true,
29 |     "contiguous_gradients": true,
30 |     "sub_group_size": 1e9,
31 |     "reduce_bucket_size": "auto",
32 |     "stage3_prefetch_bucket_size": "auto",
33 |     "stage3_param_persistence_threshold": "auto",
34 |     "stage3_max_live_parameters": 1e9,
35 |     "stage3_max_reuse_distance": 1e9,
36 |     "stage3_gather_16bit_weights_on_model_save": true
37 |   }
38 | }
39 | 


--------------------------------------------------------------------------------
/train/examples/extras/adam_mini/qwen2_full_sft.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: Qwen/Qwen2-1.5B-Instruct
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: sft
 7 | do_train: true
 8 | finetuning_type: full
 9 | use_adam_mini: true
10 | 
11 | ### dataset
12 | dataset: identity,alpaca_en_demo
13 | template: qwen
14 | cutoff_len: 2048
15 | max_samples: 1000
16 | overwrite_cache: true
17 | preprocessing_num_workers: 16
18 | 
19 | ### output
20 | output_dir: saves/qwen2-1_5b/full/sft
21 | logging_steps: 10
22 | save_steps: 500
23 | plot_loss: true
24 | overwrite_output_dir: true
25 | 
26 | ### train
27 | per_device_train_batch_size: 1
28 | gradient_accumulation_steps: 8
29 | learning_rate: 1.0e-5
30 | num_train_epochs: 3.0
31 | lr_scheduler_type: cosine
32 | warmup_ratio: 0.1
33 | bf16: true
34 | ddp_timeout: 180000000
35 | 
36 | ### eval
37 | # val_size: 0.1
38 | # per_device_eval_batch_size: 1
39 | # eval_strategy: steps
40 | # eval_steps: 500
41 | 


--------------------------------------------------------------------------------
/train/examples/extras/apollo/llama3_full_sft.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: sft
 7 | do_train: true
 8 | finetuning_type: full
 9 | use_apollo: true
10 | apollo_layerwise: true  # choices: [true, false], use false for DDP training
11 | apollo_target: all
12 | apollo_rank: 128
13 | apollo_scale: 32.0
14 | apollo_scale_type: channel
15 | 
16 | ### dataset
17 | dataset: identity,alpaca_en_demo
18 | template: llama3
19 | cutoff_len: 2048
20 | max_samples: 1000
21 | overwrite_cache: true
22 | preprocessing_num_workers: 16
23 | 
24 | ### output
25 | output_dir: saves/llama3-8b/full/sft
26 | logging_steps: 10
27 | save_steps: 500
28 | plot_loss: true
29 | overwrite_output_dir: true
30 | 
31 | ### train
32 | per_device_train_batch_size: 1
33 | gradient_accumulation_steps: 1  # use 1 for layerwise apollo
34 | learning_rate: 1.0e-5
35 | num_train_epochs: 3.0
36 | lr_scheduler_type: cosine
37 | warmup_ratio: 0.1
38 | pure_bf16: true
39 | ddp_timeout: 180000000
40 | 
41 | ### eval
42 | # val_size: 0.1
43 | # per_device_eval_batch_size: 1
44 | # eval_strategy: steps
45 | # eval_steps: 500
46 | 


--------------------------------------------------------------------------------
/train/examples/extras/badam/llama3_full_sft.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: sft
 7 | do_train: true
 8 | finetuning_type: full
 9 | use_badam: true
10 | badam_mode: layer
11 | badam_switch_mode: ascending
12 | badam_switch_interval: 50
13 | badam_verbose: 2
14 | # deepspeed: examples/deepspeed/ds_z3_config.json
15 | 
16 | ### dataset
17 | dataset: identity,alpaca_en_demo
18 | template: llama3
19 | cutoff_len: 2048
20 | max_samples: 1000
21 | overwrite_cache: true
22 | preprocessing_num_workers: 16
23 | 
24 | ### output
25 | output_dir: saves/llama3-8b/full/sft
26 | logging_steps: 10
27 | save_steps: 500
28 | plot_loss: true
29 | overwrite_output_dir: true
30 | 
31 | ### train
32 | per_device_train_batch_size: 1
33 | gradient_accumulation_steps: 8
34 | learning_rate: 1.0e-5
35 | num_train_epochs: 3.0
36 | lr_scheduler_type: cosine
37 | warmup_ratio: 0.1
38 | 
39 | ### eval
40 | # val_size: 0.1
41 | # per_device_eval_batch_size: 1
42 | # eval_strategy: steps
43 | # eval_steps: 500
44 | 


--------------------------------------------------------------------------------
/train/examples/extras/fsdp_qlora/llama3_lora_sft.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | quantization_bit: 4
 4 | trust_remote_code: true
 5 | 
 6 | ### method
 7 | stage: sft
 8 | do_train: true
 9 | finetuning_type: lora
10 | lora_rank: 8
11 | lora_target: all
12 | 
13 | ### dataset
14 | dataset: identity,alpaca_en_demo
15 | template: llama3
16 | cutoff_len: 2048
17 | max_samples: 1000
18 | overwrite_cache: true
19 | preprocessing_num_workers: 16
20 | 
21 | ### output
22 | output_dir: saves/llama3-8b/lora/sft
23 | logging_steps: 10
24 | save_steps: 500
25 | plot_loss: true
26 | overwrite_output_dir: true
27 | 
28 | ### train
29 | per_device_train_batch_size: 1
30 | gradient_accumulation_steps: 8
31 | learning_rate: 1.0e-4
32 | num_train_epochs: 3.0
33 | lr_scheduler_type: cosine
34 | warmup_ratio: 0.1
35 | bf16: true
36 | ddp_timeout: 180000000
37 | 
38 | ### eval
39 | # val_size: 0.1
40 | # per_device_eval_batch_size: 1
41 | # eval_strategy: steps
42 | # eval_steps: 500
43 | 


--------------------------------------------------------------------------------
/train/examples/extras/fsdp_qlora/train.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # DO NOT use GPTQ/AWQ model in FSDP+QLoRA
3 | 
4 | CUDA_VISIBLE_DEVICES=0,1 accelerate launch \
5 |     --config_file examples/accelerate/fsdp_config.yaml \
6 |     src/train.py examples/extras/fsdp_qlora/llama3_lora_sft.yaml
7 | 


--------------------------------------------------------------------------------
/train/examples/extras/galore/llama3_full_sft.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: sft
 7 | do_train: true
 8 | finetuning_type: full
 9 | use_galore: true
10 | galore_layerwise: true  # choices: [true, false], use false for DDP training
11 | galore_target: all
12 | galore_rank: 128
13 | galore_scale: 2.0
14 | 
15 | ### dataset
16 | dataset: identity,alpaca_en_demo
17 | template: llama3
18 | cutoff_len: 2048
19 | max_samples: 1000
20 | overwrite_cache: true
21 | preprocessing_num_workers: 16
22 | 
23 | ### output
24 | output_dir: saves/llama3-8b/full/sft
25 | logging_steps: 10
26 | save_steps: 500
27 | plot_loss: true
28 | overwrite_output_dir: true
29 | 
30 | ### train
31 | per_device_train_batch_size: 1
32 | gradient_accumulation_steps: 1  # use 1 for layerwise galore
33 | learning_rate: 1.0e-5
34 | num_train_epochs: 3.0
35 | lr_scheduler_type: cosine
36 | warmup_ratio: 0.1
37 | pure_bf16: true
38 | ddp_timeout: 180000000
39 | 
40 | ### eval
41 | # val_size: 0.1
42 | # per_device_eval_batch_size: 1
43 | # eval_strategy: steps
44 | # eval_steps: 500
45 | 


--------------------------------------------------------------------------------
/train/examples/extras/llama_pro/expand.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | python scripts/llama_pro.py \
4 |     --model_name_or_path meta-llama/Meta-Llama-3-8B-Instruct \
5 |     --output_dir models/llama3-8b-pro \
6 |     --num_expand 8
7 | 


--------------------------------------------------------------------------------
/train/examples/extras/llama_pro/llama3_freeze_sft.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: models/llama3-8b-pro
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: sft
 7 | do_train: true
 8 | finetuning_type: freeze
 9 | freeze_trainable_layers: 8
10 | freeze_trainable_modules: all
11 | use_llama_pro: true
12 | 
13 | ### dataset
14 | dataset: identity,alpaca_en_demo
15 | template: llama3
16 | cutoff_len: 2048
17 | max_samples: 1000
18 | overwrite_cache: true
19 | preprocessing_num_workers: 16
20 | 
21 | ### output
22 | output_dir: saves/llama3-8b-pro/freeze/sft
23 | logging_steps: 10
24 | save_steps: 500
25 | plot_loss: true
26 | overwrite_output_dir: true
27 | 
28 | ### train
29 | per_device_train_batch_size: 1
30 | gradient_accumulation_steps: 8
31 | learning_rate: 1.0e-4
32 | num_train_epochs: 3.0
33 | lr_scheduler_type: cosine
34 | warmup_ratio: 0.1
35 | bf16: true
36 | ddp_timeout: 180000000
37 | 
38 | ### eval
39 | # val_size: 0.1
40 | # per_device_eval_batch_size: 1
41 | # eval_strategy: steps
42 | # eval_steps: 500
43 | 


--------------------------------------------------------------------------------
/train/examples/extras/loraplus/llama3_lora_sft.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: sft
 7 | do_train: true
 8 | finetuning_type: lora
 9 | lora_rank: 8
10 | lora_target: all
11 | loraplus_lr_ratio: 16.0
12 | 
13 | ### dataset
14 | dataset: identity,alpaca_en_demo
15 | template: llama3
16 | cutoff_len: 2048
17 | max_samples: 1000
18 | overwrite_cache: true
19 | preprocessing_num_workers: 16
20 | 
21 | ### output
22 | output_dir: saves/llama3-8b/lora/sft
23 | logging_steps: 10
24 | save_steps: 500
25 | plot_loss: true
26 | overwrite_output_dir: true
27 | 
28 | ### train
29 | per_device_train_batch_size: 1
30 | gradient_accumulation_steps: 8
31 | learning_rate: 1.0e-4
32 | num_train_epochs: 3.0
33 | lr_scheduler_type: cosine
34 | warmup_ratio: 0.1
35 | bf16: true
36 | ddp_timeout: 180000000
37 | 
38 | ### eval
39 | # val_size: 0.1
40 | # per_device_eval_batch_size: 1
41 | # eval_strategy: steps
42 | # eval_steps: 500
43 | 


--------------------------------------------------------------------------------
/train/examples/extras/mod/llama3_full_sft.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: sft
 7 | do_train: true
 8 | finetuning_type: full
 9 | mixture_of_depths: convert
10 | 
11 | ### dataset
12 | dataset: identity,alpaca_en_demo
13 | template: llama3
14 | cutoff_len: 2048
15 | max_samples: 1000
16 | overwrite_cache: true
17 | preprocessing_num_workers: 16
18 | 
19 | ### output
20 | output_dir: saves/llama3-8b-mod/full/sft
21 | logging_steps: 10
22 | save_steps: 500
23 | plot_loss: true
24 | overwrite_output_dir: true
25 | 
26 | ### train
27 | per_device_train_batch_size: 1
28 | gradient_accumulation_steps: 8
29 | optim: paged_adamw_8bit
30 | learning_rate: 1.0e-5
31 | num_train_epochs: 3.0
32 | lr_scheduler_type: cosine
33 | warmup_ratio: 0.1
34 | pure_bf16: true
35 | ddp_timeout: 180000000
36 | 
37 | ### eval
38 | # val_size: 0.1
39 | # per_device_eval_batch_size: 1
40 | # eval_strategy: steps
41 | # eval_steps: 500
42 | 


--------------------------------------------------------------------------------
/train/examples/extras/nlg_eval/llama3_lora_predict.yaml:
--------------------------------------------------------------------------------
 1 | # The batch generation can be SLOW using this config.
 2 | # For faster inference, we recommend to use `scripts/vllm_infer.py`.
 3 | 
 4 | ### model
 5 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 6 | adapter_name_or_path: saves/llama3-8b/lora/sft
 7 | trust_remote_code: true
 8 | 
 9 | ### method
10 | stage: sft
11 | do_predict: true
12 | finetuning_type: lora
13 | 
14 | ### dataset
15 | eval_dataset: identity,alpaca_en_demo
16 | template: llama3
17 | cutoff_len: 2048
18 | max_samples: 50
19 | overwrite_cache: true
20 | preprocessing_num_workers: 16
21 | 
22 | ### output
23 | output_dir: saves/llama3-8b/lora/predict
24 | overwrite_output_dir: true
25 | 
26 | ### eval
27 | per_device_eval_batch_size: 1
28 | predict_with_generate: true
29 | ddp_timeout: 180000000
30 | 


--------------------------------------------------------------------------------
/train/examples/extras/pissa/init.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | python scripts/pissa_init.py \
4 |     --model_name_or_path meta-llama/Meta-Llama-3-8B-Instruct \
5 |     --output_dir models/llama3-8b-pissa
6 | 


--------------------------------------------------------------------------------
/train/examples/extras/pissa/llama3_lora_sft.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: sft
 7 | do_train: true
 8 | finetuning_type: lora
 9 | lora_rank: 8
10 | lora_target: all
11 | pissa_init: true
12 | pissa_iter: 16
13 | pissa_convert: true
14 | 
15 | ### dataset
16 | dataset: identity,alpaca_en_demo
17 | template: llama3
18 | cutoff_len: 2048
19 | max_samples: 1000
20 | overwrite_cache: true
21 | preprocessing_num_workers: 16
22 | 
23 | ### output
24 | output_dir: saves/llama3-8b/lora/sft
25 | logging_steps: 10
26 | save_steps: 500
27 | plot_loss: true
28 | overwrite_output_dir: true
29 | 
30 | ### train
31 | per_device_train_batch_size: 1
32 | gradient_accumulation_steps: 8
33 | learning_rate: 1.0e-4
34 | num_train_epochs: 3.0
35 | lr_scheduler_type: cosine
36 | warmup_ratio: 0.1
37 | bf16: true
38 | ddp_timeout: 180000000
39 | 
40 | ### eval
41 | # val_size: 0.1
42 | # per_device_eval_batch_size: 1
43 | # eval_strategy: steps
44 | # eval_steps: 500
45 | 


--------------------------------------------------------------------------------
/train/examples/inference/llama3.yaml:
--------------------------------------------------------------------------------
1 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
2 | template: llama3
3 | infer_backend: huggingface  # choices: [huggingface, vllm]
4 | trust_remote_code: true
5 | 


--------------------------------------------------------------------------------
/train/examples/inference/llama3_full_sft.yaml:
--------------------------------------------------------------------------------
1 | model_name_or_path: saves/llama3-8b/full/sft
2 | template: llama3
3 | infer_backend: huggingface  # choices: [huggingface, vllm]
4 | trust_remote_code: true
5 | 


--------------------------------------------------------------------------------
/train/examples/inference/llama3_lora_sft.yaml:
--------------------------------------------------------------------------------
1 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
2 | adapter_name_or_path: saves/llama3-8b/lora/sft
3 | template: llama3
4 | infer_backend: huggingface  # choices: [huggingface, vllm]
5 | trust_remote_code: true
6 | 


--------------------------------------------------------------------------------
/train/examples/inference/llama3_vllm.yaml:
--------------------------------------------------------------------------------
1 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
2 | template: llama3
3 | infer_backend: vllm
4 | vllm_enforce_eager: true
5 | trust_remote_code: true
6 | 


--------------------------------------------------------------------------------
/train/examples/inference/llava1_5.yaml:
--------------------------------------------------------------------------------
1 | model_name_or_path: llava-hf/llava-1.5-7b-hf
2 | template: llava
3 | infer_backend: huggingface  # choices: [huggingface, vllm]
4 | trust_remote_code: true
5 | 


--------------------------------------------------------------------------------
/train/examples/inference/qwen2_vl.yaml:
--------------------------------------------------------------------------------
1 | model_name_or_path: Qwen/Qwen2-VL-7B-Instruct
2 | template: qwen2_vl
3 | infer_backend: huggingface  # choices: [huggingface, vllm]
4 | trust_remote_code: true
5 | 


--------------------------------------------------------------------------------
/train/examples/merge_lora/llama3_gptq.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | template: llama3
 4 | trust_remote_code: true
 5 | 
 6 | ### export
 7 | export_dir: models/llama3_gptq
 8 | export_quantization_bit: 4
 9 | export_quantization_dataset: data/c4_demo.json
10 | export_size: 2
11 | export_device: cpu
12 | export_legacy_format: false
13 | 


--------------------------------------------------------------------------------
/train/examples/merge_lora/llama3_lora_sft.yaml:
--------------------------------------------------------------------------------
 1 | ### Note: DO NOT use quantized model or quantization_bit when merging lora adapters
 2 | 
 3 | ### model
 4 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 5 | adapter_name_or_path: saves/llama3-8b/lora/sft
 6 | template: llama3
 7 | finetuning_type: lora
 8 | trust_remote_code: true
 9 | 
10 | ### export
11 | export_dir: models/llama3_lora_sft
12 | export_size: 2
13 | export_device: cpu
14 | export_legacy_format: false
15 | 


--------------------------------------------------------------------------------
/train/examples/merge_lora/qwen2vl_lora_sft.yaml:
--------------------------------------------------------------------------------
 1 | ### Note: DO NOT use quantized model or quantization_bit when merging lora adapters
 2 | 
 3 | ### model
 4 | model_name_or_path: Qwen/Qwen2-VL-7B-Instruct
 5 | adapter_name_or_path: saves/qwen2_vl-7b/lora/sft
 6 | template: qwen2_vl
 7 | finetuning_type: lora
 8 | trust_remote_code: true
 9 | 
10 | ### export
11 | export_dir: models/qwen2_vl_lora_sft
12 | export_size: 2
13 | export_device: cpu
14 | export_legacy_format: false
15 | 


--------------------------------------------------------------------------------
/train/examples/train_full/llama3_full_sft.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: sft
 7 | do_train: true
 8 | finetuning_type: full
 9 | deepspeed: examples/deepspeed/ds_z3_config.json  # choices: [ds_z0_config.json, ds_z2_config.json, ds_z3_config.json]
10 | 
11 | ### dataset
12 | dataset: identity,alpaca_en_demo
13 | template: llama3
14 | cutoff_len: 2048
15 | max_samples: 1000
16 | overwrite_cache: true
17 | preprocessing_num_workers: 16
18 | 
19 | ### output
20 | output_dir: saves/llama3-8b/full/sft
21 | logging_steps: 10
22 | save_steps: 500
23 | plot_loss: true
24 | overwrite_output_dir: true
25 | 
26 | ### train
27 | per_device_train_batch_size: 1
28 | gradient_accumulation_steps: 2
29 | learning_rate: 1.0e-5
30 | num_train_epochs: 3.0
31 | lr_scheduler_type: cosine
32 | warmup_ratio: 0.1
33 | bf16: true
34 | ddp_timeout: 180000000
35 | 
36 | ### eval
37 | # val_size: 0.1
38 | # per_device_eval_batch_size: 1
39 | # eval_strategy: steps
40 | # eval_steps: 500
41 | 


--------------------------------------------------------------------------------
/train/examples/train_full/qwen2vl_full_sft.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: Qwen/Qwen2-VL-7B-Instruct
 3 | image_resolution: 262144
 4 | video_resolution: 16384
 5 | trust_remote_code: true
 6 | 
 7 | ### method
 8 | stage: sft
 9 | do_train: true
10 | finetuning_type: full
11 | freeze_vision_tower: true  # choices: [true, false]
12 | freeze_multi_modal_projector: true  # choices: [true, false]
13 | train_mm_proj_only: false  # choices: [true, false]
14 | deepspeed: examples/deepspeed/ds_z3_config.json  # choices: [ds_z0_config.json, ds_z2_config.json, ds_z3_config.json]
15 | 
16 | ### dataset
17 | dataset: mllm_demo,identity,alpaca_en_demo
18 | template: qwen2_vl
19 | cutoff_len: 2048
20 | max_samples: 1000
21 | overwrite_cache: true
22 | preprocessing_num_workers: 16
23 | 
24 | ### output
25 | output_dir: saves/qwen2_vl-7b/full/sft
26 | logging_steps: 10
27 | save_steps: 500
28 | plot_loss: true
29 | overwrite_output_dir: true
30 | 
31 | ### train
32 | per_device_train_batch_size: 1
33 | gradient_accumulation_steps: 2
34 | learning_rate: 1.0e-5
35 | num_train_epochs: 3.0
36 | lr_scheduler_type: cosine
37 | warmup_ratio: 0.1
38 | bf16: true
39 | ddp_timeout: 180000000
40 | 
41 | ### eval
42 | # val_size: 0.1
43 | # per_device_eval_batch_size: 1
44 | # eval_strategy: steps
45 | # eval_steps: 500
46 | 


--------------------------------------------------------------------------------
/train/examples/train_limo.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: Qwen/Qwen2.5-32B-Instruct
 3 | 
 4 | ### method
 5 | stage: sft
 6 | do_train: true
 7 | finetuning_type: full
 8 | deepspeed: examples/deepspeed/ds_z3_config.json
 9 | flash_attn: fa2
10 | 
11 | ### dataset
12 | dataset: <the limo dataset>
13 | cutoff_len: 16384
14 | overwrite_cache: true
15 | preprocessing_num_workers: 64
16 | template: qwen
17 | 
18 | ### output
19 | output_dir: <custom your own path>
20 | logging_steps: 1
21 | save_strategy: epoch
22 | plot_loss: true
23 | overwrite_output_dir: true
24 | 
25 | ### train
26 | per_device_train_batch_size: 1
27 | gradient_accumulation_steps: 1
28 | learning_rate: 5.0e-6
29 | num_train_epochs: 15
30 | lr_scheduler_type: cosine
31 | warmup_ratio: 0.0
32 | bf16: true
33 | ddp_timeout: 180000000
34 | 


--------------------------------------------------------------------------------
/train/examples/train_lora/llama3_lora_dpo.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: dpo
 7 | do_train: true
 8 | finetuning_type: lora
 9 | lora_rank: 8
10 | lora_target: all
11 | pref_beta: 0.1
12 | pref_loss: sigmoid  # choices: [sigmoid (dpo), orpo, simpo]
13 | 
14 | ### dataset
15 | dataset: dpo_en_demo
16 | template: llama3
17 | cutoff_len: 2048
18 | max_samples: 1000
19 | overwrite_cache: true
20 | preprocessing_num_workers: 16
21 | 
22 | ### output
23 | output_dir: saves/llama3-8b/lora/dpo
24 | logging_steps: 10
25 | save_steps: 500
26 | plot_loss: true
27 | overwrite_output_dir: true
28 | 
29 | ### train
30 | per_device_train_batch_size: 1
31 | gradient_accumulation_steps: 8
32 | learning_rate: 5.0e-6
33 | num_train_epochs: 3.0
34 | lr_scheduler_type: cosine
35 | warmup_ratio: 0.1
36 | bf16: true
37 | ddp_timeout: 180000000
38 | 
39 | ### eval
40 | # val_size: 0.1
41 | # per_device_eval_batch_size: 1
42 | # eval_strategy: steps
43 | # eval_steps: 500
44 | 


--------------------------------------------------------------------------------
/train/examples/train_lora/llama3_lora_eval.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | adapter_name_or_path: saves/llama3-8b/lora/sft
 4 | trust_remote_code: true
 5 | 
 6 | ### method
 7 | finetuning_type: lora
 8 | 
 9 | ### dataset
10 | task: mmlu_test  # choices: [mmlu_test, ceval_validation, cmmlu_test]
11 | template: fewshot
12 | lang: en
13 | n_shot: 5
14 | 
15 | ### output
16 | save_dir: saves/llama3-8b/lora/eval
17 | 
18 | ### eval
19 | batch_size: 4
20 | 


--------------------------------------------------------------------------------
/train/examples/train_lora/llama3_lora_kto.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: kto
 7 | do_train: true
 8 | finetuning_type: lora
 9 | lora_rank: 8
10 | lora_target: all
11 | pref_beta: 0.1
12 | 
13 | ### dataset
14 | dataset: kto_en_demo
15 | template: llama3
16 | cutoff_len: 2048
17 | max_samples: 1000
18 | overwrite_cache: true
19 | preprocessing_num_workers: 16
20 | 
21 | ### output
22 | output_dir: saves/llama3-8b/lora/kto
23 | logging_steps: 10
24 | save_steps: 500
25 | plot_loss: true
26 | overwrite_output_dir: true
27 | 
28 | ### train
29 | per_device_train_batch_size: 1
30 | gradient_accumulation_steps: 8
31 | learning_rate: 5.0e-6
32 | num_train_epochs: 3.0
33 | lr_scheduler_type: cosine
34 | warmup_ratio: 0.1
35 | bf16: true
36 | ddp_timeout: 180000000
37 | 
38 | ### eval
39 | # val_size: 0.1
40 | # per_device_eval_batch_size: 1
41 | # eval_strategy: steps
42 | # eval_steps: 500
43 | 


--------------------------------------------------------------------------------
/train/examples/train_lora/llama3_lora_ppo.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | reward_model: saves/llama3-8b/lora/reward
 4 | trust_remote_code: true
 5 | 
 6 | ### method
 7 | stage: ppo
 8 | do_train: true
 9 | finetuning_type: lora
10 | lora_rank: 8
11 | lora_target: all
12 | 
13 | ### dataset
14 | dataset: identity,alpaca_en_demo
15 | template: llama3
16 | cutoff_len: 2048
17 | max_samples: 1000
18 | overwrite_cache: true
19 | preprocessing_num_workers: 16
20 | 
21 | ### output
22 | output_dir: saves/llama3-8b/lora/ppo
23 | logging_steps: 10
24 | save_steps: 500
25 | plot_loss: true
26 | overwrite_output_dir: true
27 | 
28 | ### train
29 | per_device_train_batch_size: 1
30 | gradient_accumulation_steps: 8
31 | learning_rate: 1.0e-5
32 | num_train_epochs: 3.0
33 | lr_scheduler_type: cosine
34 | warmup_ratio: 0.1
35 | bf16: true
36 | ddp_timeout: 180000000
37 | 
38 | ### generate
39 | max_new_tokens: 512
40 | top_k: 0
41 | top_p: 0.9
42 | 


--------------------------------------------------------------------------------
/train/examples/train_lora/llama3_lora_pretrain.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: pt
 7 | do_train: true
 8 | finetuning_type: lora
 9 | lora_rank: 8
10 | lora_target: all
11 | 
12 | ### dataset
13 | dataset: c4_demo
14 | cutoff_len: 2048
15 | max_samples: 1000
16 | overwrite_cache: true
17 | preprocessing_num_workers: 16
18 | 
19 | ### output
20 | output_dir: saves/llama3-8b/lora/pretrain
21 | logging_steps: 10
22 | save_steps: 500
23 | plot_loss: true
24 | overwrite_output_dir: true
25 | 
26 | ### train
27 | per_device_train_batch_size: 1
28 | gradient_accumulation_steps: 8
29 | learning_rate: 1.0e-4
30 | num_train_epochs: 3.0
31 | lr_scheduler_type: cosine
32 | warmup_ratio: 0.1
33 | bf16: true
34 | ddp_timeout: 180000000
35 | 
36 | ### eval
37 | # val_size: 0.1
38 | # per_device_eval_batch_size: 1
39 | # eval_strategy: steps
40 | # eval_steps: 500
41 | 


--------------------------------------------------------------------------------
/train/examples/train_lora/llama3_lora_reward.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: rm
 7 | do_train: true
 8 | finetuning_type: lora
 9 | lora_rank: 8
10 | lora_target: all
11 | 
12 | ### dataset
13 | dataset: dpo_en_demo
14 | template: llama3
15 | cutoff_len: 2048
16 | max_samples: 1000
17 | overwrite_cache: true
18 | preprocessing_num_workers: 16
19 | 
20 | ### output
21 | output_dir: saves/llama3-8b/lora/reward
22 | logging_steps: 10
23 | save_steps: 500
24 | plot_loss: true
25 | overwrite_output_dir: true
26 | 
27 | ### train
28 | per_device_train_batch_size: 1
29 | gradient_accumulation_steps: 8
30 | learning_rate: 1.0e-4
31 | num_train_epochs: 3.0
32 | lr_scheduler_type: cosine
33 | warmup_ratio: 0.1
34 | bf16: true
35 | ddp_timeout: 180000000
36 | 
37 | ### eval
38 | # val_size: 0.1
39 | # per_device_eval_batch_size: 1
40 | # eval_strategy: steps
41 | # eval_steps: 500
42 | 


--------------------------------------------------------------------------------
/train/examples/train_lora/llama3_lora_sft.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: sft
 7 | do_train: true
 8 | finetuning_type: lora
 9 | lora_rank: 8
10 | lora_target: all
11 | 
12 | ### dataset
13 | dataset: identity,alpaca_en_demo
14 | template: llama3
15 | cutoff_len: 2048
16 | max_samples: 1000
17 | overwrite_cache: true
18 | preprocessing_num_workers: 16
19 | 
20 | ### output
21 | output_dir: saves/llama3-8b/lora/sft
22 | logging_steps: 10
23 | save_steps: 500
24 | plot_loss: true
25 | overwrite_output_dir: true
26 | 
27 | ### train
28 | per_device_train_batch_size: 1
29 | gradient_accumulation_steps: 8
30 | learning_rate: 1.0e-4
31 | num_train_epochs: 3.0
32 | lr_scheduler_type: cosine
33 | warmup_ratio: 0.1
34 | bf16: true
35 | ddp_timeout: 180000000
36 | 
37 | ### eval
38 | # val_size: 0.1
39 | # per_device_eval_batch_size: 1
40 | # eval_strategy: steps
41 | # eval_steps: 500
42 | 


--------------------------------------------------------------------------------
/train/examples/train_lora/llama3_lora_sft_ds3.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: sft
 7 | do_train: true
 8 | finetuning_type: lora
 9 | lora_rank: 8
10 | lora_target: all
11 | deepspeed: examples/deepspeed/ds_z3_config.json  # choices: [ds_z0_config.json, ds_z2_config.json, ds_z3_config.json]
12 | 
13 | ### dataset
14 | dataset: identity,alpaca_en_demo
15 | template: llama3
16 | cutoff_len: 2048
17 | max_samples: 1000
18 | overwrite_cache: true
19 | preprocessing_num_workers: 16
20 | 
21 | ### output
22 | output_dir: saves/llama3-8b/lora/sft
23 | logging_steps: 10
24 | save_steps: 500
25 | plot_loss: true
26 | overwrite_output_dir: true
27 | 
28 | ### train
29 | per_device_train_batch_size: 1
30 | gradient_accumulation_steps: 2
31 | learning_rate: 1.0e-4
32 | num_train_epochs: 3.0
33 | lr_scheduler_type: cosine
34 | warmup_ratio: 0.1
35 | bf16: true
36 | ddp_timeout: 180000000
37 | 
38 | ### eval
39 | # val_size: 0.1
40 | # per_device_eval_batch_size: 1
41 | # eval_strategy: steps
42 | # eval_steps: 500
43 | 


--------------------------------------------------------------------------------
/train/examples/train_lora/llama3_lora_sft_ray.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct  # or use local absolute path
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: sft
 7 | do_train: true
 8 | finetuning_type: lora
 9 | lora_rank: 8
10 | lora_target: all
11 | 
12 | ### dataset
13 | dataset: identity,alpaca_en_demo
14 | dataset_dir: REMOTE:llamafactory/demo_data  # or use local absolute path
15 | template: llama3
16 | cutoff_len: 2048
17 | max_samples: 1000
18 | overwrite_cache: true
19 | preprocessing_num_workers: 16
20 | 
21 | ### output
22 | output_dir: tmp_dir
23 | logging_steps: 10
24 | save_steps: 500
25 | plot_loss: true
26 | overwrite_output_dir: true
27 | 
28 | ### ray
29 | ray_run_name: llama3_8b_sft_lora
30 | ray_num_workers: 4  # number of GPUs to use
31 | resources_per_worker:
32 |   GPU: 1
33 | placement_strategy: PACK
34 | 
35 | ### train
36 | per_device_train_batch_size: 1
37 | gradient_accumulation_steps: 8
38 | learning_rate: 1.0e-4
39 | num_train_epochs: 3.0
40 | lr_scheduler_type: cosine
41 | warmup_ratio: 0.1
42 | bf16: true
43 | ddp_timeout: 180000000
44 | 
45 | ### eval
46 | # val_size: 0.1
47 | # per_device_eval_batch_size: 1
48 | # eval_strategy: steps
49 | # eval_steps: 500
50 | 


--------------------------------------------------------------------------------
/train/examples/train_lora/llama3_preprocess.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: sft
 7 | do_train: true
 8 | finetuning_type: lora
 9 | lora_rank: 8
10 | lora_target: all
11 | 
12 | ### dataset
13 | dataset: identity,alpaca_en_demo
14 | template: llama3
15 | cutoff_len: 2048
16 | max_samples: 1000
17 | overwrite_cache: true
18 | preprocessing_num_workers: 16
19 | tokenized_path: saves/llama3-8b/dataset/sft
20 | 
21 | ### output
22 | output_dir: saves/llama3-8b/lora/sft
23 | overwrite_output_dir: true
24 | 


--------------------------------------------------------------------------------
/train/examples/train_lora/llava1_5_lora_sft.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: llava-hf/llava-1.5-7b-hf
 3 | image_resolution: 262144
 4 | video_resolution: 16384
 5 | trust_remote_code: true
 6 | 
 7 | ### method
 8 | stage: sft
 9 | do_train: true
10 | finetuning_type: lora
11 | lora_rank: 8
12 | lora_target: all
13 | 
14 | ### dataset
15 | dataset: mllm_demo
16 | template: llava
17 | cutoff_len: 2048
18 | max_samples: 1000
19 | overwrite_cache: true
20 | preprocessing_num_workers: 16
21 | 
22 | ### output
23 | output_dir: saves/llava1_5-7b/lora/sft
24 | logging_steps: 10
25 | save_steps: 500
26 | plot_loss: true
27 | overwrite_output_dir: true
28 | 
29 | ### train
30 | per_device_train_batch_size: 1
31 | gradient_accumulation_steps: 8
32 | learning_rate: 1.0e-4
33 | num_train_epochs: 3.0
34 | lr_scheduler_type: cosine
35 | warmup_ratio: 0.1
36 | bf16: true
37 | ddp_timeout: 180000000
38 | 
39 | ### eval
40 | # val_size: 0.1
41 | # per_device_eval_batch_size: 1
42 | # eval_strategy: steps
43 | # eval_steps: 500
44 | 


--------------------------------------------------------------------------------
/train/examples/train_lora/qwen2vl_lora_dpo.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: Qwen/Qwen2-VL-7B-Instruct
 3 | image_resolution: 262144
 4 | video_resolution: 16384
 5 | trust_remote_code: true
 6 | 
 7 | ### method
 8 | stage: dpo
 9 | do_train: true
10 | finetuning_type: lora
11 | lora_rank: 8
12 | lora_target: all
13 | pref_beta: 0.1
14 | pref_loss: sigmoid  # choices: [sigmoid (dpo), orpo, simpo]
15 | 
16 | ### dataset
17 | dataset: rlhf_v
18 | template: qwen2_vl
19 | cutoff_len: 2048
20 | max_samples: 1000
21 | overwrite_cache: true
22 | preprocessing_num_workers: 16
23 | 
24 | ### output
25 | output_dir: saves/qwen2_vl-7b/lora/dpo
26 | logging_steps: 10
27 | save_steps: 500
28 | plot_loss: true
29 | overwrite_output_dir: true
30 | 
31 | ### train
32 | per_device_train_batch_size: 1
33 | gradient_accumulation_steps: 8
34 | learning_rate: 5.0e-6
35 | num_train_epochs: 3.0
36 | lr_scheduler_type: cosine
37 | warmup_ratio: 0.1
38 | bf16: true
39 | ddp_timeout: 180000000
40 | 
41 | ### eval
42 | # val_size: 0.1
43 | # per_device_eval_batch_size: 1
44 | # eval_strategy: steps
45 | # eval_steps: 500
46 | 


--------------------------------------------------------------------------------
/train/examples/train_lora/qwen2vl_lora_sft.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: Qwen/Qwen2-VL-7B-Instruct
 3 | image_resolution: 262144
 4 | video_resolution: 16384
 5 | trust_remote_code: true
 6 | 
 7 | ### method
 8 | stage: sft
 9 | do_train: true
10 | finetuning_type: lora
11 | lora_rank: 8
12 | lora_target: all
13 | 
14 | ### dataset
15 | dataset: mllm_demo,identity,alpaca_en_demo  # video: mllm_video_demo
16 | template: qwen2_vl
17 | cutoff_len: 2048
18 | max_samples: 1000
19 | overwrite_cache: true
20 | preprocessing_num_workers: 16
21 | 
22 | ### output
23 | output_dir: saves/qwen2_vl-7b/lora/sft
24 | logging_steps: 10
25 | save_steps: 500
26 | plot_loss: true
27 | overwrite_output_dir: true
28 | 
29 | ### train
30 | per_device_train_batch_size: 1
31 | gradient_accumulation_steps: 8
32 | learning_rate: 1.0e-4
33 | num_train_epochs: 3.0
34 | lr_scheduler_type: cosine
35 | warmup_ratio: 0.1
36 | bf16: true
37 | ddp_timeout: 180000000
38 | 
39 | ### eval
40 | # val_size: 0.1
41 | # per_device_eval_batch_size: 1
42 | # eval_strategy: steps
43 | # eval_steps: 500
44 | 


--------------------------------------------------------------------------------
/train/examples/train_qlora/llama3_lora_sft_aqlm.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: ISTA-DASLab/Meta-Llama-3-8B-Instruct-AQLM-2Bit-1x16
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: sft
 7 | do_train: true
 8 | finetuning_type: lora
 9 | lora_rank: 8
10 | lora_target: all
11 | 
12 | ### dataset
13 | dataset: identity,alpaca_en_demo
14 | template: llama3
15 | cutoff_len: 2048
16 | max_samples: 1000
17 | overwrite_cache: true
18 | preprocessing_num_workers: 16
19 | 
20 | ### output
21 | output_dir: saves/llama3-8b/lora/sft
22 | logging_steps: 10
23 | save_steps: 500
24 | plot_loss: true
25 | overwrite_output_dir: true
26 | 
27 | ### train
28 | per_device_train_batch_size: 1
29 | gradient_accumulation_steps: 8
30 | learning_rate: 1.0e-4
31 | num_train_epochs: 3.0
32 | lr_scheduler_type: cosine
33 | warmup_ratio: 0.1
34 | bf16: true
35 | ddp_timeout: 180000000
36 | 
37 | ### eval
38 | # val_size: 0.1
39 | # per_device_eval_batch_size: 1
40 | # eval_strategy: steps
41 | # eval_steps: 500
42 | 


--------------------------------------------------------------------------------
/train/examples/train_qlora/llama3_lora_sft_awq.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: TechxGenus/Meta-Llama-3-8B-Instruct-AWQ
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: sft
 7 | do_train: true
 8 | finetuning_type: lora
 9 | lora_rank: 8
10 | lora_target: all
11 | 
12 | ### dataset
13 | dataset: identity,alpaca_en_demo
14 | template: llama3
15 | cutoff_len: 2048
16 | max_samples: 1000
17 | overwrite_cache: true
18 | preprocessing_num_workers: 16
19 | 
20 | ### output
21 | output_dir: saves/llama3-8b/lora/sft
22 | logging_steps: 10
23 | save_steps: 500
24 | plot_loss: true
25 | overwrite_output_dir: true
26 | 
27 | ### train
28 | per_device_train_batch_size: 1
29 | gradient_accumulation_steps: 8
30 | learning_rate: 1.0e-4
31 | num_train_epochs: 3.0
32 | lr_scheduler_type: cosine
33 | warmup_ratio: 0.1
34 | bf16: true
35 | ddp_timeout: 180000000
36 | 
37 | ### eval
38 | # val_size: 0.1
39 | # per_device_eval_batch_size: 1
40 | # eval_strategy: steps
41 | # eval_steps: 500
42 | 


--------------------------------------------------------------------------------
/train/examples/train_qlora/llama3_lora_sft_bnb_npu.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | quantization_bit: 4
 4 | quantization_method: bitsandbytes
 5 | double_quantization: false
 6 | trust_remote_code: true
 7 | 
 8 | ### method
 9 | stage: sft
10 | do_train: true
11 | finetuning_type: lora
12 | lora_rank: 8
13 | lora_target: all
14 | 
15 | ### dataset
16 | dataset: identity,alpaca_en_demo
17 | template: llama3
18 | cutoff_len: 2048
19 | max_samples: 1000
20 | overwrite_cache: true
21 | preprocessing_num_workers: 16
22 | 
23 | ### output
24 | output_dir: saves/llama3-8b/lora/sft
25 | logging_steps: 10
26 | save_steps: 500
27 | plot_loss: true
28 | overwrite_output_dir: true
29 | 
30 | ### train
31 | per_device_train_batch_size: 1
32 | gradient_accumulation_steps: 8
33 | learning_rate: 1.0e-4
34 | num_train_epochs: 3.0
35 | lr_scheduler_type: cosine
36 | warmup_ratio: 0.1
37 | bf16: true
38 | ddp_timeout: 180000000
39 | 
40 | ### eval
41 | # val_size: 0.1
42 | # per_device_eval_batch_size: 1
43 | # eval_strategy: steps
44 | # eval_steps: 500
45 | 


--------------------------------------------------------------------------------
/train/examples/train_qlora/llama3_lora_sft_gptq.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: TechxGenus/Meta-Llama-3-8B-Instruct-GPTQ
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: sft
 7 | do_train: true
 8 | finetuning_type: lora
 9 | lora_rank: 8
10 | lora_target: all
11 | 
12 | ### dataset
13 | dataset: identity,alpaca_en_demo
14 | template: llama3
15 | cutoff_len: 2048
16 | max_samples: 1000
17 | overwrite_cache: true
18 | preprocessing_num_workers: 16
19 | 
20 | ### output
21 | output_dir: saves/llama3-8b/lora/sft
22 | logging_steps: 10
23 | save_steps: 500
24 | plot_loss: true
25 | overwrite_output_dir: true
26 | 
27 | ### train
28 | per_device_train_batch_size: 1
29 | gradient_accumulation_steps: 8
30 | learning_rate: 1.0e-4
31 | num_train_epochs: 3.0
32 | lr_scheduler_type: cosine
33 | warmup_ratio: 0.1
34 | bf16: true
35 | ddp_timeout: 180000000
36 | 
37 | ### eval
38 | # val_size: 0.1
39 | # per_device_eval_batch_size: 1
40 | # eval_strategy: steps
41 | # eval_steps: 500
42 | 


--------------------------------------------------------------------------------
/train/examples/train_qlora/llama3_lora_sft_otfq.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | quantization_bit: 4
 4 | quantization_method: bitsandbytes  # choices: [bitsandbytes (4/8), hqq (2/3/4/5/6/8), eetq (8)]
 5 | trust_remote_code: true
 6 | 
 7 | ### method
 8 | stage: sft
 9 | do_train: true
10 | finetuning_type: lora
11 | lora_rank: 8
12 | lora_target: all
13 | 
14 | ### dataset
15 | dataset: identity,alpaca_en_demo
16 | template: llama3
17 | cutoff_len: 2048
18 | max_samples: 1000
19 | overwrite_cache: true
20 | preprocessing_num_workers: 16
21 | 
22 | ### output
23 | output_dir: saves/llama3-8b/lora/sft
24 | logging_steps: 10
25 | save_steps: 500
26 | plot_loss: true
27 | overwrite_output_dir: true
28 | 
29 | ### train
30 | per_device_train_batch_size: 1
31 | gradient_accumulation_steps: 8
32 | learning_rate: 1.0e-4
33 | num_train_epochs: 3.0
34 | lr_scheduler_type: cosine
35 | warmup_ratio: 0.1
36 | bf16: true
37 | ddp_timeout: 180000000
38 | 
39 | ### eval
40 | # val_size: 0.1
41 | # per_device_eval_batch_size: 1
42 | # eval_strategy: steps
43 | # eval_steps: 500
44 | 


--------------------------------------------------------------------------------
/train/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["setuptools>=61.0"]
 3 | build-backend = "setuptools.build_meta"
 4 | 
 5 | [tool.ruff]
 6 | target-version = "py38"
 7 | line-length = 119
 8 | indent-width = 4
 9 | 
10 | [tool.ruff.lint]
11 | ignore = ["C408", "C901", "E501", "E731", "E741", "W605"]
12 | select = ["C", "E", "F", "I", "W"]
13 | 
14 | [tool.ruff.lint.isort]
15 | lines-after-imports = 2
16 | known-first-party = ["llamafactory"]
17 | known-third-party = [
18 |     "accelerate",
19 |     "datasets",
20 |     "gradio",
21 |     "numpy",
22 |     "peft",
23 |     "torch",
24 |     "transformers",
25 |     "trl"
26 | ]
27 | 
28 | [tool.ruff.format]
29 | quote-style = "double"
30 | indent-style = "space"
31 | docstring-code-format = true
32 | skip-magic-trailing-comma = false
33 | line-ending = "auto"
34 | 


--------------------------------------------------------------------------------
/train/requirements.txt:
--------------------------------------------------------------------------------
 1 | transformers>=4.41.2,<=4.45.2;python_version<'3.10'
 2 | transformers>=4.41.2,<=4.48.2,!=4.46.*,!=4.47.*,!=4.48.0;python_version>='3.10'
 3 | datasets>=2.16.0,<=3.2.0
 4 | accelerate>=0.34.0,<=1.2.1
 5 | peft>=0.11.1,<=0.12.0
 6 | trl>=0.8.6,<=0.9.6
 7 | tokenizers>=0.19.0,<=0.21.0
 8 | gradio>=4.38.0,<=5.12.0
 9 | pandas>=2.0.0
10 | scipy
11 | einops
12 | sentencepiece
13 | tiktoken
14 | protobuf
15 | uvicorn
16 | pydantic
17 | fastapi
18 | sse-starlette
19 | matplotlib>=3.7.0
20 | fire
21 | packaging
22 | pyyaml
23 | numpy<2.0.0
24 | av
25 | librosa
26 | tyro<0.9.0
27 | 


--------------------------------------------------------------------------------
/train/scripts/api_example/test_image.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | 
17 | from openai import OpenAI
18 | from transformers.utils.versions import require_version
19 | 
20 | 
21 | require_version("openai>=1.5.0", "To fix: pip install openai>=1.5.0")
22 | 
23 | 
24 | def main():
25 |     client = OpenAI(
26 |         api_key="{}".format(os.environ.get("API_KEY", "0")),
27 |         base_url="http://localhost:{}/v1".format(os.environ.get("API_PORT", 8000)),
28 |     )
29 |     messages = []
30 |     messages.append(
31 |         {
32 |             "role": "user",
33 |             "content": [
34 |                 {"type": "text", "text": "Output the color and number of each box."},
35 |                 {
36 |                     "type": "image_url",
37 |                     "image_url": {"url": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2-VL/boxes.png"},
38 |                 },
39 |             ],
40 |         }
41 |     )
42 |     result = client.chat.completions.create(messages=messages, model="test")
43 |     messages.append(result.choices[0].message)
44 |     print("Round 1:", result.choices[0].message.content)
45 |     # The image shows a pyramid of colored blocks with numbers on them. Here are the colors and numbers of ...
46 |     messages.append(
47 |         {
48 |             "role": "user",
49 |             "content": [
50 |                 {"type": "text", "text": "What kind of flower is this?"},
51 |                 {
52 |                     "type": "image_url",
53 |                     "image_url": {"url": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2-VL/flowers.jpg"},
54 |                 },
55 |             ],
56 |         }
57 |     )
58 |     result = client.chat.completions.create(messages=messages, model="test")
59 |     messages.append(result.choices[0].message)
60 |     print("Round 2:", result.choices[0].message.content)
61 |     # The image shows a cluster of forget-me-not flowers. Forget-me-nots are small ...
62 | 
63 | 
64 | if __name__ == "__main__":
65 |     main()
66 | 


--------------------------------------------------------------------------------
/train/scripts/api_example/test_toolcall.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import json
16 | import os
17 | from typing import Sequence
18 | 
19 | from openai import OpenAI
20 | from transformers.utils.versions import require_version
21 | 
22 | 
23 | require_version("openai>=1.5.0", "To fix: pip install openai>=1.5.0")
24 | 
25 | 
26 | def calculate_gpa(grades: Sequence[str], hours: Sequence[int]) -> float:
27 |     grade_to_score = {"A": 4, "B": 3, "C": 2}
28 |     total_score, total_hour = 0, 0
29 |     for grade, hour in zip(grades, hours):
30 |         total_score += grade_to_score[grade] * hour
31 |         total_hour += hour
32 |     return round(total_score / total_hour, 2)
33 | 
34 | 
35 | def main():
36 |     client = OpenAI(
37 |         api_key="{}".format(os.environ.get("API_KEY", "0")),
38 |         base_url="http://localhost:{}/v1".format(os.environ.get("API_PORT", 8000)),
39 |     )
40 |     tools = [
41 |         {
42 |             "type": "function",
43 |             "function": {
44 |                 "name": "calculate_gpa",
45 |                 "description": "Calculate the Grade Point Average (GPA) based on grades and credit hours",
46 |                 "parameters": {
47 |                     "type": "object",
48 |                     "properties": {
49 |                         "grades": {"type": "array", "items": {"type": "string"}, "description": "The grades"},
50 |                         "hours": {"type": "array", "items": {"type": "integer"}, "description": "The credit hours"},
51 |                     },
52 |                     "required": ["grades", "hours"],
53 |                 },
54 |             },
55 |         }
56 |     ]
57 |     tool_map = {"calculate_gpa": calculate_gpa}
58 | 
59 |     messages = []
60 |     messages.append({"role": "user", "content": "My grades are A, A, B, and C. The credit hours are 3, 4, 3, and 2."})
61 |     result = client.chat.completions.create(messages=messages, model="test", tools=tools)
62 |     if result.choices[0].message.tool_calls is None:
63 |         raise ValueError("Cannot retrieve function call from the response.")
64 | 
65 |     messages.append(result.choices[0].message)
66 |     tool_call = result.choices[0].message.tool_calls[0].function
67 |     print(tool_call)
68 |     # Function(arguments='{"grades": ["A", "A", "B", "C"], "hours": [3, 4, 3, 2]}', name='calculate_gpa')
69 |     name, arguments = tool_call.name, json.loads(tool_call.arguments)
70 |     tool_result = tool_map[name](**arguments)
71 |     messages.append({"role": "tool", "content": json.dumps({"gpa": tool_result}, ensure_ascii=False)})
72 |     result = client.chat.completions.create(messages=messages, model="test", tools=tools)
73 |     print(result.choices[0].message.content)
74 |     # Based on the grades and credit hours you provided, your Grade Point Average (GPA) is 3.42.
75 | 
76 | 
77 | if __name__ == "__main__":
78 |     main()
79 | 


--------------------------------------------------------------------------------
/train/scripts/pissa_init.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 HuggingFace Inc. and the LlamaFactory team.
 2 | #
 3 | # This code is based on the HuggingFace's PEFT library.
 4 | # https://github.com/huggingface/peft/blob/v0.11.0/examples/pissa_finetuning/preprocess.py
 5 | #
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | #
10 | #     http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | 
18 | import os
19 | from typing import TYPE_CHECKING
20 | 
21 | import fire
22 | from peft import LoraConfig, TaskType, get_peft_model
23 | from transformers import AutoModelForCausalLM, AutoTokenizer
24 | 
25 | 
26 | if TYPE_CHECKING:
27 |     from transformers import PreTrainedModel
28 | 
29 | 
30 | def quantize_pissa(
31 |     model_name_or_path: str,
32 |     output_dir: str,
33 |     pissa_iter: int = 16,
34 |     lora_alpha: int = None,
35 |     lora_rank: int = 16,
36 |     lora_dropout: float = 0,
37 |     lora_target: tuple = ("q_proj", "v_proj"),
38 |     save_safetensors: bool = True,
39 | ):
40 |     r"""
41 |     Initializes LoRA weights with Principal Singular values and Singular vectors Adaptation (PiSSA)
42 |     Usage: python pissa_init.py --model_name_or_path path_to_model --output_dir output_dir
43 |     """
44 |     if isinstance(lora_target, str):
45 |         lora_target = [name.strip() for name in lora_target.split(",")]
46 | 
47 |     tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)
48 |     model = AutoModelForCausalLM.from_pretrained(model_name_or_path, trust_remote_code=True, torch_dtype="auto")
49 | 
50 |     lora_config = LoraConfig(
51 |         task_type=TaskType.CAUSAL_LM,
52 |         r=lora_rank,
53 |         lora_alpha=lora_alpha if lora_alpha is not None else lora_rank * 2,
54 |         lora_dropout=lora_dropout,
55 |         target_modules=lora_target,
56 |         init_lora_weights="pissa" if pissa_iter == -1 else f"pissa_niter_{pissa_iter}",
57 |     )
58 | 
59 |     # Init PiSSA model
60 |     peft_model = get_peft_model(model, lora_config)
61 |     pissa_dir = os.path.join(output_dir, "pissa_init")
62 | 
63 |     # Save PiSSA model
64 |     setattr(peft_model.peft_config["default"], "base_model_name_or_path", os.path.abspath(output_dir))
65 |     setattr(peft_model.peft_config["default"], "init_lora_weights", True)  # don't apply pissa again
66 |     peft_model.save_pretrained(pissa_dir, safe_serialization=save_safetensors)
67 |     print(f"Adapter weights saved in {pissa_dir}")
68 | 
69 |     # Save base model
70 |     base_model: "PreTrainedModel" = peft_model.unload()
71 |     base_model.save_pretrained(output_dir, safe_serialization=save_safetensors)
72 |     tokenizer.save_pretrained(output_dir)
73 |     print(f"Model weights saved in {output_dir}")
74 | 
75 |     print("- Fine-tune this model with:")
76 |     print(f"model_name_or_path: {output_dir}")
77 |     print(f"adapter_name_or_path: {pissa_dir}")
78 |     print("finetuning_type: lora")
79 |     print("pissa_init: false")
80 |     print("pissa_convert: true")
81 |     print("- and optionally with:")
82 |     print("quantization_bit: 4")
83 | 
84 | 
85 | if __name__ == "__main__":
86 |     fire.Fire(quantize_pissa)
87 | 


--------------------------------------------------------------------------------
/train/scripts/stat_utils/cal_flops.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 Microsoft Corporation and the LlamaFactory team.
 2 | #
 3 | # This code is inspired by the Microsoft's DeepSpeed library.
 4 | # https://www.deepspeed.ai/tutorials/flops-profiler/
 5 | #
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | #
10 | #     http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | 
18 | import fire
19 | import torch
20 | from deepspeed.accelerator import get_accelerator  # type: ignore
21 | from deepspeed.profiling.flops_profiler import get_model_profile  # type: ignore
22 | 
23 | from llamafactory.chat import ChatModel
24 | 
25 | 
26 | def calculate_flops(
27 |     model_name_or_path: str,
28 |     batch_size: int = 1,
29 |     seq_length: int = 512,
30 |     flash_attn: str = "auto",
31 | ):
32 |     r"""
33 |     Calculates the flops of pre-trained models.
34 |     Usage: python cal_flops.py --model_name_or_path path_to_model --batch_size 1 --seq_length 512
35 |     """
36 |     with get_accelerator().device(0):
37 |         chat_model = ChatModel(dict(model_name_or_path=model_name_or_path, template="empty", flash_attn=flash_attn))
38 |         fake_input = torch.ones((batch_size, seq_length), dtype=torch.long, device=chat_model.engine.model.device)
39 |         input_dict = {"input_ids": fake_input, "labels": fake_input.clone()}
40 |         flops, macs, params = get_model_profile(
41 |             chat_model.engine.model, kwargs=input_dict, print_profile=True, detailed=True
42 |         )
43 |         print("FLOPs:", flops)
44 |         print("MACs:", macs)
45 |         print("Params:", params)
46 | 
47 | 
48 | if __name__ == "__main__":
49 |     fire.Fire(calculate_flops)
50 | 


--------------------------------------------------------------------------------
/train/scripts/stat_utils/length_cdf.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from collections import defaultdict
16 | 
17 | import fire
18 | from tqdm import tqdm
19 | 
20 | from llamafactory.data import get_dataset, get_template_and_fix_tokenizer
21 | from llamafactory.hparams import get_train_args
22 | from llamafactory.model import load_tokenizer
23 | 
24 | 
25 | def length_cdf(
26 |     model_name_or_path: str,
27 |     dataset: str = "alpaca_en_demo",
28 |     dataset_dir: str = "data",
29 |     template: str = "default",
30 |     interval: int = 1000,
31 | ):
32 |     r"""
33 |     Calculates the distribution of the input lengths in the dataset.
34 |     Usage: export CUDA_VISIBLE_DEVICES=0
35 |     python length_cdf.py --model_name_or_path path_to_model --dataset alpaca_en_demo --template default
36 |     """
37 |     model_args, data_args, training_args, _, _ = get_train_args(
38 |         dict(
39 |             stage="sft",
40 |             model_name_or_path=model_name_or_path,
41 |             dataset=dataset,
42 |             dataset_dir=dataset_dir,
43 |             template=template,
44 |             cutoff_len=1_000_000,
45 |             preprocessing_num_workers=16,
46 |             output_dir="dummy_dir",
47 |             overwrite_cache=True,
48 |             do_train=True,
49 |         )
50 |     )
51 |     tokenizer_module = load_tokenizer(model_args)
52 |     template = get_template_and_fix_tokenizer(tokenizer_module["tokenizer"], data_args)
53 |     trainset = get_dataset(template, model_args, data_args, training_args, "sft", **tokenizer_module)["train_dataset"]
54 |     total_num = len(trainset)
55 |     length_dict = defaultdict(int)
56 |     for sample in tqdm(trainset["input_ids"], desc="Collecting lengths"):
57 |         length_dict[len(sample) // interval * interval] += 1
58 | 
59 |     length_tuples = list(length_dict.items())
60 |     length_tuples.sort()
61 |     count_accu, prob_accu = 0, 0
62 |     for length, count in length_tuples:
63 |         count_accu += count
64 |         prob_accu += count / total_num * 100
65 |         print(f"{count_accu:d} ({prob_accu:.2f}%) samples have length < {length + interval}.")
66 | 
67 | 
68 | if __name__ == "__main__":
69 |     fire.Fire(length_cdf)
70 | 


--------------------------------------------------------------------------------
/train/src/api.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | 
17 | import uvicorn
18 | 
19 | from llamafactory.api.app import create_app
20 | from llamafactory.chat import ChatModel
21 | 
22 | 
23 | def main():
24 |     chat_model = ChatModel()
25 |     app = create_app(chat_model)
26 |     api_host = os.getenv("API_HOST", "0.0.0.0")
27 |     api_port = int(os.getenv("API_PORT", "8000"))
28 |     print(f"Visit http://localhost:{api_port}/docs for API document.")
29 |     uvicorn.run(app, host=api_host, port=api_port)
30 | 
31 | 
32 | if __name__ == "__main__":
33 |     main()
34 | 


--------------------------------------------------------------------------------
/train/src/llamafactory/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | r"""
16 | Efficient fine-tuning of large language models.
17 | 
18 | Level:
19 |   api, webui > chat, eval, train > data, model > hparams > extras
20 | 
21 | Dependency graph:
22 |   main:
23 |     transformers>=4.41.2,<=4.48.2,!=4.46.*,!=4.47.*,!=4.48.0
24 |     datasets>=2.16.0,<=3.2.0
25 |     accelerate>=0.34.0,<=1.2.1
26 |     peft>=0.11.1,<=0.12.0
27 |     trl>=0.8.6,<=0.9.6
28 |   attention:
29 |     transformers>=4.42.4 (gemma+fa2)
30 |   longlora:
31 |     transformers>=4.41.2,<4.48.0
32 |   packing:
33 |     transformers>=4.43.0,<=4.48.2
34 | 
35 | Disable version checking: DISABLE_VERSION_CHECK=1
36 | Enable VRAM recording: RECORD_VRAM=1
37 | Force check imports: FORCE_CHECK_IMPORTS=1
38 | Force using torchrun: FORCE_TORCHRUN=1
39 | Set logging verbosity: LLAMAFACTORY_VERBOSITY=WARN
40 | Use modelscope: USE_MODELSCOPE_HUB=1
41 | Use openmind: USE_OPENMIND_HUB=1
42 | """
43 | 
44 | from .extras.env import VERSION
45 | 
46 | 
47 | __version__ = VERSION
48 | 


--------------------------------------------------------------------------------
/train/src/llamafactory/api/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GAIR-NLP/LIMO/e9951354af219d8c4d454e25ae348395e6598ab0/train/src/llamafactory/api/__init__.py


--------------------------------------------------------------------------------
/train/src/llamafactory/api/common.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import json
16 | from typing import TYPE_CHECKING, Any, Dict
17 | 
18 | 
19 | if TYPE_CHECKING:
20 |     from pydantic import BaseModel
21 | 
22 | 
23 | def dictify(data: "BaseModel") -> Dict[str, Any]:
24 |     try:  # pydantic v2
25 |         return data.model_dump(exclude_unset=True)
26 |     except AttributeError:  # pydantic v1
27 |         return data.dict(exclude_unset=True)
28 | 
29 | 
30 | def jsonify(data: "BaseModel") -> str:
31 |     try:  # pydantic v2
32 |         return json.dumps(data.model_dump(exclude_unset=True), ensure_ascii=False)
33 |     except AttributeError:  # pydantic v1
34 |         return data.json(exclude_unset=True, ensure_ascii=False)
35 | 


--------------------------------------------------------------------------------
/train/src/llamafactory/chat/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .base_engine import BaseEngine
16 | from .chat_model import ChatModel
17 | 
18 | 
19 | __all__ = ["BaseEngine", "ChatModel"]
20 | 


--------------------------------------------------------------------------------
/train/src/llamafactory/chat/base_engine.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2025 the LlamaFactory team.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | from abc import ABC, abstractmethod
 16 | from dataclasses import dataclass
 17 | from typing import TYPE_CHECKING, Any, AsyncGenerator, Dict, List, Literal, Optional, Sequence, Union
 18 | 
 19 | 
 20 | if TYPE_CHECKING:
 21 |     from transformers import PreTrainedModel, PreTrainedTokenizer
 22 |     from vllm import AsyncLLMEngine
 23 | 
 24 |     from ..data import Template
 25 |     from ..data.mm_plugin import AudioInput, ImageInput, VideoInput
 26 |     from ..hparams import DataArguments, FinetuningArguments, GeneratingArguments, ModelArguments
 27 | 
 28 | 
 29 | @dataclass
 30 | class Response:
 31 |     response_text: str
 32 |     response_length: int
 33 |     prompt_length: int
 34 |     finish_reason: Literal["stop", "length"]
 35 | 
 36 | 
 37 | class BaseEngine(ABC):
 38 |     r"""
 39 |     Base class for inference engine of chat models.
 40 | 
 41 |     Must implements async methods: chat(), stream_chat() and get_scores().
 42 |     """
 43 | 
 44 |     model: Union["PreTrainedModel", "AsyncLLMEngine"]
 45 |     tokenizer: "PreTrainedTokenizer"
 46 |     can_generate: bool
 47 |     template: "Template"
 48 |     generating_args: Dict[str, Any]
 49 | 
 50 |     @abstractmethod
 51 |     def __init__(
 52 |         self,
 53 |         model_args: "ModelArguments",
 54 |         data_args: "DataArguments",
 55 |         finetuning_args: "FinetuningArguments",
 56 |         generating_args: "GeneratingArguments",
 57 |     ) -> None:
 58 |         r"""
 59 |         Initializes an inference engine.
 60 |         """
 61 |         ...
 62 | 
 63 |     @abstractmethod
 64 |     async def chat(
 65 |         self,
 66 |         messages: Sequence[Dict[str, str]],
 67 |         system: Optional[str] = None,
 68 |         tools: Optional[str] = None,
 69 |         images: Optional[Sequence["ImageInput"]] = None,
 70 |         videos: Optional[Sequence["VideoInput"]] = None,
 71 |         audios: Optional[Sequence["AudioInput"]] = None,
 72 |         **input_kwargs,
 73 |     ) -> List["Response"]:
 74 |         r"""
 75 |         Gets a list of responses of the chat model.
 76 |         """
 77 |         ...
 78 | 
 79 |     @abstractmethod
 80 |     async def stream_chat(
 81 |         self,
 82 |         messages: Sequence[Dict[str, str]],
 83 |         system: Optional[str] = None,
 84 |         tools: Optional[str] = None,
 85 |         images: Optional[Sequence["ImageInput"]] = None,
 86 |         videos: Optional[Sequence["VideoInput"]] = None,
 87 |         audios: Optional[Sequence["AudioInput"]] = None,
 88 |         **input_kwargs,
 89 |     ) -> AsyncGenerator[str, None]:
 90 |         r"""
 91 |         Gets the response token-by-token of the chat model.
 92 |         """
 93 |         ...
 94 | 
 95 |     @abstractmethod
 96 |     async def get_scores(
 97 |         self,
 98 |         batch_input: List[str],
 99 |         **input_kwargs,
100 |     ) -> List[float]:
101 |         r"""
102 |         Gets a list of scores of the reward model.
103 |         """
104 |         ...
105 | 


--------------------------------------------------------------------------------
/train/src/llamafactory/data/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .collator import (
16 |     KTODataCollatorWithPadding,
17 |     MultiModalDataCollatorForSeq2Seq,
18 |     PairwiseDataCollatorWithPadding,
19 |     SFTDataCollatorWith4DAttentionMask,
20 | )
21 | from .data_utils import Role, split_dataset
22 | from .loader import get_dataset
23 | from .template import TEMPLATES, Template, get_template_and_fix_tokenizer
24 | 
25 | 
26 | __all__ = [
27 |     "KTODataCollatorWithPadding",
28 |     "MultiModalDataCollatorForSeq2Seq",
29 |     "PairwiseDataCollatorWithPadding",
30 |     "SFTDataCollatorWith4DAttentionMask",
31 |     "Role",
32 |     "split_dataset",
33 |     "get_dataset",
34 |     "TEMPLATES",
35 |     "Template",
36 |     "get_template_and_fix_tokenizer",
37 | ]
38 | 


--------------------------------------------------------------------------------
/train/src/llamafactory/data/processors/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GAIR-NLP/LIMO/e9951354af219d8c4d454e25ae348395e6598ab0/train/src/llamafactory/data/processors/__init__.py


--------------------------------------------------------------------------------
/train/src/llamafactory/data/processors/pretrain.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
 2 | #
 3 | # This code is inspired by the HuggingFace's transformers library.
 4 | # https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py
 5 | #
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | #
10 | #     http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | 
18 | from itertools import chain
19 | from typing import TYPE_CHECKING, Any, Dict, List
20 | 
21 | 
22 | if TYPE_CHECKING:
23 |     from transformers import PreTrainedTokenizer
24 | 
25 |     from ...hparams import DataArguments
26 | 
27 | 
28 | def preprocess_pretrain_dataset(
29 |     examples: Dict[str, List[Any]], tokenizer: "PreTrainedTokenizer", data_args: "DataArguments"
30 | ) -> Dict[str, List[Any]]:
31 |     # build grouped texts with format `X1 X2 X3 ...` if packing is enabled
32 |     eos_token = "<|end_of_text|>" if data_args.template == "llama3" else tokenizer.eos_token
33 |     text_examples = [messages[0]["content"] + eos_token for messages in examples["_prompt"]]
34 | 
35 |     if not data_args.packing:
36 |         if getattr(tokenizer, "add_bos_token", False):
37 |             text_examples = [tokenizer.bos_token + example for example in text_examples]
38 | 
39 |         result = tokenizer(text_examples, add_special_tokens=False, truncation=True, max_length=data_args.cutoff_len)
40 |     else:
41 |         tokenized_examples = tokenizer(text_examples, add_special_tokens=False)
42 |         concatenated_examples = {k: list(chain(*tokenized_examples[k])) for k in tokenized_examples.keys()}
43 |         total_length = len(concatenated_examples[list(concatenated_examples.keys())[0]])
44 |         block_size = data_args.cutoff_len
45 |         total_length = (total_length // block_size) * block_size
46 |         result = {
47 |             k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
48 |             for k, t in concatenated_examples.items()
49 |         }
50 |         if getattr(tokenizer, "add_bos_token", False):
51 |             for i in range(len(result["input_ids"])):
52 |                 result["input_ids"][i][0] = tokenizer.bos_token_id
53 | 
54 |     return result
55 | 
56 | 
57 | def print_pretrain_dataset_example(example: Dict[str, List[int]], tokenizer: "PreTrainedTokenizer") -> None:
58 |     print("input_ids:\n{}".format(example["input_ids"]))
59 |     print("inputs:\n{}".format(tokenizer.decode(example["input_ids"], skip_special_tokens=False)))
60 | 


--------------------------------------------------------------------------------
/train/src/llamafactory/data/processors/processor_utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import bisect
16 | from typing import List, Sequence, Tuple
17 | 
18 | 
19 | def search_for_fit(numbers: Sequence[int], capacity: int) -> int:
20 |     r"""
21 |     Finds the index of largest number that fits into the knapsack with the given capacity.
22 |     """
23 |     index = bisect.bisect(numbers, capacity)
24 |     return -1 if index == 0 else (index - 1)
25 | 
26 | 
27 | def greedy_knapsack(numbers: List[int], capacity: int) -> List[List[int]]:
28 |     r"""
29 |     An efficient greedy algorithm with binary search for the knapsack problem.
30 |     """
31 |     numbers.sort()  # sort numbers in ascending order for binary search
32 |     knapsacks = []
33 | 
34 |     while numbers:
35 |         current_knapsack = []
36 |         remaining_capacity = capacity
37 | 
38 |         while True:
39 |             index = search_for_fit(numbers, remaining_capacity)
40 |             if index == -1:
41 |                 break  # no more numbers fit in this knapsack
42 | 
43 |             remaining_capacity -= numbers[index]  # update the remaining capacity
44 |             current_knapsack.append(numbers.pop(index))  # add the number to knapsack
45 | 
46 |         knapsacks.append(current_knapsack)
47 | 
48 |     return knapsacks
49 | 
50 | 
51 | def infer_seqlen(source_len: int, target_len: int, cutoff_len: int) -> Tuple[int, int]:
52 |     r"""
53 |     Computes the real sequence length after truncation by the cutoff_len.
54 |     """
55 |     if target_len * 2 < cutoff_len:  # truncate source
56 |         max_target_len = cutoff_len
57 |     elif source_len * 2 < cutoff_len:  # truncate target
58 |         max_target_len = cutoff_len - source_len
59 |     else:  # truncate both
60 |         max_target_len = int(cutoff_len * (target_len / (source_len + target_len)))
61 | 
62 |     new_target_len = min(max_target_len, target_len)
63 |     max_source_len = max(cutoff_len - new_target_len, 0)
64 |     new_source_len = min(max_source_len, source_len)
65 |     return new_source_len, new_target_len
66 | 


--------------------------------------------------------------------------------
/train/src/llamafactory/eval/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GAIR-NLP/LIMO/e9951354af219d8c4d454e25ae348395e6598ab0/train/src/llamafactory/eval/__init__.py


--------------------------------------------------------------------------------
/train/src/llamafactory/eval/template.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from dataclasses import dataclass
16 | from typing import Dict, List, Sequence, Tuple
17 | 
18 | from ..data import Role
19 | from ..extras.constants import CHOICES
20 | 
21 | 
22 | @dataclass
23 | class EvalTemplate:
24 |     system: str
25 |     choice: str
26 |     answer: str
27 | 
28 |     def _parse_example(self, example: Dict[str, str]) -> Tuple[str, str]:
29 |         r"""
30 |         input: a dict with keys {"question", "A", "B", "C", "D", "answer"}
31 |         output: a tuple of (prompt, response)
32 |         """
33 |         candidates = [self.choice.format(choice=ch, content=example[ch]) for ch in CHOICES if ch in example]
34 |         return "".join([example["question"]] + candidates + [self.answer]), example["answer"]
35 | 
36 |     def format_example(
37 |         self, target_data: Dict[str, str], support_set: Sequence[Dict[str, str]], subject_name: str
38 |     ) -> List[Dict[str, str]]:
39 |         r"""
40 |         Converts dataset examples to messages.
41 |         """
42 |         messages = []
43 |         for k in range(len(support_set)):
44 |             prompt, response = self._parse_example(support_set[k])
45 |             messages.append({"role": Role.USER.value, "content": prompt})
46 |             messages.append({"role": Role.ASSISTANT.value, "content": response})
47 | 
48 |         prompt, response = self._parse_example(target_data)
49 |         messages.append({"role": Role.USER.value, "content": prompt})
50 |         messages.append({"role": Role.ASSISTANT.value, "content": response})
51 |         messages[0]["content"] = self.system.format(subject=subject_name) + messages[0]["content"]
52 |         return messages
53 | 
54 | 
55 | eval_templates: Dict[str, "EvalTemplate"] = {}
56 | 
57 | 
58 | def _register_eval_template(name: str, system: str, choice: str, answer: str) -> None:
59 |     eval_templates[name] = EvalTemplate(system=system, choice=choice, answer=answer)
60 | 
61 | 
62 | def get_eval_template(name: str) -> "EvalTemplate":
63 |     eval_template = eval_templates.get(name, None)
64 |     assert eval_template is not None, f"Template {name} does not exist."
65 |     return eval_template
66 | 
67 | 
68 | _register_eval_template(
69 |     name="en",
70 |     system="The following are multiple choice questions (with answers) about {subject}.\n\n",
71 |     choice="\n{choice}. {content}",
72 |     answer="\nAnswer:",
73 | )
74 | 
75 | 
76 | _register_eval_template(
77 |     name="zh",
78 |     system="以下是中国关于{subject}考试的单项选择题，请选出其中的正确答案。\n\n",
79 |     choice="\n{choice}. {content}",
80 |     answer="\n答案：",
81 | )
82 | 


--------------------------------------------------------------------------------
/train/src/llamafactory/extras/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GAIR-NLP/LIMO/e9951354af219d8c4d454e25ae348395e6598ab0/train/src/llamafactory/extras/__init__.py


--------------------------------------------------------------------------------
/train/src/llamafactory/extras/env.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
 2 | #
 3 | # This code is inspired by the HuggingFace's transformers library.
 4 | # https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/commands/env.py
 5 | #
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | #
10 | #     http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | 
18 | import platform
19 | 
20 | import accelerate
21 | import datasets
22 | import peft
23 | import torch
24 | import transformers
25 | import trl
26 | from transformers.utils import is_torch_cuda_available, is_torch_npu_available
27 | 
28 | 
29 | VERSION = "0.9.2.dev0"
30 | 
31 | 
32 | def print_env() -> None:
33 |     info = {
34 |         "`llamafactory` version": VERSION,
35 |         "Platform": platform.platform(),
36 |         "Python version": platform.python_version(),
37 |         "PyTorch version": torch.__version__,
38 |         "Transformers version": transformers.__version__,
39 |         "Datasets version": datasets.__version__,
40 |         "Accelerate version": accelerate.__version__,
41 |         "PEFT version": peft.__version__,
42 |         "TRL version": trl.__version__,
43 |     }
44 | 
45 |     if is_torch_cuda_available():
46 |         info["PyTorch version"] += " (GPU)"
47 |         info["GPU type"] = torch.cuda.get_device_name()
48 | 
49 |     if is_torch_npu_available():
50 |         info["PyTorch version"] += " (NPU)"
51 |         info["NPU type"] = torch.npu.get_device_name()
52 |         info["CANN version"] = torch.version.cann
53 | 
54 |     try:
55 |         import deepspeed  # type: ignore
56 | 
57 |         info["DeepSpeed version"] = deepspeed.__version__
58 |     except Exception:
59 |         pass
60 | 
61 |     try:
62 |         import bitsandbytes
63 | 
64 |         info["Bitsandbytes version"] = bitsandbytes.__version__
65 |     except Exception:
66 |         pass
67 | 
68 |     try:
69 |         import vllm
70 | 
71 |         info["vLLM version"] = vllm.__version__
72 |     except Exception:
73 |         pass
74 | 
75 |     print("\n" + "\n".join([f"- {key}: {value}" for key, value in info.items()]) + "\n")
76 | 


--------------------------------------------------------------------------------
/train/src/llamafactory/extras/packages.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
  2 | #
  3 | # This code is inspired by the HuggingFace's transformers library.
  4 | # https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/utils/import_utils.py
  5 | #
  6 | # Licensed under the Apache License, Version 2.0 (the "License");
  7 | # you may not use this file except in compliance with the License.
  8 | # You may obtain a copy of the License at
  9 | #
 10 | #     http://www.apache.org/licenses/LICENSE-2.0
 11 | #
 12 | # Unless required by applicable law or agreed to in writing, software
 13 | # distributed under the License is distributed on an "AS IS" BASIS,
 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15 | # See the License for the specific language governing permissions and
 16 | # limitations under the License.
 17 | 
 18 | import importlib.metadata
 19 | import importlib.util
 20 | from functools import lru_cache
 21 | from typing import TYPE_CHECKING
 22 | 
 23 | from packaging import version
 24 | 
 25 | 
 26 | if TYPE_CHECKING:
 27 |     from packaging.version import Version
 28 | 
 29 | 
 30 | def _is_package_available(name: str) -> bool:
 31 |     return importlib.util.find_spec(name) is not None
 32 | 
 33 | 
 34 | def _get_package_version(name: str) -> "Version":
 35 |     try:
 36 |         return version.parse(importlib.metadata.version(name))
 37 |     except Exception:
 38 |         return version.parse("0.0.0")
 39 | 
 40 | 
 41 | def is_pyav_available():
 42 |     return _is_package_available("av")
 43 | 
 44 | 
 45 | def is_librosa_available():
 46 |     return _is_package_available("librosa")
 47 | 
 48 | 
 49 | def is_fastapi_available():
 50 |     return _is_package_available("fastapi")
 51 | 
 52 | 
 53 | def is_galore_available():
 54 |     return _is_package_available("galore_torch")
 55 | 
 56 | 
 57 | def is_apollo_available():
 58 |     return _is_package_available("apollo_torch")
 59 | 
 60 | 
 61 | def is_gradio_available():
 62 |     return _is_package_available("gradio")
 63 | 
 64 | 
 65 | def is_matplotlib_available():
 66 |     return _is_package_available("matplotlib")
 67 | 
 68 | 
 69 | def is_pillow_available():
 70 |     return _is_package_available("PIL")
 71 | 
 72 | 
 73 | def is_ray_available():
 74 |     return _is_package_available("ray")
 75 | 
 76 | 
 77 | def is_requests_available():
 78 |     return _is_package_available("requests")
 79 | 
 80 | 
 81 | def is_rouge_available():
 82 |     return _is_package_available("rouge_chinese")
 83 | 
 84 | 
 85 | def is_starlette_available():
 86 |     return _is_package_available("sse_starlette")
 87 | 
 88 | 
 89 | @lru_cache
 90 | def is_transformers_version_greater_than(content: str):
 91 |     return _get_package_version("transformers") >= version.parse(content)
 92 | 
 93 | 
 94 | def is_uvicorn_available():
 95 |     return _is_package_available("uvicorn")
 96 | 
 97 | 
 98 | def is_vllm_available():
 99 |     return _is_package_available("vllm")
100 | 


--------------------------------------------------------------------------------
/train/src/llamafactory/hparams/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .data_args import DataArguments
16 | from .evaluation_args import EvaluationArguments
17 | from .finetuning_args import FinetuningArguments
18 | from .generating_args import GeneratingArguments
19 | from .model_args import ModelArguments
20 | from .parser import get_eval_args, get_infer_args, get_ray_args, get_train_args, read_args
21 | from .training_args import RayArguments, TrainingArguments
22 | 
23 | 
24 | __all__ = [
25 |     "DataArguments",
26 |     "EvaluationArguments",
27 |     "FinetuningArguments",
28 |     "GeneratingArguments",
29 |     "ModelArguments",
30 |     "RayArguments",
31 |     "TrainingArguments",
32 |     "get_eval_args",
33 |     "get_infer_args",
34 |     "get_ray_args",
35 |     "get_train_args",
36 |     "read_args",
37 | ]
38 | 


--------------------------------------------------------------------------------
/train/src/llamafactory/hparams/evaluation_args.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | from dataclasses import dataclass, field
17 | from typing import Literal, Optional
18 | 
19 | from datasets import DownloadMode
20 | 
21 | 
22 | @dataclass
23 | class EvaluationArguments:
24 |     r"""
25 |     Arguments pertaining to specify the evaluation parameters.
26 |     """
27 | 
28 |     task: str = field(
29 |         metadata={"help": "Name of the evaluation task."},
30 |     )
31 |     task_dir: str = field(
32 |         default="evaluation",
33 |         metadata={"help": "Path to the folder containing the evaluation datasets."},
34 |     )
35 |     batch_size: int = field(
36 |         default=4,
37 |         metadata={"help": "The batch size per GPU for evaluation."},
38 |     )
39 |     seed: int = field(
40 |         default=42,
41 |         metadata={"help": "Random seed to be used with data loaders."},
42 |     )
43 |     lang: Literal["en", "zh"] = field(
44 |         default="en",
45 |         metadata={"help": "Language used at evaluation."},
46 |     )
47 |     n_shot: int = field(
48 |         default=5,
49 |         metadata={"help": "Number of examplars for few-shot learning."},
50 |     )
51 |     save_dir: Optional[str] = field(
52 |         default=None,
53 |         metadata={"help": "Path to save the evaluation results."},
54 |     )
55 |     download_mode: DownloadMode = field(
56 |         default=DownloadMode.REUSE_DATASET_IF_EXISTS,
57 |         metadata={"help": "Download mode used for the evaluation datasets."},
58 |     )
59 | 
60 |     def __post_init__(self):
61 |         if self.save_dir is not None and os.path.exists(self.save_dir):
62 |             raise ValueError("`save_dir` already exists, use another one.")
63 | 


--------------------------------------------------------------------------------
/train/src/llamafactory/hparams/generating_args.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from dataclasses import asdict, dataclass, field
16 | from typing import Any, Dict, Optional
17 | 
18 | from transformers import GenerationConfig
19 | 
20 | 
21 | @dataclass
22 | class GeneratingArguments:
23 |     r"""
24 |     Arguments pertaining to specify the decoding parameters.
25 |     """
26 | 
27 |     do_sample: bool = field(
28 |         default=True,
29 |         metadata={"help": "Whether or not to use sampling, use greedy decoding otherwise."},
30 |     )
31 |     temperature: float = field(
32 |         default=0.95,
33 |         metadata={"help": "The value used to modulate the next token probabilities."},
34 |     )
35 |     top_p: float = field(
36 |         default=0.7,
37 |         metadata={
38 |             "help": "The smallest set of most probable tokens with probabilities that add up to top_p or higher are kept."
39 |         },
40 |     )
41 |     top_k: int = field(
42 |         default=50,
43 |         metadata={"help": "The number of highest probability vocabulary tokens to keep for top-k filtering."},
44 |     )
45 |     num_beams: int = field(
46 |         default=1,
47 |         metadata={"help": "Number of beams for beam search. 1 means no beam search."},
48 |     )
49 |     max_length: int = field(
50 |         default=1024,
51 |         metadata={"help": "The maximum length the generated tokens can have. It can be overridden by max_new_tokens."},
52 |     )
53 |     max_new_tokens: int = field(
54 |         default=1024,
55 |         metadata={"help": "The maximum numbers of tokens to generate, ignoring the number of tokens in the prompt."},
56 |     )
57 |     repetition_penalty: float = field(
58 |         default=1.0,
59 |         metadata={"help": "The parameter for repetition penalty. 1.0 means no penalty."},
60 |     )
61 |     length_penalty: float = field(
62 |         default=1.0,
63 |         metadata={"help": "Exponential penalty to the length that is used with beam-based generation."},
64 |     )
65 |     default_system: Optional[str] = field(
66 |         default=None,
67 |         metadata={"help": "Default system message to use in chat completion."},
68 |     )
69 |     skip_special_tokens: bool = field(
70 |         default=True,
71 |         metadata={"help": "Whether or not to remove special tokens in the decoding."},
72 |     )
73 | 
74 |     def to_dict(self, obey_generation_config: bool = False) -> Dict[str, Any]:
75 |         args = asdict(self)
76 |         if args.get("max_new_tokens", -1) > 0:
77 |             args.pop("max_length", None)
78 |         else:
79 |             args.pop("max_new_tokens", None)
80 | 
81 |         if obey_generation_config:
82 |             generation_config = GenerationConfig()
83 |             for key in list(args.keys()):
84 |                 if not hasattr(generation_config, key):
85 |                     args.pop(key)
86 | 
87 |         return args
88 | 


--------------------------------------------------------------------------------
/train/src/llamafactory/hparams/training_args.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | from dataclasses import dataclass, field
 3 | from typing import Literal, Optional, Union
 4 | 
 5 | from transformers import Seq2SeqTrainingArguments
 6 | from transformers.training_args import _convert_str_dict
 7 | 
 8 | from ..extras.misc import use_ray
 9 | 
10 | 
11 | @dataclass
12 | class RayArguments:
13 |     r"""
14 |     Arguments pertaining to the Ray training.
15 |     """
16 | 
17 |     ray_run_name: Optional[str] = field(
18 |         default=None,
19 |         metadata={"help": "The training results will be saved at `saves/ray_run_name`."},
20 |     )
21 |     ray_num_workers: int = field(
22 |         default=1,
23 |         metadata={"help": "The number of workers for Ray training. Default is 1 worker."},
24 |     )
25 |     resources_per_worker: Union[dict, str] = field(
26 |         default_factory=lambda: {"GPU": 1},
27 |         metadata={"help": "The resources per worker for Ray training. Default is to use 1 GPU per worker."},
28 |     )
29 |     placement_strategy: Literal["SPREAD", "PACK", "STRICT_SPREAD", "STRICT_PACK"] = field(
30 |         default="PACK",
31 |         metadata={"help": "The placement strategy for Ray training. Default is PACK."},
32 |     )
33 | 
34 |     def __post_init__(self):
35 |         self.use_ray = use_ray()
36 |         if isinstance(self.resources_per_worker, str) and self.resources_per_worker.startswith("{"):
37 |             self.resources_per_worker = _convert_str_dict(json.loads(self.resources_per_worker))
38 | 
39 | 
40 | @dataclass
41 | class TrainingArguments(RayArguments, Seq2SeqTrainingArguments):
42 |     r"""
43 |     Arguments pertaining to the trainer.
44 |     """
45 | 
46 |     def __post_init__(self):
47 |         Seq2SeqTrainingArguments.__post_init__(self)
48 |         RayArguments.__post_init__(self)
49 | 


--------------------------------------------------------------------------------
/train/src/llamafactory/launcher.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from llamafactory.train.tuner import run_exp  # use absolute import
16 | 
17 | 
18 | def launch():
19 |     run_exp()
20 | 
21 | 
22 | if __name__ == "__main__":
23 |     launch()
24 | 


--------------------------------------------------------------------------------
/train/src/llamafactory/model/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .loader import load_config, load_model, load_tokenizer
16 | from .model_utils.misc import find_all_linear_modules
17 | from .model_utils.quantization import QuantizationMethod
18 | from .model_utils.valuehead import load_valuehead_params
19 | 
20 | 
21 | __all__ = [
22 |     "QuantizationMethod",
23 |     "load_config",
24 |     "load_model",
25 |     "load_tokenizer",
26 |     "find_all_linear_modules",
27 |     "load_valuehead_params",
28 | ]
29 | 


--------------------------------------------------------------------------------
/train/src/llamafactory/model/model_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GAIR-NLP/LIMO/e9951354af219d8c4d454e25ae348395e6598ab0/train/src/llamafactory/model/model_utils/__init__.py


--------------------------------------------------------------------------------
/train/src/llamafactory/model/model_utils/embedding.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import math
16 | from contextlib import nullcontext
17 | from typing import TYPE_CHECKING
18 | 
19 | import torch
20 | from transformers.integrations import is_deepspeed_zero3_enabled
21 | 
22 | from ...extras import logging
23 | 
24 | 
25 | if TYPE_CHECKING:
26 |     from transformers import PreTrainedModel, PreTrainedTokenizer
27 | 
28 | 
29 | logger = logging.get_logger(__name__)
30 | 
31 | 
32 | def _noisy_mean_initialization(embed_weight: "torch.Tensor", num_new_tokens: int) -> None:
33 |     embedding_dim = embed_weight.size(1)
34 |     avg_weight = embed_weight[:-num_new_tokens].mean(dim=0, keepdim=True)
35 |     noise_weight = torch.empty_like(embed_weight[-num_new_tokens:])
36 |     noise_weight.normal_(mean=0, std=(1.0 / math.sqrt(embedding_dim)))
37 |     embed_weight[-num_new_tokens:] = avg_weight + noise_weight
38 | 
39 | 
40 | def resize_embedding_layer(model: "PreTrainedModel", tokenizer: "PreTrainedTokenizer") -> None:
41 |     r"""
42 |     Resize token embeddings.
43 |     """
44 |     if is_deepspeed_zero3_enabled():
45 |         import deepspeed  # type: ignore
46 | 
47 |         params = [model.get_input_embeddings().weight]
48 |         if model.get_output_embeddings() is not None and not model.config.tie_word_embeddings:
49 |             params.append(model.get_output_embeddings().weight)
50 | 
51 |         context_maybe_zero3 = deepspeed.zero.GatheredParameters(params, modifier_rank=0)
52 |     else:
53 |         context_maybe_zero3 = nullcontext()
54 | 
55 |     with context_maybe_zero3:
56 |         current_embedding_size = model.get_input_embeddings().weight.size(0)
57 | 
58 |     if len(tokenizer) > current_embedding_size:
59 |         if getattr(model, "quantization_method", None):
60 |             raise ValueError("Cannot resize embedding layers of a quantized model.")
61 | 
62 |         if not isinstance(model.get_output_embeddings(), torch.nn.Linear):
63 |             raise ValueError("Current model does not support resizing embedding layers.")
64 | 
65 |         model.resize_token_embeddings(len(tokenizer), pad_to_multiple_of=64)
66 |         with context_maybe_zero3:
67 |             new_embedding_size = model.get_input_embeddings().weight.size(0)
68 |             num_new_tokens = new_embedding_size - current_embedding_size
69 |             _noisy_mean_initialization(model.get_input_embeddings().weight.data, num_new_tokens)
70 |             _noisy_mean_initialization(model.get_output_embeddings().weight.data, num_new_tokens)
71 | 
72 |         logger.info_rank0(f"Resized token embeddings from {current_embedding_size} to {new_embedding_size}.")
73 | 


--------------------------------------------------------------------------------
/train/src/llamafactory/model/model_utils/liger_kernel.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import inspect
16 | from typing import TYPE_CHECKING
17 | 
18 | from ...extras import logging
19 | 
20 | 
21 | if TYPE_CHECKING:
22 |     from transformers import PretrainedConfig
23 | 
24 |     from ...hparams import ModelArguments
25 | 
26 | 
27 | logger = logging.get_logger(__name__)
28 | 
29 | 
30 | def apply_liger_kernel(
31 |     config: "PretrainedConfig",
32 |     model_args: "ModelArguments",
33 |     is_trainable: bool,
34 |     require_logits: bool,
35 | ) -> None:
36 |     if not is_trainable or not model_args.enable_liger_kernel:
37 |         return
38 | 
39 |     model_type = getattr(config, "model_type", None)
40 |     if model_type == "gemma":
41 |         from liger_kernel.transformers import apply_liger_kernel_to_gemma as apply_liger_kernel
42 |     elif model_type == "gemma2":
43 |         from liger_kernel.transformers import apply_liger_kernel_to_gemma2 as apply_liger_kernel
44 |     elif model_type == "llama":
45 |         from liger_kernel.transformers import apply_liger_kernel_to_llama as apply_liger_kernel
46 |     elif model_type == "mistral":
47 |         from liger_kernel.transformers import apply_liger_kernel_to_mistral as apply_liger_kernel
48 |     elif model_type == "mixtral":
49 |         from liger_kernel.transformers import apply_liger_kernel_to_mixtral as apply_liger_kernel
50 |     elif model_type == "phi3":
51 |         from liger_kernel.transformers import apply_liger_kernel_to_phi3 as apply_liger_kernel
52 |     elif model_type == "qwen2":
53 |         from liger_kernel.transformers import apply_liger_kernel_to_qwen2 as apply_liger_kernel
54 |     elif model_type == "qwen2_vl":
55 |         from liger_kernel.transformers import apply_liger_kernel_to_qwen2_vl as apply_liger_kernel
56 |     else:
57 |         logger.warning_rank0("Current model does not support liger kernel.")
58 |         return
59 | 
60 |     if require_logits and "fused_linear_cross_entropy" in inspect.signature(apply_liger_kernel).parameters:
61 |         logger.info_rank0("Current training stage does not support chunked cross entropy.")
62 |         kwargs = {"fused_linear_cross_entropy": False}
63 |     else:
64 |         kwargs = {}
65 | 
66 |     apply_liger_kernel(**kwargs)
67 |     logger.info_rank0("Liger kernel has been applied to the model.")
68 | 


--------------------------------------------------------------------------------
/train/src/llamafactory/model/model_utils/mod.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from typing import TYPE_CHECKING
16 | 
17 | from ...extras.constants import MOD_SUPPORTED_MODELS
18 | 
19 | 
20 | if TYPE_CHECKING:
21 |     from transformers import PretrainedConfig, PreTrainedModel
22 | 
23 |     from ...hparams import ModelArguments
24 | 
25 | 
26 | def load_mod_pretrained_model(**init_kwargs) -> "PreTrainedModel":
27 |     from MoD import AutoMoDModelForCausalLM
28 | 
29 |     return AutoMoDModelForCausalLM.from_pretrained(**init_kwargs)
30 | 
31 | 
32 | def convert_pretrained_model_to_mod(
33 |     model: "PreTrainedModel", config: "PretrainedConfig", model_args: "ModelArguments"
34 | ) -> "PreTrainedModel":
35 |     from MoD import apply_mod_to_hf
36 | 
37 |     if getattr(config, "model_type", None) not in MOD_SUPPORTED_MODELS:
38 |         raise ValueError("Current model is not supported by mixture-of-depth.")
39 | 
40 |     model = apply_mod_to_hf(model)
41 |     model = model.to(model_args.compute_dtype)
42 |     return model
43 | 


--------------------------------------------------------------------------------
/train/src/llamafactory/model/model_utils/moe.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from typing import TYPE_CHECKING, Sequence
16 | 
17 | import torch
18 | from transformers.integrations import is_deepspeed_zero3_enabled
19 | 
20 | from ...extras.misc import check_version
21 | 
22 | 
23 | if TYPE_CHECKING:
24 |     from transformers import PretrainedConfig, PreTrainedModel
25 | 
26 |     from ...hparams import ModelArguments
27 | 
28 | 
29 | def _set_z3_leaf_modules(model: "PreTrainedModel", leaf_modules: Sequence["torch.nn.Module"]) -> None:
30 |     check_version("deepspeed>=0.13.0")
31 |     from deepspeed.utils import set_z3_leaf_modules  # type: ignore
32 | 
33 |     set_z3_leaf_modules(model, leaf_modules)
34 | 
35 | 
36 | def add_z3_leaf_module(model: "PreTrainedModel") -> None:
37 |     r"""
38 |     Sets module as a leaf module to skip partitioning in deepspeed zero3.
39 |     """
40 |     if not is_deepspeed_zero3_enabled():
41 |         return
42 | 
43 |     model_type = getattr(model.config, "model_type", None)
44 |     if model_type == "dbrx":
45 |         from transformers.models.dbrx.modeling_dbrx import DbrxFFN
46 | 
47 |         _set_z3_leaf_modules(model, [DbrxFFN])
48 | 
49 |     if model_type == "jamba":
50 |         from transformers.models.jamba.modeling_jamba import JambaSparseMoeBlock
51 | 
52 |         _set_z3_leaf_modules(model, [JambaSparseMoeBlock])
53 | 
54 |     if model_type == "jetmoe":
55 |         from transformers.models.jetmoe.modeling_jetmoe import JetMoeMoA, JetMoeMoE
56 | 
57 |         _set_z3_leaf_modules(model, [JetMoeMoA, JetMoeMoE])
58 | 
59 |     if model_type == "mixtral":
60 |         from transformers.models.mixtral.modeling_mixtral import MixtralSparseMoeBlock
61 | 
62 |         _set_z3_leaf_modules(model, [MixtralSparseMoeBlock])
63 | 
64 |     if model_type == "qwen2_moe":
65 |         from transformers.models.qwen2_moe.modeling_qwen2_moe import Qwen2MoeSparseMoeBlock
66 | 
67 |         _set_z3_leaf_modules(model, [Qwen2MoeSparseMoeBlock])
68 | 
69 | 
70 | def configure_moe(config: "PretrainedConfig", model_args: "ModelArguments", is_trainable: bool) -> None:
71 |     model_type = getattr(config, "model_type", None)
72 |     if model_args.moe_aux_loss_coef is not None:
73 |         if model_type in ["jamba", "mixtral", "qwen2_moe"]:
74 |             setattr(config, "router_aux_loss_coef", model_args.moe_aux_loss_coef)
75 | 
76 |         elif model_type == "deepseek":
77 |             setattr(config, "aux_loss_alpha", model_args.moe_aux_loss_coef)
78 | 
79 |         elif model_type == "jetmoe":
80 |             setattr(config, "aux_loss_coef", model_args.moe_aux_loss_coef)
81 | 
82 |     if model_type in ["dbrx", "jamba", "jetmoe", "mixtral", "qwen2_moe"]:
83 |         setattr(config, "output_router_logits", is_trainable)
84 | 


--------------------------------------------------------------------------------
/train/src/llamafactory/model/model_utils/rope.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 LMSYS and the LlamaFactory team.
 2 | # Copyright 2023 Rohan Taori, Ishaan Gulrajani, Tianyi Zhang, Yann Dubois, Xuechen Li
 3 | #
 4 | # This code is inspired by the LMSYS's FastChat library.
 5 | # https://github.com/lm-sys/FastChat/blob/v0.2.30/fastchat/train/train.py
 6 | #
 7 | # Licensed under the Apache License, Version 2.0 (the "License");
 8 | # you may not use this file except in compliance with the License.
 9 | # You may obtain a copy of the License at
10 | #
11 | #     http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 | 
19 | import math
20 | from typing import TYPE_CHECKING
21 | 
22 | from ...extras import logging
23 | 
24 | 
25 | if TYPE_CHECKING:
26 |     from transformers import PretrainedConfig
27 | 
28 |     from ...hparams import ModelArguments
29 | 
30 | 
31 | logger = logging.get_logger(__name__)
32 | 
33 | 
34 | def configure_rope(config: "PretrainedConfig", model_args: "ModelArguments", is_trainable: bool) -> None:
35 |     if model_args.rope_scaling is None:
36 |         return
37 | 
38 |     if not hasattr(config, "rope_scaling"):
39 |         logger.warning_rank0("Current model does not support RoPE scaling.")
40 |         return
41 | 
42 |     rope_kwargs = {}
43 |     if model_args.model_max_length is not None:
44 |         if is_trainable and model_args.rope_scaling == "dynamic":
45 |             logger.warning_rank0(
46 |                 "Dynamic NTK scaling may not work well with fine-tuning. "
47 |                 "See: https://github.com/huggingface/transformers/pull/24653"
48 |             )
49 | 
50 |         current_max_length = getattr(config, "max_position_embeddings", None)
51 |         if current_max_length and model_args.model_max_length > current_max_length:
52 |             logger.info_rank0(f"Enlarge max model length from {current_max_length} to {model_args.model_max_length}.")
53 |             setattr(config, "max_position_embeddings", model_args.model_max_length)
54 |             rope_kwargs["factor"] = float(math.ceil(model_args.model_max_length / current_max_length))
55 |         else:
56 |             logger.warning_rank0("Input length is smaller than max length. Consider increase input length.")
57 |             rope_kwargs["factor"] = 1.0
58 | 
59 |         if model_args.rope_scaling == "dynamic":
60 |             rope_kwargs["original_max_position_embeddings"] = current_max_length
61 |         elif model_args.rope_scaling == "llama3":
62 |             rope_kwargs["original_max_position_embeddings"] = current_max_length
63 |             rope_kwargs["low_freq_factor"] = 1.0
64 |             rope_kwargs["high_freq_factor"] = 4.0
65 |     else:
66 |         rope_kwargs["factor"] = 2.0
67 | 
68 |     setattr(config, "rope_scaling", {"rope_type": model_args.rope_scaling, **rope_kwargs})
69 |     logger.info_rank0(
70 |         f"Using {model_args.rope_scaling} scaling strategy and setting scaling factor to {rope_kwargs['factor']}."
71 |     )
72 | 


--------------------------------------------------------------------------------
/train/src/llamafactory/model/model_utils/valuehead.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from typing import TYPE_CHECKING, Dict
16 | 
17 | import torch
18 | from transformers.utils import cached_file
19 | 
20 | from ...extras import logging
21 | from ...extras.constants import V_HEAD_SAFE_WEIGHTS_NAME, V_HEAD_WEIGHTS_NAME
22 | 
23 | 
24 | if TYPE_CHECKING:
25 |     from transformers import PreTrainedModel
26 | 
27 |     from ...hparams import ModelArguments
28 | 
29 | 
30 | logger = logging.get_logger(__name__)
31 | 
32 | 
33 | def load_valuehead_params(path_or_repo_id: str, model_args: "ModelArguments") -> Dict[str, torch.Tensor]:
34 |     r"""
35 |     Loads value head parameters from Hugging Face Hub or local disk.
36 | 
37 |     Returns: dict with keys `v_head.summary.weight` and `v_head.summary.bias`.
38 |     """
39 |     kwargs = {"path_or_repo_id": path_or_repo_id, "cache_dir": model_args.cache_dir, "token": model_args.hf_hub_token}
40 |     err_text = ""
41 | 
42 |     try:
43 |         from safetensors import safe_open
44 | 
45 |         vhead_file = cached_file(filename=V_HEAD_SAFE_WEIGHTS_NAME, **kwargs)
46 |         with safe_open(vhead_file, framework="pt", device="cpu") as f:
47 |             return {key: f.get_tensor(key) for key in f.keys()}
48 |     except Exception as err:
49 |         err_text = str(err)
50 | 
51 |     try:
52 |         vhead_file = cached_file(filename=V_HEAD_WEIGHTS_NAME, **kwargs)
53 |         return torch.load(vhead_file, map_location="cpu")
54 |     except Exception as err:
55 |         err_text = str(err)
56 | 
57 |     logger.info_rank0(f"Provided path ({path_or_repo_id}) does not contain value head weights: {err_text}.")
58 |     logger.info_rank0("Ignore the above message if you are not resuming the training of a value head model.")
59 |     return None
60 | 
61 | 
62 | def prepare_valuehead_model(model: "PreTrainedModel") -> None:
63 |     if getattr(model.config, "model_type", None) == "llava":
64 |         setattr(model, "lm_head", model.language_model.get_output_embeddings())
65 |         setattr(model, "_keys_to_ignore_on_save", ["lm_head.weight"])
66 | 
67 |     if getattr(model.config, "model_type", None) == "chatglm":
68 |         setattr(model, "lm_head", model.transformer.output_layer)
69 |         setattr(model, "_keys_to_ignore_on_save", ["lm_head.weight"])
70 | 
71 |     if getattr(model.config, "model_type", None) == "internlm2":
72 |         setattr(model, "lm_head", model.output)
73 |         setattr(model, "_keys_to_ignore_on_save", ["lm_head.weight"])
74 | 


--------------------------------------------------------------------------------
/train/src/llamafactory/train/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GAIR-NLP/LIMO/e9951354af219d8c4d454e25ae348395e6598ab0/train/src/llamafactory/train/__init__.py


--------------------------------------------------------------------------------
/train/src/llamafactory/train/dpo/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .workflow import run_dpo
16 | 
17 | 
18 | __all__ = ["run_dpo"]
19 | 


--------------------------------------------------------------------------------
/train/src/llamafactory/train/kto/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .workflow import run_kto
16 | 
17 | 
18 | __all__ = ["run_kto"]
19 | 


--------------------------------------------------------------------------------
/train/src/llamafactory/train/ppo/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .workflow import run_ppo
16 | 
17 | 
18 | __all__ = ["run_ppo"]
19 | 


--------------------------------------------------------------------------------
/train/src/llamafactory/train/pt/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .workflow import run_pt
16 | 
17 | 
18 | __all__ = ["run_pt"]
19 | 


--------------------------------------------------------------------------------
/train/src/llamafactory/train/pt/trainer.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from types import MethodType
16 | from typing import TYPE_CHECKING, Optional
17 | 
18 | import torch
19 | from transformers import Trainer
20 | from typing_extensions import override
21 | 
22 | from ...extras.packages import is_transformers_version_greater_than
23 | from ..callbacks import SaveProcessorCallback
24 | from ..trainer_utils import create_custom_optimizer, create_custom_scheduler
25 | 
26 | 
27 | if TYPE_CHECKING:
28 |     from transformers import ProcessorMixin
29 | 
30 |     from ...hparams import FinetuningArguments
31 | 
32 | 
33 | class CustomTrainer(Trainer):
34 |     r"""
35 |     Inherits Trainer for custom optimizer.
36 |     """
37 | 
38 |     def __init__(
39 |         self, finetuning_args: "FinetuningArguments", processor: Optional["ProcessorMixin"], **kwargs
40 |     ) -> None:
41 |         if is_transformers_version_greater_than("4.46"):
42 |             kwargs["processing_class"] = kwargs.pop("tokenizer")
43 | 
44 |         super().__init__(**kwargs)
45 |         self.finetuning_args = finetuning_args
46 | 
47 |         if processor is not None:
48 |             self.add_callback(SaveProcessorCallback(processor))
49 | 
50 |         if finetuning_args.use_badam:
51 |             from badam import BAdamCallback, clip_grad_norm_old_version  # type: ignore
52 | 
53 |             self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_old_version, self.accelerator)
54 |             self.add_callback(BAdamCallback)
55 | 
56 |     @override
57 |     def create_optimizer(self) -> "torch.optim.Optimizer":
58 |         if self.optimizer is None:
59 |             self.optimizer = create_custom_optimizer(self.model, self.args, self.finetuning_args)
60 |         return super().create_optimizer()
61 | 
62 |     @override
63 |     def create_scheduler(
64 |         self, num_training_steps: int, optimizer: Optional["torch.optim.Optimizer"] = None
65 |     ) -> "torch.optim.lr_scheduler.LRScheduler":
66 |         create_custom_scheduler(self.args, num_training_steps, optimizer)
67 |         return super().create_scheduler(num_training_steps, optimizer)
68 | 
69 |     @override
70 |     def _get_train_sampler(self) -> Optional["torch.utils.data.Sampler"]:
71 |         if self.finetuning_args.disable_shuffling:
72 |             return torch.utils.data.SequentialSampler(self.train_dataset)
73 | 
74 |         return super()._get_train_sampler()
75 | 


--------------------------------------------------------------------------------
/train/src/llamafactory/train/pt/workflow.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
 2 | #
 3 | # This code is inspired by the HuggingFace's transformers library.
 4 | # https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py
 5 | #
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | #
10 | #     http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | 
18 | import math
19 | from typing import TYPE_CHECKING, List, Optional
20 | 
21 | from transformers import DataCollatorForLanguageModeling
22 | 
23 | from ...data import get_dataset, get_template_and_fix_tokenizer
24 | from ...extras.ploting import plot_loss
25 | from ...model import load_model, load_tokenizer
26 | from ..trainer_utils import create_modelcard_and_push
27 | from .trainer import CustomTrainer
28 | 
29 | 
30 | if TYPE_CHECKING:
31 |     from transformers import Seq2SeqTrainingArguments, TrainerCallback
32 | 
33 |     from ...hparams import DataArguments, FinetuningArguments, ModelArguments
34 | 
35 | 
36 | def run_pt(
37 |     model_args: "ModelArguments",
38 |     data_args: "DataArguments",
39 |     training_args: "Seq2SeqTrainingArguments",
40 |     finetuning_args: "FinetuningArguments",
41 |     callbacks: Optional[List["TrainerCallback"]] = None,
42 | ):
43 |     tokenizer_module = load_tokenizer(model_args)
44 |     tokenizer = tokenizer_module["tokenizer"]
45 |     template = get_template_and_fix_tokenizer(tokenizer, data_args)
46 |     dataset_module = get_dataset(template, model_args, data_args, training_args, stage="pt", **tokenizer_module)
47 |     model = load_model(tokenizer, model_args, finetuning_args, training_args.do_train)
48 |     data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
49 | 
50 |     # Initialize our Trainer
51 |     trainer = CustomTrainer(
52 |         model=model,
53 |         args=training_args,
54 |         finetuning_args=finetuning_args,
55 |         data_collator=data_collator,
56 |         callbacks=callbacks,
57 |         **dataset_module,
58 |         **tokenizer_module,
59 |     )
60 | 
61 |     # Training
62 |     if training_args.do_train:
63 |         train_result = trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint)
64 |         trainer.save_model()
65 |         trainer.log_metrics("train", train_result.metrics)
66 |         trainer.save_metrics("train", train_result.metrics)
67 |         trainer.save_state()
68 |         if trainer.is_world_process_zero() and finetuning_args.plot_loss:
69 |             plot_loss(training_args.output_dir, keys=["loss", "eval_loss"])
70 | 
71 |     # Evaluation
72 |     if training_args.do_eval:
73 |         metrics = trainer.evaluate(metric_key_prefix="eval")
74 |         try:
75 |             perplexity = math.exp(metrics["eval_loss"])
76 |         except OverflowError:
77 |             perplexity = float("inf")
78 | 
79 |         metrics["perplexity"] = perplexity
80 |         trainer.log_metrics("eval", metrics)
81 |         trainer.save_metrics("eval", metrics)
82 | 
83 |     # Create model card
84 |     create_modelcard_and_push(trainer, model_args, data_args, training_args, finetuning_args)
85 | 


--------------------------------------------------------------------------------
/train/src/llamafactory/train/rm/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .workflow import run_rm
16 | 
17 | 
18 | __all__ = ["run_rm"]
19 | 


--------------------------------------------------------------------------------
/train/src/llamafactory/train/rm/metric.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from dataclasses import dataclass
16 | from typing import TYPE_CHECKING, Dict, Optional
17 | 
18 | import numpy as np
19 | 
20 | from ...extras.misc import numpify
21 | 
22 | 
23 | if TYPE_CHECKING:
24 |     from transformers import EvalPrediction
25 | 
26 | 
27 | @dataclass
28 | class ComputeAccuracy:
29 |     r"""
30 |     Computes reward accuracy and supports `batch_eval_metrics`.
31 |     """
32 | 
33 |     def _dump(self) -> Optional[Dict[str, float]]:
34 |         result = None
35 |         if hasattr(self, "score_dict"):
36 |             result = {k: float(np.mean(v)) for k, v in self.score_dict.items()}
37 | 
38 |         self.score_dict = {"accuracy": []}
39 |         return result
40 | 
41 |     def __post_init__(self):
42 |         self._dump()
43 | 
44 |     def __call__(self, eval_preds: "EvalPrediction", compute_result: bool = True) -> Optional[Dict[str, float]]:
45 |         chosen_scores, rejected_scores = numpify(eval_preds.predictions[0]), numpify(eval_preds.predictions[1])
46 |         if not chosen_scores.shape:
47 |             self.score_dict["accuracy"].append(chosen_scores > rejected_scores)
48 |         else:
49 |             for i in range(len(chosen_scores)):
50 |                 self.score_dict["accuracy"].append(chosen_scores[i] > rejected_scores[i])
51 | 
52 |         if compute_result:
53 |             return self._dump()
54 | 


--------------------------------------------------------------------------------
/train/src/llamafactory/train/sft/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .workflow import run_sft
16 | 
17 | 
18 | __all__ = ["run_sft"]
19 | 


--------------------------------------------------------------------------------
/train/src/llamafactory/webui/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GAIR-NLP/LIMO/e9951354af219d8c4d454e25ae348395e6598ab0/train/src/llamafactory/webui/__init__.py


--------------------------------------------------------------------------------
/train/src/llamafactory/webui/components/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .chatbot import create_chat_box
16 | from .eval import create_eval_tab
17 | from .export import create_export_tab
18 | from .infer import create_infer_tab
19 | from .top import create_top
20 | from .train import create_train_tab
21 | 
22 | 
23 | __all__ = [
24 |     "create_chat_box",
25 |     "create_eval_tab",
26 |     "create_export_tab",
27 |     "create_infer_tab",
28 |     "create_top",
29 |     "create_train_tab",
30 | ]
31 | 


--------------------------------------------------------------------------------
/train/src/llamafactory/webui/components/infer.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from typing import TYPE_CHECKING, Dict
16 | 
17 | from ...extras.packages import is_gradio_available
18 | from ..common import is_multimodal
19 | from .chatbot import create_chat_box
20 | 
21 | 
22 | if is_gradio_available():
23 |     import gradio as gr
24 | 
25 | 
26 | if TYPE_CHECKING:
27 |     from gradio.components import Component
28 | 
29 |     from ..engine import Engine
30 | 
31 | 
32 | def create_infer_tab(engine: "Engine") -> Dict[str, "Component"]:
33 |     input_elems = engine.manager.get_base_elems()
34 |     elem_dict = dict()
35 | 
36 |     with gr.Row():
37 |         infer_backend = gr.Dropdown(choices=["huggingface", "vllm"], value="huggingface")
38 |         infer_dtype = gr.Dropdown(choices=["auto", "float16", "bfloat16", "float32"], value="auto")
39 | 
40 |     with gr.Row():
41 |         load_btn = gr.Button()
42 |         unload_btn = gr.Button()
43 | 
44 |     info_box = gr.Textbox(show_label=False, interactive=False)
45 | 
46 |     input_elems.update({infer_backend, infer_dtype})
47 |     elem_dict.update(
48 |         dict(
49 |             infer_backend=infer_backend,
50 |             infer_dtype=infer_dtype,
51 |             load_btn=load_btn,
52 |             unload_btn=unload_btn,
53 |             info_box=info_box,
54 |         )
55 |     )
56 | 
57 |     chatbot, messages, chat_elems = create_chat_box(engine, visible=False)
58 |     elem_dict.update(chat_elems)
59 | 
60 |     load_btn.click(engine.chatter.load_model, input_elems, [info_box]).then(
61 |         lambda: gr.Column(visible=engine.chatter.loaded), outputs=[chat_elems["chat_box"]]
62 |     )
63 | 
64 |     unload_btn.click(engine.chatter.unload_model, input_elems, [info_box]).then(
65 |         lambda: ([], []), outputs=[chatbot, messages]
66 |     ).then(lambda: gr.Column(visible=engine.chatter.loaded), outputs=[chat_elems["chat_box"]])
67 | 
68 |     engine.manager.get_elem_by_id("top.model_name").change(
69 |         lambda model_name: gr.Column(visible=is_multimodal(model_name)),
70 |         [engine.manager.get_elem_by_id("top.model_name")],
71 |         [chat_elems["mm_box"]],
72 |     )
73 | 
74 |     return elem_dict
75 | 


--------------------------------------------------------------------------------
/train/src/llamafactory/webui/components/top.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from typing import TYPE_CHECKING, Dict
16 | 
17 | from ...data import TEMPLATES
18 | from ...extras.constants import METHODS, SUPPORTED_MODELS
19 | from ...extras.packages import is_gradio_available
20 | from ..common import save_config
21 | from ..control import can_quantize, can_quantize_to, get_model_info, list_checkpoints
22 | 
23 | 
24 | if is_gradio_available():
25 |     import gradio as gr
26 | 
27 | 
28 | if TYPE_CHECKING:
29 |     from gradio.components import Component
30 | 
31 | 
32 | def create_top() -> Dict[str, "Component"]:
33 |     with gr.Row():
34 |         lang = gr.Dropdown(choices=["en", "ru", "zh", "ko", "ja"], value=None, scale=1)
35 |         available_models = list(SUPPORTED_MODELS.keys()) + ["Custom"]
36 |         model_name = gr.Dropdown(choices=available_models, value=None, scale=3)
37 |         model_path = gr.Textbox(scale=3)
38 | 
39 |     with gr.Row():
40 |         finetuning_type = gr.Dropdown(choices=METHODS, value="lora", scale=1)
41 |         checkpoint_path = gr.Dropdown(multiselect=True, allow_custom_value=True, scale=6)
42 | 
43 |     with gr.Row():
44 |         quantization_bit = gr.Dropdown(choices=["none", "8", "4"], value="none", allow_custom_value=True)
45 |         quantization_method = gr.Dropdown(choices=["bitsandbytes", "hqq", "eetq"], value="bitsandbytes")
46 |         template = gr.Dropdown(choices=list(TEMPLATES.keys()), value="default")
47 |         rope_scaling = gr.Dropdown(choices=["none", "linear", "dynamic", "yarn", "llama3"], value="none")
48 |         booster = gr.Dropdown(choices=["auto", "flashattn2", "unsloth", "liger_kernel"], value="auto")
49 | 
50 |     model_name.change(get_model_info, [model_name], [model_path, template], queue=False).then(
51 |         list_checkpoints, [model_name, finetuning_type], [checkpoint_path], queue=False
52 |     )
53 |     model_name.input(save_config, inputs=[lang, model_name], queue=False)
54 |     model_path.input(save_config, inputs=[lang, model_name, model_path], queue=False)
55 |     finetuning_type.change(can_quantize, [finetuning_type], [quantization_bit], queue=False).then(
56 |         list_checkpoints, [model_name, finetuning_type], [checkpoint_path], queue=False
57 |     )
58 |     checkpoint_path.focus(list_checkpoints, [model_name, finetuning_type], [checkpoint_path], queue=False)
59 |     quantization_method.change(can_quantize_to, [quantization_method], [quantization_bit], queue=False)
60 | 
61 |     return dict(
62 |         lang=lang,
63 |         model_name=model_name,
64 |         model_path=model_path,
65 |         finetuning_type=finetuning_type,
66 |         checkpoint_path=checkpoint_path,
67 |         quantization_bit=quantization_bit,
68 |         quantization_method=quantization_method,
69 |         template=template,
70 |         rope_scaling=rope_scaling,
71 |         booster=booster,
72 |     )
73 | 


--------------------------------------------------------------------------------
/train/src/llamafactory/webui/css.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | CSS = r"""
16 | .duplicate-button {
17 |   margin: auto !important;
18 |   color: white !important;
19 |   background: black !important;
20 |   border-radius: 100vh !important;
21 | }
22 | 
23 | .thinking-summary {
24 |   padding: 8px !important;
25 | }
26 | 
27 | .thinking-summary span {
28 |   border: 1px solid #e0e0e0 !important;
29 |   border-radius: 4px !important;
30 |   padding: 4px !important;
31 |   cursor: pointer !important;
32 |   font-size: 14px !important;
33 |   background: #333333 !important;
34 | }
35 | 
36 | .thinking-container {
37 |   border-left: 2px solid #a6a6a6 !important;
38 |   padding-left: 10px !important;
39 |   margin: 4px 0 !important;
40 | }
41 | 
42 | .thinking-container p {
43 |   color: #a6a6a6 !important;
44 | }
45 | 
46 | .modal-box {
47 |   position: fixed !important;
48 |   top: 50%;
49 |   left: 50%;
50 |   transform: translate(-50%, -50%); /* center horizontally */
51 |   max-width: 1000px;
52 |   max-height: 750px;
53 |   overflow-y: auto;
54 |   background-color: var(--input-background-fill);
55 |   flex-wrap: nowrap !important;
56 |   border: 2px solid black !important;
57 |   z-index: 1000;
58 |   padding: 10px;
59 | }
60 | 
61 | .dark .modal-box {
62 |   border: 2px solid white !important;
63 | }
64 | """
65 | 


--------------------------------------------------------------------------------
/train/src/llamafactory/webui/manager.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from typing import TYPE_CHECKING, Dict, Generator, List, Set, Tuple
16 | 
17 | 
18 | if TYPE_CHECKING:
19 |     from gradio.components import Component
20 | 
21 | 
22 | class Manager:
23 |     r"""
24 |     A class to manage all the gradio components in Web UI.
25 |     """
26 | 
27 |     def __init__(self) -> None:
28 |         self._id_to_elem: Dict[str, "Component"] = {}
29 |         self._elem_to_id: Dict["Component", str] = {}
30 | 
31 |     def add_elems(self, tab_name: str, elem_dict: Dict[str, "Component"]) -> None:
32 |         r"""
33 |         Adds elements to manager.
34 |         """
35 |         for elem_name, elem in elem_dict.items():
36 |             elem_id = f"{tab_name}.{elem_name}"
37 |             self._id_to_elem[elem_id] = elem
38 |             self._elem_to_id[elem] = elem_id
39 | 
40 |     def get_elem_list(self) -> List["Component"]:
41 |         r"""
42 |         Returns the list of all elements.
43 |         """
44 |         return list(self._id_to_elem.values())
45 | 
46 |     def get_elem_iter(self) -> Generator[Tuple[str, "Component"], None, None]:
47 |         r"""
48 |         Returns an iterator over all elements with their names.
49 |         """
50 |         for elem_id, elem in self._id_to_elem.items():
51 |             yield elem_id.split(".")[-1], elem
52 | 
53 |     def get_elem_by_id(self, elem_id: str) -> "Component":
54 |         r"""
55 |         Gets element by id.
56 | 
57 |         Example: top.lang, train.dataset
58 |         """
59 |         return self._id_to_elem[elem_id]
60 | 
61 |     def get_id_by_elem(self, elem: "Component") -> str:
62 |         r"""
63 |         Gets id by element.
64 |         """
65 |         return self._elem_to_id[elem]
66 | 
67 |     def get_base_elems(self) -> Set["Component"]:
68 |         r"""
69 |         Gets the base elements that are commonly used.
70 |         """
71 |         return {
72 |             self._id_to_elem["top.lang"],
73 |             self._id_to_elem["top.model_name"],
74 |             self._id_to_elem["top.model_path"],
75 |             self._id_to_elem["top.finetuning_type"],
76 |             self._id_to_elem["top.checkpoint_path"],
77 |             self._id_to_elem["top.quantization_bit"],
78 |             self._id_to_elem["top.quantization_method"],
79 |             self._id_to_elem["top.template"],
80 |             self._id_to_elem["top.rope_scaling"],
81 |             self._id_to_elem["top.booster"],
82 |         }
83 | 


--------------------------------------------------------------------------------
/train/src/train.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from llamafactory.train.tuner import run_exp
16 | 
17 | 
18 | def main():
19 |     run_exp()
20 | 
21 | 
22 | def _mp_fn(index):
23 |     # For xla_spawn (TPUs)
24 |     run_exp()
25 | 
26 | 
27 | if __name__ == "__main__":
28 |     main()
29 | 


--------------------------------------------------------------------------------
/train/src/webui.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | 
17 | from llamafactory.webui.interface import create_ui
18 | 
19 | 
20 | def main():
21 |     gradio_ipv6 = os.getenv("GRADIO_IPV6", "0").lower() in ["true", "1"]
22 |     gradio_share = os.getenv("GRADIO_SHARE", "0").lower() in ["true", "1"]
23 |     server_name = os.getenv("GRADIO_SERVER_NAME", "[::]" if gradio_ipv6 else "0.0.0.0")
24 |     create_ui().queue().launch(share=gradio_share, server_name=server_name, inbrowser=True)
25 | 
26 | 
27 | if __name__ == "__main__":
28 |     main()
29 | 


--------------------------------------------------------------------------------
/train/tests/data/processors/test_feedback.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | import random
17 | 
18 | import pytest
19 | from datasets import load_dataset
20 | from transformers import AutoTokenizer
21 | 
22 | from llamafactory.extras.constants import IGNORE_INDEX
23 | from llamafactory.train.test_utils import load_train_dataset
24 | 
25 | 
26 | DEMO_DATA = os.getenv("DEMO_DATA", "llamafactory/demo_data")
27 | 
28 | TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
29 | 
30 | TRAIN_ARGS = {
31 |     "model_name_or_path": TINY_LLAMA,
32 |     "stage": "kto",
33 |     "do_train": True,
34 |     "finetuning_type": "full",
35 |     "dataset": "kto_en_demo",
36 |     "dataset_dir": "REMOTE:" + DEMO_DATA,
37 |     "template": "llama3",
38 |     "cutoff_len": 8192,
39 |     "overwrite_cache": True,
40 |     "output_dir": "dummy_dir",
41 |     "overwrite_output_dir": True,
42 |     "fp16": True,
43 | }
44 | 
45 | 
46 | @pytest.mark.parametrize("num_samples", [16])
47 | def test_feedback_data(num_samples: int):
48 |     train_dataset = load_train_dataset(**TRAIN_ARGS)
49 |     ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA)
50 |     original_data = load_dataset(DEMO_DATA, name="kto_en_demo", split="train")
51 |     indexes = random.choices(range(len(original_data)), k=num_samples)
52 |     for index in indexes:
53 |         messages = original_data["messages"][index]
54 |         ref_input_ids = ref_tokenizer.apply_chat_template(messages)
55 |         prompt_len = len(ref_tokenizer.apply_chat_template(messages[:-1], add_generation_prompt=True))
56 |         ref_labels = [IGNORE_INDEX] * prompt_len + ref_input_ids[prompt_len:]
57 |         assert train_dataset["input_ids"][index] == ref_input_ids
58 |         assert train_dataset["labels"][index] == ref_labels
59 |         assert train_dataset["kto_tags"][index] == original_data["label"][index]
60 | 


--------------------------------------------------------------------------------
/train/tests/data/processors/test_processor_utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from typing import Tuple
16 | 
17 | import pytest
18 | 
19 | from llamafactory.data.processors.processor_utils import infer_seqlen
20 | 
21 | 
22 | @pytest.mark.parametrize(
23 |     "test_input,test_output",
24 |     [
25 |         ((3000, 2000, 1000), (600, 400)),
26 |         ((2000, 3000, 1000), (400, 600)),
27 |         ((1000, 100, 1000), (900, 100)),
28 |         ((100, 1000, 1000), (100, 900)),
29 |         ((100, 500, 1000), (100, 500)),
30 |         ((500, 100, 1000), (500, 100)),
31 |         ((10, 10, 1000), (10, 10)),
32 |     ],
33 | )
34 | def test_infer_seqlen(test_input: Tuple[int, int, int], test_output: Tuple[int, int]):
35 |     assert test_output == infer_seqlen(*test_input)
36 | 


--------------------------------------------------------------------------------
/train/tests/data/processors/test_unsupervised.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | import random
17 | 
18 | import pytest
19 | from datasets import load_dataset
20 | from transformers import AutoTokenizer
21 | 
22 | from llamafactory.train.test_utils import load_train_dataset
23 | 
24 | 
25 | DEMO_DATA = os.getenv("DEMO_DATA", "llamafactory/demo_data")
26 | 
27 | TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
28 | 
29 | TINY_DATA = os.getenv("TINY_DATA", "llamafactory/tiny-supervised-dataset")
30 | 
31 | TRAIN_ARGS = {
32 |     "model_name_or_path": TINY_LLAMA,
33 |     "stage": "ppo",
34 |     "do_train": True,
35 |     "finetuning_type": "full",
36 |     "reward_model": "",
37 |     "reward_model_type": "full",
38 |     "dataset": "system_chat",
39 |     "dataset_dir": "REMOTE:" + DEMO_DATA,
40 |     "template": "llama3",
41 |     "cutoff_len": 8192,
42 |     "overwrite_cache": True,
43 |     "output_dir": "dummy_dir",
44 |     "overwrite_output_dir": True,
45 |     "fp16": True,
46 | }
47 | 
48 | 
49 | @pytest.mark.parametrize("num_samples", [16])
50 | def test_unsupervised_data(num_samples: int):
51 |     train_dataset = load_train_dataset(**TRAIN_ARGS)
52 |     ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA)
53 |     original_data = load_dataset(DEMO_DATA, name="system_chat", split="train")
54 |     indexes = random.choices(range(len(original_data)), k=num_samples)
55 |     for index in indexes:
56 |         messages = original_data["messages"][index]
57 |         ref_ids = ref_tokenizer.apply_chat_template(messages)
58 |         ref_input_ids = ref_tokenizer.apply_chat_template(messages[:-1], add_generation_prompt=True)
59 |         ref_labels = ref_ids[len(ref_input_ids) :]
60 |         assert train_dataset["input_ids"][index] == ref_input_ids
61 |         assert train_dataset["labels"][index] == ref_labels
62 | 


--------------------------------------------------------------------------------
/train/tests/e2e/test_chat.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | 
17 | from llamafactory.chat import ChatModel
18 | 
19 | 
20 | TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
21 | 
22 | INFER_ARGS = {
23 |     "model_name_or_path": TINY_LLAMA,
24 |     "finetuning_type": "lora",
25 |     "template": "llama3",
26 |     "infer_dtype": "float16",
27 |     "do_sample": False,
28 |     "max_new_tokens": 1,
29 | }
30 | 
31 | MESSAGES = [
32 |     {"role": "user", "content": "Hi"},
33 | ]
34 | 
35 | EXPECTED_RESPONSE = "_rho"
36 | 
37 | 
38 | def test_chat():
39 |     chat_model = ChatModel(INFER_ARGS)
40 |     assert chat_model.chat(MESSAGES)[0].response_text == EXPECTED_RESPONSE
41 | 
42 | 
43 | def test_stream_chat():
44 |     chat_model = ChatModel(INFER_ARGS)
45 |     response = ""
46 |     for token in chat_model.stream_chat(MESSAGES):
47 |         response += token
48 | 
49 |     assert response == EXPECTED_RESPONSE
50 | 


--------------------------------------------------------------------------------
/train/tests/e2e/test_train.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | 
17 | import pytest
18 | 
19 | from llamafactory.train.tuner import export_model, run_exp
20 | 
21 | 
22 | DEMO_DATA = os.getenv("DEMO_DATA", "llamafactory/demo_data")
23 | 
24 | TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
25 | 
26 | TINY_LLAMA_ADAPTER = os.getenv("TINY_LLAMA_ADAPTER", "llamafactory/tiny-random-Llama-3-lora")
27 | 
28 | TRAIN_ARGS = {
29 |     "model_name_or_path": TINY_LLAMA,
30 |     "do_train": True,
31 |     "finetuning_type": "lora",
32 |     "dataset_dir": "REMOTE:" + DEMO_DATA,
33 |     "template": "llama3",
34 |     "cutoff_len": 1,
35 |     "overwrite_cache": False,
36 |     "overwrite_output_dir": True,
37 |     "per_device_train_batch_size": 1,
38 |     "max_steps": 1,
39 | }
40 | 
41 | INFER_ARGS = {
42 |     "model_name_or_path": TINY_LLAMA,
43 |     "adapter_name_or_path": TINY_LLAMA_ADAPTER,
44 |     "finetuning_type": "lora",
45 |     "template": "llama3",
46 |     "infer_dtype": "float16",
47 | }
48 | 
49 | OS_NAME = os.getenv("OS_NAME", "")
50 | 
51 | 
52 | @pytest.mark.parametrize(
53 |     "stage,dataset",
54 |     [
55 |         ("pt", "c4_demo"),
56 |         ("sft", "alpaca_en_demo"),
57 |         ("dpo", "dpo_en_demo"),
58 |         ("kto", "kto_en_demo"),
59 |         pytest.param("rm", "dpo_en_demo", marks=pytest.mark.xfail(OS_NAME.startswith("windows"), reason="OS error.")),
60 |     ],
61 | )
62 | def test_run_exp(stage: str, dataset: str):
63 |     output_dir = os.path.join("output", f"train_{stage}")
64 |     run_exp({"stage": stage, "dataset": dataset, "output_dir": output_dir, **TRAIN_ARGS})
65 |     assert os.path.exists(output_dir)
66 | 
67 | 
68 | def test_export():
69 |     export_dir = os.path.join("output", "llama3_export")
70 |     export_model({"export_dir": export_dir, **INFER_ARGS})
71 |     assert os.path.exists(export_dir)
72 | 


--------------------------------------------------------------------------------
/train/tests/eval/test_eval_template.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from llamafactory.eval.template import get_eval_template
16 | 
17 | 
18 | def test_eval_template_en():
19 |     support_set = [
20 |         {
21 |             "question": "Fewshot question",
22 |             "A": "Fewshot1",
23 |             "B": "Fewshot2",
24 |             "C": "Fewshot3",
25 |             "D": "Fewshot4",
26 |             "answer": "B",
27 |         }
28 |     ]
29 |     example = {
30 |         "question": "Target question",
31 |         "A": "Target1",
32 |         "B": "Target2",
33 |         "C": "Target3",
34 |         "D": "Target4",
35 |         "answer": "C",
36 |     }
37 |     template = get_eval_template(name="en")
38 |     messages = template.format_example(example, support_set=support_set, subject_name="SubName")
39 |     assert messages == [
40 |         {
41 |             "role": "user",
42 |             "content": (
43 |                 "The following are multiple choice questions (with answers) about SubName.\n\n"
44 |                 "Fewshot question\nA. Fewshot1\nB. Fewshot2\nC. Fewshot3\nD. Fewshot4\nAnswer:"
45 |             ),
46 |         },
47 |         {"role": "assistant", "content": "B"},
48 |         {
49 |             "role": "user",
50 |             "content": "Target question\nA. Target1\nB. Target2\nC. Target3\nD. Target4\nAnswer:",
51 |         },
52 |         {"role": "assistant", "content": "C"},
53 |     ]
54 | 
55 | 
56 | def test_eval_template_zh():
57 |     support_set = [
58 |         {
59 |             "question": "示例问题",
60 |             "A": "示例答案1",
61 |             "B": "示例答案2",
62 |             "C": "示例答案3",
63 |             "D": "示例答案4",
64 |             "answer": "B",
65 |         }
66 |     ]
67 |     example = {
68 |         "question": "目标问题",
69 |         "A": "目标答案1",
70 |         "B": "目标答案2",
71 |         "C": "目标答案3",
72 |         "D": "目标答案4",
73 |         "answer": "C",
74 |     }
75 |     template = get_eval_template(name="zh")
76 |     messages = template.format_example(example, support_set=support_set, subject_name="主题")
77 |     assert messages == [
78 |         {
79 |             "role": "user",
80 |             "content": (
81 |                 "以下是中国关于主题考试的单项选择题，请选出其中的正确答案。\n\n"
82 |                 "示例问题\nA. 示例答案1\nB. 示例答案2\nC. 示例答案3\nD. 示例答案4\n答案："
83 |             ),
84 |         },
85 |         {"role": "assistant", "content": "B"},
86 |         {
87 |             "role": "user",
88 |             "content": "目标问题\nA. 目标答案1\nB. 目标答案2\nC. 目标答案3\nD. 目标答案4\n答案：",
89 |         },
90 |         {"role": "assistant", "content": "C"},
91 |     ]
92 | 


--------------------------------------------------------------------------------
/train/tests/model/model_utils/test_attention.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | 
17 | import pytest
18 | from transformers.utils import is_flash_attn_2_available, is_torch_sdpa_available
19 | 
20 | from llamafactory.extras.packages import is_transformers_version_greater_than
21 | from llamafactory.train.test_utils import load_infer_model
22 | 
23 | 
24 | TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
25 | 
26 | INFER_ARGS = {
27 |     "model_name_or_path": TINY_LLAMA,
28 |     "template": "llama3",
29 | }
30 | 
31 | 
32 | @pytest.mark.xfail(is_transformers_version_greater_than("4.48"), reason="Attention refactor.")
33 | def test_attention():
34 |     attention_available = ["disabled"]
35 |     if is_torch_sdpa_available():
36 |         attention_available.append("sdpa")
37 | 
38 |     if is_flash_attn_2_available():
39 |         attention_available.append("fa2")
40 | 
41 |     llama_attention_classes = {
42 |         "disabled": "LlamaAttention",
43 |         "sdpa": "LlamaSdpaAttention",
44 |         "fa2": "LlamaFlashAttention2",
45 |     }
46 |     for requested_attention in attention_available:
47 |         model = load_infer_model(flash_attn=requested_attention, **INFER_ARGS)
48 |         for module in model.modules():
49 |             if "Attention" in module.__class__.__name__:
50 |                 assert module.__class__.__name__ == llama_attention_classes[requested_attention]
51 | 


--------------------------------------------------------------------------------
/train/tests/model/model_utils/test_checkpointing.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | 
17 | import pytest
18 | import torch
19 | 
20 | from llamafactory.extras.misc import get_current_device
21 | from llamafactory.train.test_utils import load_train_model
22 | 
23 | 
24 | TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
25 | 
26 | TRAIN_ARGS = {
27 |     "model_name_or_path": TINY_LLAMA,
28 |     "stage": "sft",
29 |     "do_train": True,
30 |     "finetuning_type": "lora",
31 |     "lora_target": "all",
32 |     "dataset": "llamafactory/tiny-supervised-dataset",
33 |     "dataset_dir": "ONLINE",
34 |     "template": "llama3",
35 |     "cutoff_len": 1024,
36 |     "overwrite_cache": True,
37 |     "output_dir": "dummy_dir",
38 |     "overwrite_output_dir": True,
39 |     "fp16": True,
40 | }
41 | 
42 | 
43 | @pytest.mark.parametrize("disable_gradient_checkpointing", [False, True])
44 | def test_vanilla_checkpointing(disable_gradient_checkpointing: bool):
45 |     model = load_train_model(disable_gradient_checkpointing=disable_gradient_checkpointing, **TRAIN_ARGS)
46 |     for module in filter(lambda m: hasattr(m, "gradient_checkpointing"), model.modules()):
47 |         assert getattr(module, "gradient_checkpointing") != disable_gradient_checkpointing
48 | 
49 | 
50 | def test_unsloth_gradient_checkpointing():
51 |     model = load_train_model(use_unsloth_gc=True, **TRAIN_ARGS)
52 |     for module in filter(lambda m: hasattr(m, "gradient_checkpointing"), model.modules()):
53 |         assert module._gradient_checkpointing_func.__self__.__name__ == "UnslothGradientCheckpointing"
54 | 
55 | 
56 | def test_upcast_layernorm():
57 |     model = load_train_model(upcast_layernorm=True, **TRAIN_ARGS)
58 |     for name, param in model.named_parameters():
59 |         if param.ndim == 1 and "norm" in name:
60 |             assert param.dtype == torch.float32
61 | 
62 | 
63 | def test_upcast_lmhead_output():
64 |     model = load_train_model(upcast_lmhead_output=True, **TRAIN_ARGS)
65 |     inputs = torch.randn((1, 16), dtype=torch.float16, device=get_current_device())
66 |     outputs: "torch.Tensor" = model.get_output_embeddings()(inputs)
67 |     assert outputs.dtype == torch.float32
68 | 


--------------------------------------------------------------------------------
/train/tests/model/model_utils/test_misc.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | 
17 | import pytest
18 | import torch
19 | from transformers import AutoConfig, AutoModelForCausalLM
20 | 
21 | from llamafactory.model.model_utils.misc import find_expanded_modules
22 | 
23 | 
24 | HF_TOKEN = os.getenv("HF_TOKEN")
25 | 
26 | 
27 | @pytest.mark.skipif(not HF_TOKEN, reason="Gated model.")
28 | def test_expanded_modules():
29 |     config = AutoConfig.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
30 |     with torch.device("meta"):
31 |         model = AutoModelForCausalLM.from_config(config)
32 | 
33 |     expanded_modules = find_expanded_modules(model, ["q_proj", "v_proj"], num_layer_trainable=4)
34 |     assert expanded_modules == [
35 |         "model.layers.7.self_attn.q_proj",
36 |         "model.layers.7.self_attn.v_proj",
37 |         "model.layers.15.self_attn.q_proj",
38 |         "model.layers.15.self_attn.v_proj",
39 |         "model.layers.23.self_attn.q_proj",
40 |         "model.layers.23.self_attn.v_proj",
41 |         "model.layers.31.self_attn.q_proj",
42 |         "model.layers.31.self_attn.v_proj",
43 |     ]
44 | 


--------------------------------------------------------------------------------
/train/tests/model/model_utils/test_packing.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import pytest
16 | import torch
17 | 
18 | from llamafactory.model.model_utils.packing import get_seqlens_in_batch, get_unpad_data
19 | 
20 | 
21 | @pytest.mark.parametrize(
22 |     "attention_mask,golden_seq_lens",
23 |     [
24 |         (
25 |             [
26 |                 [1, 1, 2, 2, 2, 0],
27 |                 [1, 2, 2, 3, 3, 3],
28 |             ],
29 |             [2, 3, 1, 2, 3],
30 |         ),
31 |         (
32 |             [[1]],
33 |             [1],
34 |         ),
35 |     ],
36 | )
37 | def test_get_seqlens_in_batch(attention_mask, golden_seq_lens):
38 |     attention_mask_with_indices = torch.tensor(attention_mask)
39 |     seqlens_in_batch = get_seqlens_in_batch(attention_mask_with_indices)
40 |     assert torch.all(seqlens_in_batch == torch.tensor(golden_seq_lens))
41 | 
42 | 
43 | @pytest.mark.parametrize(
44 |     "attention_mask,golden_indices,golden_cu_seqlens,golden_max_seqlen",
45 |     [
46 |         (
47 |             [
48 |                 [1, 1, 2, 2, 2, 0],
49 |                 [1, 2, 2, 3, 3, 3],
50 |             ],
51 |             [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11],
52 |             [0, 2, 5, 6, 8, 11],
53 |             3,
54 |         ),
55 |         (
56 |             [[1]],
57 |             [0],
58 |             [0, 1],
59 |             1,
60 |         ),
61 |     ],
62 | )
63 | def test_get_unpad_data(attention_mask, golden_indices, golden_cu_seqlens, golden_max_seqlen):
64 |     attention_mask_with_indices = torch.tensor(attention_mask)
65 |     indices, cu_seqlens, max_seqlen_in_batch = get_unpad_data(attention_mask_with_indices)
66 |     assert torch.all(indices == torch.tensor(golden_indices))
67 |     assert torch.all(cu_seqlens == torch.tensor(golden_cu_seqlens, dtype=torch.int32))
68 |     assert max_seqlen_in_batch == golden_max_seqlen
69 | 


--------------------------------------------------------------------------------
/train/tests/model/model_utils/test_visual.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import pytest
16 | import torch
17 | from transformers import AutoConfig, AutoModelForVision2Seq
18 | 
19 | from llamafactory.hparams import FinetuningArguments, ModelArguments
20 | from llamafactory.model.adapter import init_adapter
21 | 
22 | 
23 | @pytest.mark.parametrize(
24 |     "freeze_vision_tower,freeze_multi_modal_projector,train_mm_proj_only",
25 |     [
26 |         (False, False, False),
27 |         (False, True, False),
28 |         (True, False, False),
29 |         (True, True, False),
30 |         (True, False, True),
31 |     ],
32 | )
33 | def test_visual_full(freeze_vision_tower: bool, freeze_multi_modal_projector: bool, train_mm_proj_only: bool):
34 |     model_args = ModelArguments(model_name_or_path="Qwen/Qwen2-VL-2B-Instruct")
35 |     finetuning_args = FinetuningArguments(
36 |         finetuning_type="full",
37 |         freeze_vision_tower=freeze_vision_tower,
38 |         freeze_multi_modal_projector=freeze_multi_modal_projector,
39 |         train_mm_proj_only=train_mm_proj_only,
40 |     )
41 |     config = AutoConfig.from_pretrained(model_args.model_name_or_path)
42 |     with torch.device("meta"):
43 |         model = AutoModelForVision2Seq.from_config(config)
44 | 
45 |     model = init_adapter(config, model, model_args, finetuning_args, is_trainable=True)
46 |     for name, param in model.named_parameters():
47 |         if any(key in name for key in ["visual.patch_embed", "visual.blocks"]):
48 |             assert param.requires_grad != freeze_vision_tower
49 |         elif "visual.merger" in name:
50 |             assert param.requires_grad != freeze_multi_modal_projector
51 |         else:
52 |             assert param.requires_grad != train_mm_proj_only
53 | 
54 | 
55 | @pytest.mark.parametrize("freeze_vision_tower", [False, True])
56 | def test_visual_lora(freeze_vision_tower: bool):
57 |     model_args = ModelArguments(model_name_or_path="Qwen/Qwen2-VL-2B-Instruct")
58 |     finetuning_args = FinetuningArguments(finetuning_type="lora", freeze_vision_tower=freeze_vision_tower)
59 |     config = AutoConfig.from_pretrained(model_args.model_name_or_path)
60 |     with torch.device("meta"):
61 |         model = AutoModelForVision2Seq.from_config(config)
62 | 
63 |     model = init_adapter(config, model, model_args, finetuning_args, is_trainable=True)
64 |     trainable_params, frozen_params = set(), set()
65 |     for name, param in model.named_parameters():
66 |         if param.requires_grad:
67 |             trainable_params.add(name)
68 |         else:
69 |             frozen_params.add(name)
70 | 
71 |     if freeze_vision_tower:
72 |         assert "base_model.model.visual.blocks.0.attn.qkv.lora_A.default.weight" not in trainable_params
73 |     else:
74 |         assert "base_model.model.visual.blocks.0.attn.qkv.lora_A.default.weight" in trainable_params
75 | 
76 |     assert "merger" not in trainable_params
77 |     assert "base_model.model.model.layers.0.self_attn.q_proj.lora_A.default.weight" in trainable_params
78 | 


--------------------------------------------------------------------------------
/train/tests/model/test_base.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | 
17 | import pytest
18 | 
19 | from llamafactory.train.test_utils import compare_model, load_infer_model, load_reference_model, patch_valuehead_model
20 | 
21 | 
22 | TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
23 | 
24 | TINY_LLAMA_VALUEHEAD = os.getenv("TINY_LLAMA_VALUEHEAD", "llamafactory/tiny-random-Llama-3-valuehead")
25 | 
26 | INFER_ARGS = {
27 |     "model_name_or_path": TINY_LLAMA,
28 |     "template": "llama3",
29 |     "infer_dtype": "float16",
30 | }
31 | 
32 | 
33 | @pytest.fixture
34 | def fix_valuehead_cpu_loading():
35 |     patch_valuehead_model()
36 | 
37 | 
38 | def test_base():
39 |     model = load_infer_model(**INFER_ARGS)
40 |     ref_model = load_reference_model(TINY_LLAMA)
41 |     compare_model(model, ref_model)
42 | 
43 | 
44 | @pytest.mark.usefixtures("fix_valuehead_cpu_loading")
45 | def test_valuehead():
46 |     model = load_infer_model(add_valuehead=True, **INFER_ARGS)
47 |     ref_model = load_reference_model(TINY_LLAMA_VALUEHEAD, add_valuehead=True)
48 |     compare_model(model, ref_model)
49 | 


--------------------------------------------------------------------------------
/train/tests/model/test_freeze.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | 
17 | import torch
18 | 
19 | from llamafactory.train.test_utils import load_infer_model, load_train_model
20 | 
21 | 
22 | TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
23 | 
24 | TRAIN_ARGS = {
25 |     "model_name_or_path": TINY_LLAMA,
26 |     "stage": "sft",
27 |     "do_train": True,
28 |     "finetuning_type": "freeze",
29 |     "dataset": "llamafactory/tiny-supervised-dataset",
30 |     "dataset_dir": "ONLINE",
31 |     "template": "llama3",
32 |     "cutoff_len": 1024,
33 |     "overwrite_cache": True,
34 |     "output_dir": "dummy_dir",
35 |     "overwrite_output_dir": True,
36 |     "fp16": True,
37 | }
38 | 
39 | INFER_ARGS = {
40 |     "model_name_or_path": TINY_LLAMA,
41 |     "finetuning_type": "freeze",
42 |     "template": "llama3",
43 |     "infer_dtype": "float16",
44 | }
45 | 
46 | 
47 | def test_freeze_train_all_modules():
48 |     model = load_train_model(freeze_trainable_layers=1, **TRAIN_ARGS)
49 |     for name, param in model.named_parameters():
50 |         if name.startswith("model.layers.1."):
51 |             assert param.requires_grad is True
52 |             assert param.dtype == torch.float32
53 |         else:
54 |             assert param.requires_grad is False
55 |             assert param.dtype == torch.float16
56 | 
57 | 
58 | def test_freeze_train_extra_modules():
59 |     model = load_train_model(freeze_trainable_layers=1, freeze_extra_modules="embed_tokens,lm_head", **TRAIN_ARGS)
60 |     for name, param in model.named_parameters():
61 |         if name.startswith("model.layers.1.") or any(module in name for module in ["embed_tokens", "lm_head"]):
62 |             assert param.requires_grad is True
63 |             assert param.dtype == torch.float32
64 |         else:
65 |             assert param.requires_grad is False
66 |             assert param.dtype == torch.float16
67 | 
68 | 
69 | def test_freeze_inference():
70 |     model = load_infer_model(**INFER_ARGS)
71 |     for param in model.parameters():
72 |         assert param.requires_grad is False
73 |         assert param.dtype == torch.float16
74 | 


--------------------------------------------------------------------------------
/train/tests/model/test_full.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | 
17 | import torch
18 | 
19 | from llamafactory.train.test_utils import load_infer_model, load_train_model
20 | 
21 | 
22 | TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
23 | 
24 | TRAIN_ARGS = {
25 |     "model_name_or_path": TINY_LLAMA,
26 |     "stage": "sft",
27 |     "do_train": True,
28 |     "finetuning_type": "full",
29 |     "dataset": "llamafactory/tiny-supervised-dataset",
30 |     "dataset_dir": "ONLINE",
31 |     "template": "llama3",
32 |     "cutoff_len": 1024,
33 |     "overwrite_cache": True,
34 |     "output_dir": "dummy_dir",
35 |     "overwrite_output_dir": True,
36 |     "fp16": True,
37 | }
38 | 
39 | INFER_ARGS = {
40 |     "model_name_or_path": TINY_LLAMA,
41 |     "finetuning_type": "full",
42 |     "template": "llama3",
43 |     "infer_dtype": "float16",
44 | }
45 | 
46 | 
47 | def test_full_train():
48 |     model = load_train_model(**TRAIN_ARGS)
49 |     for param in model.parameters():
50 |         assert param.requires_grad is True
51 |         assert param.dtype == torch.float32
52 | 
53 | 
54 | def test_full_inference():
55 |     model = load_infer_model(**INFER_ARGS)
56 |     for param in model.parameters():
57 |         assert param.requires_grad is False
58 |         assert param.dtype == torch.float16
59 | 


--------------------------------------------------------------------------------
/train/tests/model/test_pissa.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | 
17 | import pytest
18 | 
19 | from llamafactory.train.test_utils import compare_model, load_infer_model, load_reference_model, load_train_model
20 | 
21 | 
22 | TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
23 | 
24 | TINY_LLAMA_PISSA = os.getenv("TINY_LLAMA_ADAPTER", "llamafactory/tiny-random-Llama-3-pissa")
25 | 
26 | TRAIN_ARGS = {
27 |     "model_name_or_path": TINY_LLAMA,
28 |     "stage": "sft",
29 |     "do_train": True,
30 |     "finetuning_type": "lora",
31 |     "pissa_init": True,
32 |     "pissa_iter": -1,
33 |     "dataset": "llamafactory/tiny-supervised-dataset",
34 |     "dataset_dir": "ONLINE",
35 |     "template": "llama3",
36 |     "cutoff_len": 1024,
37 |     "overwrite_cache": True,
38 |     "output_dir": "dummy_dir",
39 |     "overwrite_output_dir": True,
40 |     "fp16": True,
41 | }
42 | 
43 | INFER_ARGS = {
44 |     "model_name_or_path": TINY_LLAMA_PISSA,
45 |     "adapter_name_or_path": TINY_LLAMA_PISSA,
46 |     "adapter_folder": "pissa_init",
47 |     "finetuning_type": "lora",
48 |     "template": "llama3",
49 |     "infer_dtype": "float16",
50 | }
51 | 
52 | OS_NAME = os.getenv("OS_NAME", "")
53 | 
54 | 
55 | @pytest.mark.xfail(reason="PiSSA initialization is not stable in different platform.")
56 | def test_pissa_train():
57 |     model = load_train_model(**TRAIN_ARGS)
58 |     ref_model = load_reference_model(TINY_LLAMA_PISSA, TINY_LLAMA_PISSA, use_pissa=True, is_trainable=True)
59 |     compare_model(model, ref_model)
60 | 
61 | 
62 | @pytest.mark.xfail(OS_NAME.startswith("windows"), reason="Known connection error on Windows.")
63 | def test_pissa_inference():
64 |     model = load_infer_model(**INFER_ARGS)
65 |     ref_model = load_reference_model(TINY_LLAMA_PISSA, TINY_LLAMA_PISSA, use_pissa=True, is_trainable=False)
66 |     ref_model = ref_model.merge_and_unload()
67 |     compare_model(model, ref_model)
68 | 


--------------------------------------------------------------------------------
/train/tests/train/test_sft_trainer.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | from dataclasses import dataclass, field
17 | from typing import Any, Dict, List
18 | 
19 | import pytest
20 | from transformers import DataCollatorWithPadding
21 | 
22 | from llamafactory.data import get_dataset, get_template_and_fix_tokenizer
23 | from llamafactory.hparams import get_train_args
24 | from llamafactory.model import load_model, load_tokenizer
25 | from llamafactory.train.sft.trainer import CustomSeq2SeqTrainer
26 | 
27 | 
28 | DEMO_DATA = os.getenv("DEMO_DATA", "llamafactory/demo_data")
29 | 
30 | TINY_LLAMA = os.getenv("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
31 | 
32 | TRAIN_ARGS = {
33 |     "model_name_or_path": TINY_LLAMA,
34 |     "stage": "sft",
35 |     "do_train": True,
36 |     "finetuning_type": "lora",
37 |     "dataset": "llamafactory/tiny-supervised-dataset",
38 |     "dataset_dir": "ONLINE",
39 |     "template": "llama3",
40 |     "cutoff_len": 1024,
41 |     "overwrite_cache": False,
42 |     "overwrite_output_dir": True,
43 |     "per_device_train_batch_size": 1,
44 |     "max_steps": 1,
45 | }
46 | 
47 | 
48 | @dataclass
49 | class DataCollatorWithVerbose(DataCollatorWithPadding):
50 |     verbose_list: List[Dict[str, Any]] = field(default_factory=list)
51 | 
52 |     def __call__(self, features: List[Dict[str, Any]]) -> Dict[str, Any]:
53 |         self.verbose_list.extend(features)
54 |         batch = super().__call__(features)
55 |         return {k: v[:, :1] for k, v in batch.items()}  # truncate input length
56 | 
57 | 
58 | @pytest.mark.parametrize("disable_shuffling", [False, True])
59 | def test_shuffle(disable_shuffling: bool):
60 |     model_args, data_args, training_args, finetuning_args, _ = get_train_args(
61 |         {
62 |             "output_dir": os.path.join("output", f"shuffle{str(disable_shuffling).lower()}"),
63 |             "disable_shuffling": disable_shuffling,
64 |             **TRAIN_ARGS,
65 |         }
66 |     )
67 |     tokenizer_module = load_tokenizer(model_args)
68 |     tokenizer = tokenizer_module["tokenizer"]
69 |     template = get_template_and_fix_tokenizer(tokenizer, data_args)
70 |     dataset_module = get_dataset(template, model_args, data_args, training_args, stage="sft", **tokenizer_module)
71 |     model = load_model(tokenizer, model_args, finetuning_args, training_args.do_train)
72 |     data_collator = DataCollatorWithVerbose(tokenizer=tokenizer)
73 |     trainer = CustomSeq2SeqTrainer(
74 |         model=model,
75 |         args=training_args,
76 |         finetuning_args=finetuning_args,
77 |         data_collator=data_collator,
78 |         **dataset_module,
79 |         **tokenizer_module,
80 |     )
81 |     trainer.train()
82 |     if disable_shuffling:
83 |         assert data_collator.verbose_list[0]["input_ids"] == dataset_module["train_dataset"][0]["input_ids"]
84 |     else:
85 |         assert data_collator.verbose_list[0]["input_ids"] != dataset_module["train_dataset"][0]["input_ids"]
86 | 


--------------------------------------------------------------------------------