├── .dockerignore
├── .env.local
├── .gitattributes
├── .github
    ├── CODE_OF_CONDUCT.md
    ├── CONTRIBUTING.md
    ├── ISSUE_TEMPLATE
    │   ├── 1-bug-report.yml
    │   ├── 2-feature-request.yml
    │   └── config.yml
    ├── PULL_REQUEST_TEMPLATE.md
    ├── SECURITY.md
    └── workflows
    │   ├── docker.yml
    │   ├── label_issue.yml
    │   ├── publish.yml
    │   └── tests.yml
├── .gitignore
├── .pre-commit-config.yaml
├── CITATION.cff
├── LICENSE
├── MANIFEST.in
├── Makefile
├── README.md
├── README_zh.md
├── assets
    ├── logo.png
    ├── wechat.jpg
    └── wechat_npu.jpg
├── data
    ├── README.md
    ├── README_zh.md
    ├── alpaca_en_demo.json
    ├── alpaca_zh_demo.json
    ├── belle_multiturn
    │   └── belle_multiturn.py
    ├── c4_demo.jsonl
    ├── dataset_info.json
    ├── dpo_en_demo.json
    ├── dpo_zh_demo.json
    ├── glaive_toolcall_en_demo.json
    ├── glaive_toolcall_zh_demo.json
    ├── hh_rlhf_en
    │   └── hh_rlhf_en.py
    ├── identity.json
    ├── kto_en_demo.json
    ├── mllm_audio_demo.json
    ├── mllm_demo.json
    ├── mllm_demo_data
    │   ├── 1.jpg
    │   ├── 1.mp3
    │   ├── 1.mp4
    │   ├── 2.avi
    │   ├── 2.jpg
    │   ├── 2.wav
    │   ├── 3.flac
    │   ├── 3.jpg
    │   ├── 3.mp4
    │   ├── 4.mp3
    │   └── 4.mp4
    ├── mllm_video_audio_demo.json
    ├── mllm_video_demo.json
    ├── ultra_chat
    │   └── ultra_chat.py
    └── wiki_demo.txt
├── docker
    ├── docker-cuda
    │   ├── Dockerfile
    │   ├── Dockerfile.base
    │   └── docker-compose.yml
    ├── docker-npu
    │   ├── Dockerfile
    │   └── docker-compose.yml
    └── docker-rocm
    │   ├── Dockerfile
    │   └── docker-compose.yml
├── evaluation
    ├── ceval
    │   ├── ceval.py
    │   ├── ceval.zip
    │   └── mapping.json
    ├── cmmlu
    │   ├── cmmlu.py
    │   ├── cmmlu.zip
    │   └── mapping.json
    └── mmlu
    │   ├── mapping.json
    │   ├── mmlu.py
    │   └── mmlu.zip
├── examples
    ├── README.md
    ├── README_zh.md
    ├── accelerate
    │   ├── fsdp_config.yaml
    │   └── fsdp_config_offload.yaml
    ├── deepspeed
    │   ├── ds_z0_config.json
    │   ├── ds_z2_config.json
    │   ├── ds_z2_offload_config.json
    │   ├── ds_z3_config.json
    │   └── ds_z3_offload_config.json
    ├── extras
    │   ├── adam_mini
    │   │   └── qwen2_full_sft.yaml
    │   ├── apollo
    │   │   └── llama3_full_sft.yaml
    │   ├── badam
    │   │   └── llama3_full_sft.yaml
    │   ├── fsdp_qlora
    │   │   ├── llama3_lora_sft.yaml
    │   │   └── train.sh
    │   ├── galore
    │   │   └── llama3_full_sft.yaml
    │   ├── llama_pro
    │   │   ├── expand.sh
    │   │   └── llama3_freeze_sft.yaml
    │   ├── loraplus
    │   │   └── llama3_lora_sft.yaml
    │   ├── mod
    │   │   └── llama3_full_sft.yaml
    │   ├── muon
    │   │   └── qwen2_full_sft.yaml
    │   ├── nlg_eval
    │   │   └── llama3_lora_predict.yaml
    │   └── pissa
    │   │   ├── init.sh
    │   │   └── llama3_lora_sft.yaml
    ├── inference
    │   ├── llama3.yaml
    │   ├── llama3_full_sft.yaml
    │   ├── llama3_lora_sft.yaml
    │   └── qwen2_5vl.yaml
    ├── merge_lora
    │   ├── llama3_full_sft.yaml
    │   ├── llama3_gptq.yaml
    │   ├── llama3_lora_sft.yaml
    │   └── qwen2_5vl_lora_sft.yaml
    ├── train_full
    │   ├── llama3_full_sft.yaml
    │   └── qwen2_5vl_full_sft.yaml
    ├── train_lora
    │   ├── llama3_lora_dpo.yaml
    │   ├── llama3_lora_eval.yaml
    │   ├── llama3_lora_kto.yaml
    │   ├── llama3_lora_ppo.yaml
    │   ├── llama3_lora_pretrain.yaml
    │   ├── llama3_lora_reward.yaml
    │   ├── llama3_lora_sft.sh
    │   ├── llama3_lora_sft.yaml
    │   ├── llama3_lora_sft_ds3.yaml
    │   ├── llama3_lora_sft_ray.yaml
    │   ├── llama3_preprocess.yaml
    │   ├── llama4_lora_sft_ds3.yaml
    │   ├── qwen2_5vl_lora_dpo.yaml
    │   └── qwen2_5vl_lora_sft.yaml
    └── train_qlora
    │   ├── llama3_lora_sft_aqlm.yaml
    │   ├── llama3_lora_sft_awq.yaml
    │   ├── llama3_lora_sft_bnb_npu.yaml
    │   ├── llama3_lora_sft_gptq.yaml
    │   └── llama3_lora_sft_otfq.yaml
├── pyproject.toml
├── requirements.txt
├── scripts
    ├── api_example
    │   ├── test_image.py
    │   └── test_toolcall.py
    ├── convert_ckpt
    │   ├── llamafy_baichuan2.py
    │   ├── llamafy_qwen.py
    │   └── tiny_llama4.py
    ├── eval_bleu_rouge.py
    ├── llama_pro.py
    ├── loftq_init.py
    ├── pissa_init.py
    ├── qwen_omni_merge.py
    ├── stat_utils
    │   ├── cal_flops.py
    │   ├── cal_lr.py
    │   ├── cal_mfu.py
    │   ├── cal_ppl.py
    │   └── length_cdf.py
    └── vllm_infer.py
├── setup.py
├── src
    ├── api.py
    ├── llamafactory
    │   ├── __init__.py
    │   ├── api
    │   │   ├── __init__.py
    │   │   ├── app.py
    │   │   ├── chat.py
    │   │   ├── common.py
    │   │   └── protocol.py
    │   ├── chat
    │   │   ├── __init__.py
    │   │   ├── base_engine.py
    │   │   ├── chat_model.py
    │   │   ├── hf_engine.py
    │   │   ├── sglang_engine.py
    │   │   └── vllm_engine.py
    │   ├── cli.py
    │   ├── data
    │   │   ├── __init__.py
    │   │   ├── collator.py
    │   │   ├── converter.py
    │   │   ├── data_utils.py
    │   │   ├── formatter.py
    │   │   ├── loader.py
    │   │   ├── mm_plugin.py
    │   │   ├── parser.py
    │   │   ├── processor
    │   │   │   ├── __init__.py
    │   │   │   ├── feedback.py
    │   │   │   ├── pairwise.py
    │   │   │   ├── pretrain.py
    │   │   │   ├── processor_utils.py
    │   │   │   ├── supervised.py
    │   │   │   └── unsupervised.py
    │   │   ├── template.py
    │   │   └── tool_utils.py
    │   ├── eval
    │   │   ├── __init__.py
    │   │   ├── evaluator.py
    │   │   └── template.py
    │   ├── extras
    │   │   ├── __init__.py
    │   │   ├── constants.py
    │   │   ├── env.py
    │   │   ├── logging.py
    │   │   ├── misc.py
    │   │   ├── packages.py
    │   │   └── ploting.py
    │   ├── hparams
    │   │   ├── __init__.py
    │   │   ├── data_args.py
    │   │   ├── evaluation_args.py
    │   │   ├── finetuning_args.py
    │   │   ├── generating_args.py
    │   │   ├── model_args.py
    │   │   ├── parser.py
    │   │   └── training_args.py
    │   ├── launcher.py
    │   ├── model
    │   │   ├── __init__.py
    │   │   ├── adapter.py
    │   │   ├── loader.py
    │   │   ├── model_utils
    │   │   │   ├── __init__.py
    │   │   │   ├── attention.py
    │   │   │   ├── checkpointing.py
    │   │   │   ├── embedding.py
    │   │   │   ├── kv_cache.py
    │   │   │   ├── liger_kernel.py
    │   │   │   ├── longlora.py
    │   │   │   ├── misc.py
    │   │   │   ├── mod.py
    │   │   │   ├── moe.py
    │   │   │   ├── packing.py
    │   │   │   ├── quantization.py
    │   │   │   ├── rope.py
    │   │   │   ├── unsloth.py
    │   │   │   ├── valuehead.py
    │   │   │   └── visual.py
    │   │   └── patcher.py
    │   ├── third_party
    │   │   ├── __init__.py
    │   │   └── muon
    │   │   │   ├── __init__.py
    │   │   │   └── muon.py
    │   ├── train
    │   │   ├── __init__.py
    │   │   ├── callbacks.py
    │   │   ├── dpo
    │   │   │   ├── __init__.py
    │   │   │   ├── trainer.py
    │   │   │   └── workflow.py
    │   │   ├── kto
    │   │   │   ├── __init__.py
    │   │   │   ├── trainer.py
    │   │   │   └── workflow.py
    │   │   ├── ppo
    │   │   │   ├── __init__.py
    │   │   │   ├── ppo_utils.py
    │   │   │   ├── trainer.py
    │   │   │   └── workflow.py
    │   │   ├── pt
    │   │   │   ├── __init__.py
    │   │   │   ├── trainer.py
    │   │   │   └── workflow.py
    │   │   ├── rm
    │   │   │   ├── __init__.py
    │   │   │   ├── metric.py
    │   │   │   ├── trainer.py
    │   │   │   └── workflow.py
    │   │   ├── sft
    │   │   │   ├── __init__.py
    │   │   │   ├── metric.py
    │   │   │   ├── trainer.py
    │   │   │   └── workflow.py
    │   │   ├── test_utils.py
    │   │   ├── trainer_utils.py
    │   │   └── tuner.py
    │   └── webui
    │   │   ├── __init__.py
    │   │   ├── chatter.py
    │   │   ├── common.py
    │   │   ├── components
    │   │       ├── __init__.py
    │   │       ├── chatbot.py
    │   │       ├── data.py
    │   │       ├── eval.py
    │   │       ├── export.py
    │   │       ├── infer.py
    │   │       ├── top.py
    │   │       └── train.py
    │   │   ├── control.py
    │   │   ├── css.py
    │   │   ├── engine.py
    │   │   ├── interface.py
    │   │   ├── locales.py
    │   │   ├── manager.py
    │   │   └── runner.py
    ├── train.py
    └── webui.py
└── tests
    ├── check_license.py
    ├── data
        ├── processor
        │   ├── test_feedback.py
        │   ├── test_pairwise.py
        │   ├── test_processor_utils.py
        │   ├── test_supervised.py
        │   └── test_unsupervised.py
        ├── test_collator.py
        ├── test_converter.py
        ├── test_formatter.py
        ├── test_loader.py
        ├── test_mm_plugin.py
        └── test_template.py
    ├── e2e
        ├── test_chat.py
        ├── test_sglang.py
        └── test_train.py
    ├── eval
        └── test_eval_template.py
    ├── model
        ├── model_utils
        │   ├── test_add_tokens.py
        │   ├── test_attention.py
        │   ├── test_checkpointing.py
        │   ├── test_misc.py
        │   ├── test_packing.py
        │   └── test_visual.py
        ├── test_base.py
        ├── test_freeze.py
        ├── test_full.py
        ├── test_lora.py
        └── test_pissa.py
    ├── train
        └── test_sft_trainer.py
    └── version.txt


/.dockerignore:
--------------------------------------------------------------------------------
 1 | .vscode
 2 | .git
 3 | .github
 4 | .venv
 5 | cache
 6 | docker
 7 | saves
 8 | hf_cache
 9 | ms_cache
10 | om_cache
11 | shared_data
12 | output
13 | .dockerignore
14 | .gitattributes
15 | .gitignore
16 | 


--------------------------------------------------------------------------------
/.env.local:
--------------------------------------------------------------------------------
 1 | # Note: .env files are not actually loaded by LLaMA Factory; this file only lists the supported environment variables for reference
 2 | # api
 3 | API_HOST=
 4 | API_PORT=
 5 | API_KEY=
 6 | API_MODEL_NAME=
 7 | API_VERBOSE=
 8 | FASTAPI_ROOT_PATH=
 9 | MAX_CONCURRENT=
10 | # general
11 | DISABLE_VERSION_CHECK=
12 | FORCE_CHECK_IMPORTS=
13 | ALLOW_EXTRA_ARGS=
14 | LLAMAFACTORY_VERBOSITY=
15 | USE_MODELSCOPE_HUB=
16 | USE_OPENMIND_HUB=
17 | USE_RAY=
18 | RECORD_VRAM=
19 | OPTIM_TORCH=
20 | NPU_JIT_COMPILE=
21 | # torchrun
22 | FORCE_TORCHRUN=
23 | MASTER_ADDR=
24 | MASTER_PORT=
25 | NNODES=
26 | NODE_RANK=
27 | NPROC_PER_NODE=
28 | # wandb
29 | WANDB_DISABLED=
30 | WANDB_PROJECT=
31 | WANDB_API_KEY=
32 | # gradio ui
33 | GRADIO_SHARE=
34 | GRADIO_SERVER_NAME=
35 | GRADIO_SERVER_PORT=
36 | GRADIO_ROOT_PATH=
37 | GRADIO_IPV6=
38 | # setup
39 | ENABLE_SHORT_CONSOLE=
40 | # reserved (do not use)
41 | LLAMABOARD_ENABLED=
42 | LLAMABOARD_WORKDIR=
43 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 


--------------------------------------------------------------------------------
/.github/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing to LLaMA Factory
 2 | 
 3 | Everyone is welcome to contribute, and we value everybody's contribution. Code contributions are not the only way to help the community. Answering questions, helping others, and improving the documentation are also immensely valuable.
 4 | 
 5 | It also helps us if you spread the word! Reference the library in blog posts about the awesome projects it made possible, shout out on Twitter every time it has helped you, or simply ⭐️ the repository to say thank you.
 6 | 
 7 | However you choose to contribute, please be mindful and respect our [code of conduct](CODE_OF_CONDUCT.md).
 8 | 
 9 | **This guide was heavily inspired by the [transformers guide to contributing](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md).**
10 | 
11 | ## Ways to contribute
12 | 
13 | There are several ways you can contribute to LLaMA Factory:
14 | 
15 | * Fix outstanding issues with the existing code.
16 | * Submit issues related to bugs or desired new features.
17 | * Contribute to the examples or to the documentation.
18 | 
19 | ### Style guide
20 | 
21 | LLaMA Factory follows the [Google Python Style Guide](https://google.github.io/styleguide/pyguide.html); please refer to it for details.
22 | 
23 | ### Create a Pull Request
24 | 
25 | 1. Fork the [repository](https://github.com/hiyouga/LLaMA-Factory) by clicking on the [Fork](https://github.com/hiyouga/LLaMA-Factory/fork) button on the repository's page. This creates a copy of the code under your GitHub user account.
26 | 
27 | 2. Clone your fork to your local disk, and add the base repository as a remote:
28 | 
29 | ```bash
30 | git clone git@github.com:[username]/LLaMA-Factory.git
31 | cd LLaMA-Factory
32 | git remote add upstream https://github.com/hiyouga/LLaMA-Factory.git
33 | ```
34 | 
35 | 3. Create a new branch to hold your development changes:
36 | 
37 | ```bash
38 | git checkout -b dev_your_branch
39 | ```
40 | 
41 | 4. Set up a development environment by running the following command in a virtual environment:
42 | 
43 | ```bash
44 | pip install -e ".[dev]"
45 | ```
46 | 
47 | If LLaMA Factory was already installed in the virtual environment, remove it with `pip uninstall llamafactory` before reinstalling it in editable mode with the `-e` flag.
48 | 
49 | 5. Check your code before committing:
50 | 
51 | ```bash
52 | make commit
53 | make style && make quality
54 | make test
55 | ```
56 | 
57 | 6. Submit changes:
58 | 
59 | ```bash
60 | git add .
61 | git commit -m "commit message"
62 | git fetch upstream
63 | git rebase upstream/main
64 | git push -u origin dev_your_branch
65 | ```
66 | 
67 | 7. Create a pull request from your branch `dev_your_branch` to the [upstream repository](https://github.com/hiyouga/LLaMA-Factory).
68 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/1-bug-report.yml:
--------------------------------------------------------------------------------
 1 | name: "\U0001F41B Bug / help"
 2 | description: Create a report to help us improve LLaMA Factory
 3 | labels: ["bug", "pending"]
 4 | body:
 5 |   - type: markdown
 6 |     attributes:
 7 |       value: |
 8 |         Issues included in **[FAQs](https://github.com/hiyouga/LLaMA-Factory/issues/4614)** or those with **insufficient** information may be closed without a response.
 9 |         已经包含在 **[常见问题](https://github.com/hiyouga/LLaMA-Factory/issues/4614)** 内或提供信息**不完整**的 issues 可能不会被回复。
10 | 
11 |   - type: markdown
12 |     attributes:
13 |       value: |
14 |         Please do not create issues that are not related to framework bugs under this category, use **[Discussions](https://github.com/hiyouga/LLaMA-Factory/discussions/categories/q-a)** instead.
15 |         请勿在此分类下创建和框架 bug 无关的 issues，训练问题求助请使用 **[讨论区](https://github.com/hiyouga/LLaMA-Factory/discussions/categories/q-a)**。
16 | 
17 |   - type: checkboxes
18 |     id: reminder
19 |     attributes:
20 |       label: Reminder
21 |       description: |
22 |         Please ensure you have read the above rules carefully and searched the existing issues (including FAQs).
23 |         请确保您已经认真阅读了上述规则并且搜索过现有的 issues（包括常见问题）。
24 | 
25 |       options:
26 |         - label: I have read the above rules and searched the existing issues.
27 |           required: true
28 | 
29 |   - type: textarea
30 |     id: system-info
31 |     validations:
32 |       required: true
33 |     attributes:
34 |       label: System Info
35 |       description: |
36 |         Please share your system info with us. You can run the command **llamafactory-cli env** and copy-paste its output below.
37 |         请提供您的系统信息。您可以在命令行运行 **llamafactory-cli env** 并将其输出复制到该文本框中。
38 | 
39 |       placeholder: llamafactory version, platform, python version, ...
40 | 
41 |   - type: textarea
42 |     id: reproduction
43 |     validations:
44 |       required: true
45 |     attributes:
46 |       label: Reproduction
47 |       description: |
48 |         Please provide entry arguments, error messages and stack traces that reproduce the problem.
49 |         请提供入口参数，错误日志以及异常堆栈以便于我们复现问题。
50 | 
51 |       value: |
52 |         ```text
53 |         Put your message here.
54 |         ```
55 | 
56 |   - type: textarea
57 |     id: others
58 |     validations:
59 |       required: false
60 |     attributes:
61 |       label: Others
62 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/2-feature-request.yml:
--------------------------------------------------------------------------------
 1 | name: "\U0001F680 Feature request"
 2 | description: Submit a request for a new feature
 3 | labels: ["enhancement", "pending"]
 4 | body:
 5 |   - type: markdown
 6 |     attributes:
 7 |       value: |
 8 |         Please do not create issues that are not related to new features under this category.
 9 |         请勿在此分类下创建和新特性无关的 issues。
10 | 
11 |   - type: checkboxes
12 |     id: reminder
13 |     attributes:
14 |       label: Reminder
15 |       description: |
16 |         Please ensure you have read the above rules carefully and searched the existing issues.
17 |         请确保您已经认真阅读了上述规则并且搜索过现有的 issues。
18 | 
19 |       options:
20 |         - label: I have read the above rules and searched the existing issues.
21 |           required: true
22 | 
23 |   - type: textarea
24 |     id: description
25 |     validations:
26 |       required: true
27 |     attributes:
28 |       label: Description
29 |       description: |
30 |         A clear and concise description of the feature proposal.
31 |         请详细描述您希望加入的新功能特性。
32 | 
33 |   - type: textarea
34 |     id: contribution
35 |     validations:
36 |       required: false
37 |     attributes:
38 |       label: Pull Request
39 |       description: |
40 |         Have you already created the relevant PR and submitted the code?
41 |         您是否已经创建了相关 PR 并提交了代码？
42 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | blank_issues_enabled: false
2 | 


--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | # What does this PR do?
2 | 
3 | Fixes # (issue)
4 | 
5 | ## Before submitting
6 | 
7 | - [ ] Did you read the [contributor guideline](https://github.com/hiyouga/LLaMA-Factory/blob/main/.github/CONTRIBUTING.md)?
8 | - [ ] Did you write any new necessary tests?
9 | 


--------------------------------------------------------------------------------
/.github/SECURITY.md:
--------------------------------------------------------------------------------
1 | # Reporting Security Issues
2 | 
3 | To report a security issue, please use the GitHub Security Advisory ["Report a Vulnerability"](https://github.com/hiyouga/LLaMA-Factory/security/advisories/new) tab.
4 | 
5 | We will send a response indicating the next steps in handling your report. After the initial reply to your report, the security team will keep you informed of the progress towards a fix and full announcement, and may ask for additional information or guidance.
6 | 
7 | Report security bugs in third-party modules to the person or team maintaining the module.
8 | 


--------------------------------------------------------------------------------
/.github/workflows/docker.yml:
--------------------------------------------------------------------------------
 1 | name: docker
 2 | 
 3 | on:
 4 |   workflow_dispatch:
 5 |   push:
 6 |     branches:
 7 |       - "main"
 8 |     paths:
 9 |       - "**/*.py"
10 |       - "requirements.txt"
11 |       - "docker/**"
12 |       - ".github/workflows/*.yml"
13 |   pull_request:
14 |     branches:
15 |       - "main"
16 |     paths:
17 |       - "**/*.py"
18 |       - "requirements.txt"
19 |       - "docker/**"
20 |       - ".github/workflows/*.yml"
21 | 
22 | jobs:
23 |   build:
24 |     runs-on: ubuntu-latest
25 | 
26 |     concurrency:
27 |       group: ${{ github.workflow }}-${{ github.ref }}
28 |       cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
29 | 
30 |     environment:
31 |       name: docker
32 |       url: https://hub.docker.com/r/hiyouga/llamafactory
33 | 
34 |     steps:
35 |       - name: Free up disk space
36 |         run: |
37 |           df -h
38 |           sudo rm -rf /usr/share/dotnet
39 |           sudo rm -rf /opt/ghc
40 |           sudo rm -rf /opt/hostedtoolcache
41 |           df -h
42 | 
43 |       - name: Checkout
44 |         uses: actions/checkout@v4
45 | 
46 |       - name: Set up Docker Buildx
47 |         uses: docker/setup-buildx-action@v3
48 | 
49 |       - name: Login to Docker Hub
50 |         if: github.event_name != 'pull_request'
51 |         uses: docker/login-action@v3
52 |         with:
53 |           username: ${{ vars.DOCKERHUB_USERNAME }}
54 |           password: ${{ secrets.DOCKERHUB_TOKEN }}
55 | 
56 |       - name: Build and push Docker image
57 |         uses: docker/build-push-action@v6
58 |         with:
59 |           context: .
60 |           file: ./docker/docker-cuda/Dockerfile
61 |           build-args: |
62 |             EXTRAS=metrics,deepspeed,liger-kernel
63 |           push: ${{ github.event_name != 'pull_request' }}
64 |           tags: docker.io/hiyouga/llamafactory:latest
65 |           cache-from: type=gha
66 |           cache-to: type=gha,mode=max
67 | 


--------------------------------------------------------------------------------
/.github/workflows/label_issue.yml:
--------------------------------------------------------------------------------
 1 | name: label_issue
 2 | 
 3 | on:
 4 |   issues:
 5 |     types:
 6 |       - opened
 7 | 
 8 | jobs:
 9 |   label_issue:
10 |     runs-on: ubuntu-latest
11 | 
12 |     permissions:
13 |       issues: write
14 | 
15 |     steps:
16 |       - env:
17 |           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
18 |           ISSUE_URL: ${{ github.event.issue.html_url }}
19 |           ISSUE_TITLE: ${{ github.event.issue.title }}
20 |         run: |
21 |           LABEL=""
22 |           NPU_KEYWORDS=(npu huawei ascend 华为 昇腾)
23 |           ISSUE_TITLE_LOWER=$(echo $ISSUE_TITLE | tr '[:upper:]' '[:lower:]')
24 |           for KEYWORD in ${NPU_KEYWORDS[@]}; do
25 |             if [[ $ISSUE_TITLE_LOWER == *$KEYWORD* ]] && [[ $ISSUE_TITLE_LOWER != *input* ]]; then
26 |               LABEL="npu"
27 |               break
28 |             fi
29 |           done
30 |           if [ -n "$LABEL" ]; then
31 |             gh issue edit $ISSUE_URL --add-label $LABEL
32 |           fi
33 | 


--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
 1 | name: publish
 2 | 
 3 | on:
 4 |   workflow_dispatch:
 5 |   release:
 6 |     types:
 7 |       - published
 8 | 
 9 | jobs:
10 |   publish:
11 |     name: Upload release to PyPI
12 | 
13 |     runs-on: ubuntu-latest
14 | 
15 |     environment:
16 |       name: release
17 |       url: https://pypi.org/p/llamafactory
18 | 
19 |     permissions:
20 |       id-token: write
21 | 
22 |     steps:
23 |       - name: Checkout
24 |         uses: actions/checkout@v4
25 | 
26 |       - name: Set up Python
27 |         uses: actions/setup-python@v5
28 |         with:
29 |           python-version: "3.9"
30 | 
31 |       - name: Build package
32 |         run: |
33 |           make build
34 | 
35 |       - name: Publish package
36 |         uses: pypa/gh-action-pypi-publish@release/v1
37 | 


--------------------------------------------------------------------------------
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
  1 | name: tests
  2 | 
  3 | on:
  4 |   workflow_dispatch:
  5 |   push:
  6 |     branches:
  7 |       - "main"
  8 |     paths:
  9 |       - "**.py"
 10 |       - "requirements.txt"
 11 |       - ".github/workflows/*.yml"
 12 |   pull_request:
 13 |     branches:
 14 |       - "main"
 15 |     paths:
 16 |       - "**.py"
 17 |       - "requirements.txt"
 18 |       - ".github/workflows/*.yml"
 19 | 
 20 | jobs:
 21 |   tests:
 22 |     strategy:
 23 |       fail-fast: false
 24 |       matrix:
 25 |         python:
 26 |           - "3.9"
 27 |           - "3.10"
 28 |           - "3.11"
 29 |           - "3.12"
 30 |         os:
 31 |           - "ubuntu-latest"
 32 |           - "windows-latest"
 33 |           - "macos-13"
 34 |         transformers:
 35 |           - null
 36 |         include:  # test backward compatibility
 37 |           - python: "3.9"
 38 |             os: "ubuntu-latest"
 39 |             transformers: "4.45.0"
 40 |           - python: "3.9"
 41 |             os: "ubuntu-latest"
 42 |             transformers: "4.49.0"
 43 |           - python: "3.9"
 44 |             os: "ubuntu-latest"
 45 |             transformers: "4.51.0"
 46 | 
 47 |     runs-on: ${{ matrix.os }}
 48 | 
 49 |     concurrency:
 50 |       group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os }}-${{ matrix.python }}-${{ matrix.transformers }}
 51 |       cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
 52 | 
 53 |     env:
 54 |       HF_TOKEN: ${{ secrets.HF_TOKEN }}
 55 |       OS_NAME: ${{ matrix.os }}
 56 | 
 57 |     steps:
 58 |       - name: Checkout
 59 |         uses: actions/checkout@v4
 60 | 
 61 |       - name: Set up Python
 62 |         uses: actions/setup-python@v5
 63 |         with:
 64 |           python-version: ${{ matrix.python }}
 65 |           cache: "pip"
 66 |           cache-dependency-path: "**/requirements*.txt"
 67 | 
 68 |       - name: Install dependencies
 69 |         run: |
 70 |           python -m pip install --upgrade pip
 71 |           python -m pip install ".[torch,dev]"
 72 | 
 73 |       - name: Install transformers
 74 |         if: ${{ matrix.transformers }}
 75 |         run: |
 76 |           python -m pip install "transformers==${{ matrix.transformers }}"
 77 | 
 78 |       - name: Cache files
 79 |         id: hf-hub-cache
 80 |         uses: actions/cache@v4
 81 |         with:
 82 |           path: ${{ runner.temp }}/huggingface
 83 |           key: huggingface-${{ matrix.os }}-${{ matrix.python }}-${{ matrix.transformers }}-${{ hashFiles('tests/version.txt') }}
 84 | 
 85 |       - name: Check quality
 86 |         run: |
 87 |           make style && make quality
 88 | 
 89 |       - name: Check license
 90 |         run: |
 91 |           make license
 92 | 
 93 |       - name: Check build
 94 |         run: |
 95 |           make build
 96 | 
 97 |       - name: Test with pytest
 98 |         run: |
 99 |           make test
100 |         env:
101 |           HF_HOME: ${{ runner.temp }}/huggingface
102 |           HF_HUB_OFFLINE: "${{ steps.hf-hub-cache.outputs.cache-hit == 'true' && '1' || '0' }}"
103 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 | -   repo: https://github.com/pre-commit/pre-commit-hooks
 3 |     rev: v5.0.0
 4 |     hooks:
 5 |     -   id: check-ast
 6 |     -   id: check-added-large-files
 7 |         args: ['--maxkb=25000']
 8 |     -   id: check-merge-conflict
 9 |     -   id: check-yaml
10 |     -   id: debug-statements
11 |     -   id: end-of-file-fixer
12 |     -   id: trailing-whitespace
13 |         args: [--markdown-linebreak-ext=md]
14 |     -   id: no-commit-to-branch
15 |         args: ['--branch', 'main']
16 | 
17 | -   repo: https://github.com/asottile/pyupgrade
18 |     rev: v3.17.0
19 |     hooks:
20 |     -   id: pyupgrade
21 |         args: [--py38-plus]
22 | 
23 | -   repo: https://github.com/astral-sh/ruff-pre-commit
24 |     rev: v0.6.9
25 |     hooks:
26 |     -   id: ruff
27 |         args: [--fix]
28 |     -   id: ruff-format
29 | 


--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
 1 | cff-version: 1.2.0
 2 | date-released: 2024-03
 3 | message: "If you use this software, please cite it as below."
 4 | authors:
 5 | - family-names: "Zheng"
 6 |   given-names: "Yaowei"
 7 | - family-names: "Zhang"
 8 |   given-names: "Richong"
 9 | - family-names: "Zhang"
10 |   given-names: "Junhao"
11 | - family-names: "Ye"
12 |   given-names: "Yanhan"
13 | - family-names: "Luo"
14 |   given-names: "Zheyan"
15 | - family-names: "Feng"
16 |   given-names: "Zhangchi"
17 | - family-names: "Ma"
18 |   given-names: "Yongqiang"
19 | title: "LlamaFactory: Unified Efficient Fine-Tuning of 100+ Language Models"
20 | url: "https://arxiv.org/abs/2403.13372"
21 | preferred-citation:
22 |   type: conference-paper
23 |   conference:
24 |     name: "Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations)"
25 |   authors:
26 |     - family-names: "Zheng"
27 |       given-names: "Yaowei"
28 |     - family-names: "Zhang"
29 |       given-names: "Richong"
30 |     - family-names: "Zhang"
31 |       given-names: "Junhao"
32 |     - family-names: "Ye"
33 |       given-names: "Yanhan"
34 |     - family-names: "Luo"
35 |       given-names: "Zheyan"
36 |     - family-names: "Feng"
37 |       given-names: "Zhangchi"
38 |     - family-names: "Ma"
39 |       given-names: "Yongqiang"
40 |   title: "LlamaFactory: Unified Efficient Fine-Tuning of 100+ Language Models"
41 |   url: "https://arxiv.org/abs/2403.13372"
42 |   year: 2024
43 |   publisher: "Association for Computational Linguistics"
44 |   address: "Bangkok, Thailand"
45 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE requirements.txt
2 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | .PHONY: build commit license quality style test
 2 | 
 3 | check_dirs := scripts src tests setup.py
 4 | 
 5 | build:
 6 | 	pip3 install build && python3 -m build
 7 | 
 8 | commit:
 9 | 	pre-commit install
10 | 	pre-commit run --all-files
11 | 
12 | license:
13 | 	python3 tests/check_license.py $(check_dirs)
14 | 
15 | quality:
16 | 	ruff check $(check_dirs)
17 | 	ruff format --check $(check_dirs)
18 | 
19 | style:
20 | 	ruff check $(check_dirs) --fix
21 | 	ruff format $(check_dirs)
22 | 
23 | test:
24 | 	CUDA_VISIBLE_DEVICES= WANDB_DISABLED=true pytest -vv tests/
25 | 


--------------------------------------------------------------------------------
/assets/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hiyouga/LLaMA-Factory/65aa86ed3978151eb01bd5ef2c8df20e178a7d30/assets/logo.png


--------------------------------------------------------------------------------
/assets/wechat.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hiyouga/LLaMA-Factory/65aa86ed3978151eb01bd5ef2c8df20e178a7d30/assets/wechat.jpg


--------------------------------------------------------------------------------
/assets/wechat_npu.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hiyouga/LLaMA-Factory/65aa86ed3978151eb01bd5ef2c8df20e178a7d30/assets/wechat_npu.jpg


--------------------------------------------------------------------------------
/data/mllm_audio_demo.json:
--------------------------------------------------------------------------------
 1 | [
 2 |   {
 3 |     "messages": [
 4 |       {
 5 |         "content": "<audio>What's that sound?",
 6 |         "role": "user"
 7 |       },
 8 |       {
 9 |         "content": "It is the sound of glass shattering.",
10 |         "role": "assistant"
11 |       }
12 |     ],
13 |     "audios": [
14 |       "mllm_demo_data/1.mp3"
15 |     ]
16 |   },
17 |   {
18 |     "messages": [
19 |       {
20 |         "content": "<audio>What can you hear?",
21 |         "role": "user"
22 |       },
23 |       {
24 |         "content": "A woman is coughing.",
25 |         "role": "assistant"
26 |       }
27 |     ],
28 |     "audios": [
29 |       "mllm_demo_data/2.wav"
30 |     ]
31 |   },
32 |   {
33 |     "messages": [
34 |       {
35 |         "content": "<audio>What does the person say?",
36 |         "role": "user"
37 |       },
38 |       {
39 |         "content": "Mister Quiller is the apostle of the middle classes and we are glad to welcome his gospel.",
40 |         "role": "assistant"
41 |       }
42 |     ],
43 |     "audios": [
44 |       "mllm_demo_data/3.flac"
45 |     ]
46 |   }
47 | ]
48 | 


--------------------------------------------------------------------------------
/data/mllm_demo_data/1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hiyouga/LLaMA-Factory/65aa86ed3978151eb01bd5ef2c8df20e178a7d30/data/mllm_demo_data/1.jpg


--------------------------------------------------------------------------------
/data/mllm_demo_data/1.mp3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hiyouga/LLaMA-Factory/65aa86ed3978151eb01bd5ef2c8df20e178a7d30/data/mllm_demo_data/1.mp3


--------------------------------------------------------------------------------
/data/mllm_demo_data/1.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hiyouga/LLaMA-Factory/65aa86ed3978151eb01bd5ef2c8df20e178a7d30/data/mllm_demo_data/1.mp4


--------------------------------------------------------------------------------
/data/mllm_demo_data/2.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hiyouga/LLaMA-Factory/65aa86ed3978151eb01bd5ef2c8df20e178a7d30/data/mllm_demo_data/2.avi


--------------------------------------------------------------------------------
/data/mllm_demo_data/2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hiyouga/LLaMA-Factory/65aa86ed3978151eb01bd5ef2c8df20e178a7d30/data/mllm_demo_data/2.jpg


--------------------------------------------------------------------------------
/data/mllm_demo_data/2.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hiyouga/LLaMA-Factory/65aa86ed3978151eb01bd5ef2c8df20e178a7d30/data/mllm_demo_data/2.wav


--------------------------------------------------------------------------------
/data/mllm_demo_data/3.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hiyouga/LLaMA-Factory/65aa86ed3978151eb01bd5ef2c8df20e178a7d30/data/mllm_demo_data/3.flac


--------------------------------------------------------------------------------
/data/mllm_demo_data/3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hiyouga/LLaMA-Factory/65aa86ed3978151eb01bd5ef2c8df20e178a7d30/data/mllm_demo_data/3.jpg


--------------------------------------------------------------------------------
/data/mllm_demo_data/3.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hiyouga/LLaMA-Factory/65aa86ed3978151eb01bd5ef2c8df20e178a7d30/data/mllm_demo_data/3.mp4


--------------------------------------------------------------------------------
/data/mllm_demo_data/4.mp3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hiyouga/LLaMA-Factory/65aa86ed3978151eb01bd5ef2c8df20e178a7d30/data/mllm_demo_data/4.mp3


--------------------------------------------------------------------------------
/data/mllm_demo_data/4.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hiyouga/LLaMA-Factory/65aa86ed3978151eb01bd5ef2c8df20e178a7d30/data/mllm_demo_data/4.mp4


--------------------------------------------------------------------------------
/data/mllm_video_audio_demo.json:
--------------------------------------------------------------------------------
 1 | [
 2 |   {
 3 |     "messages": [
 4 |       {
 5 |         "content": "<video><audio>What is the video describing?",
 6 |         "role": "user"
 7 |       },
 8 |       {
 9 |         "content": "A girl who is drawing a picture of a guitar and feels nervous.",
10 |         "role": "assistant"
11 |       }
12 |     ],
13 |     "videos": [
14 |       "mllm_demo_data/4.mp4"
15 |     ],
16 |     "audios": [
17 |       "mllm_demo_data/4.mp3"
18 |     ]
19 |   },
20 |   {
21 |     "messages": [
22 |       {
23 |         "content": "<video><audio>What does this girl say?",
24 |         "role": "user"
25 |       },
26 |       {
27 |         "content": "She says: 'Hello! Take a look at what am I drawing!'",
28 |         "role": "assistant"
29 |       }
30 |     ],
31 |     "videos": [
32 |       "mllm_demo_data/4.mp4"
33 |     ],
34 |     "audios": [
35 |       "mllm_demo_data/4.mp3"
36 |     ]
37 |   },
38 |   {
39 |     "messages": [
40 |       {
41 |         "content": "<video><audio>What is this girl drawing with?",
42 |         "role": "user"
43 |       },
44 |       {
45 |         "content": "She is drawing with an iPad.",
46 |         "role": "assistant"
47 |       }
48 |     ],
49 |     "videos": [
50 |       "mllm_demo_data/4.mp4"
51 |     ],
52 |     "audios": [
53 |       "mllm_demo_data/4.mp3"
54 |     ]
55 |   }
56 | ]
57 | 


--------------------------------------------------------------------------------
/data/mllm_video_demo.json:
--------------------------------------------------------------------------------
 1 | [
 2 |   {
 3 |     "messages": [
 4 |       {
 5 |         "content": "<video>Why is this video funny?",
 6 |         "role": "user"
 7 |       },
 8 |       {
 9 |         "content": "Because a baby is reading, and he is so cute!",
10 |         "role": "assistant"
11 |       }
12 |     ],
13 |     "videos": [
14 |       "mllm_demo_data/1.mp4"
15 |     ]
16 |   },
17 |   {
18 |     "messages": [
19 |       {
20 |         "content": "<video>What is she doing?",
21 |         "role": "user"
22 |       },
23 |       {
24 |         "content": "She is cooking.",
25 |         "role": "assistant"
26 |       }
27 |     ],
28 |     "videos": [
29 |       "mllm_demo_data/2.avi"
30 |     ]
31 |   },
32 |   {
33 |     "messages": [
34 |       {
35 |         "content": "<video>What's in the video?",
36 |         "role": "user"
37 |       },
38 |       {
39 |         "content": "A baby is playing in the living room.",
40 |         "role": "assistant"
41 |       }
42 |     ],
43 |     "videos": [
44 |       "mllm_demo_data/3.mp4"
45 |     ]
46 |   }
47 | ]
48 | 


--------------------------------------------------------------------------------
/data/ultra_chat/ultra_chat.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | # Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | import json
17 | import os
18 | 
19 | import datasets
20 | 
21 | 
22 | _HF_ENDPOINT = os.getenv("HF_ENDPOINT", "https://huggingface.co")
23 | 
24 | _DESCRIPTION = "UltraChat: Large-scale, Informative, and Diverse Multi-round Dialogue Data."
25 | 
26 | _CITATION = """\
27 | @misc{UltraChat,
28 |   author = {Ding, Ning and Chen, Yulin and Xu, Bokai and Hu, Shengding and others},
29 |   title = {UltraChat: A Large-scale Auto-generated Multi-round Dialogue Data},
30 |   year = {2023},
31 |   publisher = {GitHub},
32 |   journal = {GitHub repository},
33 |   howpublished = {\\url{https://github.com/thunlp/ultrachat}},
34 | }
35 | """
36 | 
37 | _HOMEPAGE = f"{_HF_ENDPOINT}/datasets/stingning/ultrachat"
38 | _LICENSE = "cc-by-nc-4.0"
39 | _BASE_DATA_URL = f"{_HF_ENDPOINT}/datasets/stingning/ultrachat/resolve/main/train_{{idx}}.jsonl"
40 | 
41 | 
class UltraChat(datasets.GeneratorBasedBuilder):
    """Dataset builder that converts UltraChat JSONL shards into human/gpt conversations.

    Each input row is a JSON object with an ``id`` and a ``data`` list of utterances.
    Utterances at even positions are emitted as ``human`` turns and those at odd
    positions as ``gpt`` turns.
    """

    VERSION = datasets.Version("0.0.0")
    # Number of ``train_{idx}.jsonl`` shards downloaded via ``_BASE_DATA_URL``.
    NUM_SHARDS = 10

    def _info(self):
        """Return the dataset metadata together with the ``conversations`` feature schema."""
        features = datasets.Features(
            {"conversations": [{"from": datasets.Value("string"), "value": datasets.Value("string")}]}
        )
        return datasets.DatasetInfo(
            description=_DESCRIPTION, features=features, homepage=_HOMEPAGE, license=_LICENSE, citation=_CITATION
        )

    def _split_generators(self, dl_manager: datasets.DownloadManager):
        """Download every shard and expose all of them as a single train split."""
        file_paths = [dl_manager.download(_BASE_DATA_URL.format(idx=idx)) for idx in range(self.NUM_SHARDS)]
        return [datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"filepaths": file_paths})]

    def _generate_examples(self, filepaths: list[str]):
        """Yield ``(key, example)`` pairs read line by line from the downloaded shards.

        Rows that are not valid JSON are skipped, as are rows with fewer than two
        utterances once an unpaired trailing utterance has been dropped.
        """
        for filepath in filepaths:
            with open(filepath, encoding="utf-8") as f:
                for row in f:
                    try:
                        data = json.loads(row)
                    except json.JSONDecodeError:
                        # Best-effort parsing: skip blank or malformed lines only,
                        # instead of hiding every possible error.
                        continue

                    key = data["id"]
                    content: list[str] = data["data"]
                    # Keep complete (human, gpt) pairs only; an odd trailing turn is dropped.
                    num_turns = len(content) - len(content) % 2
                    if num_turns < 2:
                        continue

                    conversations = [
                        {"from": "human" if i % 2 == 0 else "gpt", "value": utterance}
                        for i, utterance in enumerate(content[:num_turns])
                    ]
                    yield key, {"conversations": conversations}
75 | 


--------------------------------------------------------------------------------
/docker/docker-cuda/Dockerfile:
--------------------------------------------------------------------------------
 1 | # https://hub.docker.com/r/hiyouga/pytorch/tags
 2 | ARG BASE_IMAGE=hiyouga/pytorch:th2.6.0-cu124-flashattn2.7.4-cxx11abi0-devel
 3 | FROM ${BASE_IMAGE}
 4 | 
 5 | # Installation arguments
 6 | ARG PIP_INDEX=https://pypi.org/simple
 7 | ARG EXTRAS=metrics
 8 | ARG INSTALL_FLASHATTN=false
 9 | ARG HTTP_PROXY=""
10 | 
11 | # Define environments
12 | ENV MAX_JOBS=16
13 | ENV FLASH_ATTENTION_FORCE_BUILD=TRUE
14 | ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
15 | ENV DEBIAN_FRONTEND=noninteractive
16 | ENV NODE_OPTIONS=""
17 | ENV PIP_ROOT_USER_ACTION=ignore
18 | ENV http_proxy="${HTTP_PROXY}"
19 | ENV https_proxy="${HTTP_PROXY}"
20 | 
21 | # Use Bash instead of default /bin/sh
22 | SHELL ["/bin/bash", "-c"]
23 | 
24 | # Set the working directory
25 | WORKDIR /app
26 | 
27 | # Change pip source
28 | RUN pip config set global.index-url "${PIP_INDEX}" && \
29 |     pip config set global.extra-index-url "${PIP_INDEX}" && \
30 |     python -m pip install --upgrade pip
31 | 
32 | # Install the requirements
33 | COPY requirements.txt /app
34 | RUN pip install --no-cache-dir -r requirements.txt
35 | 
36 | # Copy the rest of the application into the image
37 | COPY . /app
38 | 
39 | # Install LLaMA Factory
40 | RUN pip install --no-cache-dir -e ".[${EXTRAS}]" --no-build-isolation
41 | 
42 | # Rebuild flash attention
43 | RUN if [ "${INSTALL_FLASHATTN}" == "true" ]; then \
44 |         pip uninstall -y ninja && \
45 |         pip install --no-cache-dir ninja && \
46 |         pip install --no-cache-dir flash-attn --no-build-isolation; \
47 |     fi
48 | 
49 | # Set up volumes
50 | VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/root/.cache/openmind", "/app/shared_data", "/app/output" ]
51 | 
52 | # Expose port 7860 for LLaMA Board
53 | ENV GRADIO_SERVER_PORT=7860
54 | EXPOSE 7860
55 | 
56 | # Expose port 8000 for API service
57 | ENV API_PORT=8000
58 | EXPOSE 8000
59 | 
60 | # unset proxy
61 | ENV http_proxy=
62 | ENV https_proxy=
63 | 
64 | # Reset pip config
65 | RUN pip config unset global.index-url && \
66 |     pip config unset global.extra-index-url
67 | 


--------------------------------------------------------------------------------
/docker/docker-cuda/Dockerfile.base:
--------------------------------------------------------------------------------
 1 | # Start from the pytorch official image (ubuntu-22.04 + cuda-12.4.1 + python-3.11)
 2 | # https://hub.docker.com/r/pytorch/pytorch/tags
 3 | FROM pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel
 4 | 
 5 | # Define environments
 6 | ENV MAX_JOBS=16
 7 | ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
 8 | ENV DEBIAN_FRONTEND=noninteractive
 9 | ENV NODE_OPTIONS=""
10 | ENV PIP_ROOT_USER_ACTION=ignore
11 | 
12 | # Define installation arguments
13 | ARG APT_SOURCE=https://mirrors.tuna.tsinghua.edu.cn/ubuntu/
14 | ARG PIP_INDEX=https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
15 | 
16 | # Set apt source
17 | RUN cp /etc/apt/sources.list /etc/apt/sources.list.bak && \
18 |     { \
19 |     echo "deb ${APT_SOURCE} jammy main restricted universe multiverse"; \
20 |     echo "deb ${APT_SOURCE} jammy-updates main restricted universe multiverse"; \
21 |     echo "deb ${APT_SOURCE} jammy-backports main restricted universe multiverse"; \
22 |     echo "deb ${APT_SOURCE} jammy-security main restricted universe multiverse"; \
23 |     } > /etc/apt/sources.list
24 | 
25 | # Install systemctl and wget
26 | RUN apt-get update && \
27 |     apt-get install -y -o Dpkg::Options::="--force-confdef" systemd wget && \
28 |     apt-get clean
29 | 
30 | # Install git and vim
31 | RUN apt-get update && \
32 |     apt-get install -y git vim && \
33 |     apt-get clean
34 | 
35 | # Install gcc and g++
36 | RUN apt-get update && \
37 |     apt-get install -y gcc g++ && \
38 |     apt-get clean
39 | 
40 | # Change pip source
41 | RUN pip config set global.index-url "${PIP_INDEX}" && \
42 |     pip config set global.extra-index-url "${PIP_INDEX}" && \
43 |     python -m pip install --upgrade pip
44 | 
45 | # Install flash-attn-2.7.4.post1 (cxx11abi=False)
46 | RUN wget -nv https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl && \
47 |     pip install --no-cache-dir flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl
48 | 
49 | # Install flashinfer-0.2.2.post1+cu124 (cxx11abi=False)
50 | RUN wget -nv https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.2.post1/flashinfer_python-0.2.2.post1+cu124torch2.6-cp38-abi3-linux_x86_64.whl && \
51 |     pip install --no-cache-dir flashinfer_python-0.2.2.post1+cu124torch2.6-cp38-abi3-linux_x86_64.whl
52 | 
53 | # Reset pip config
54 | RUN pip config unset global.index-url && \
55 |     pip config unset global.extra-index-url
56 | 


--------------------------------------------------------------------------------
/docker/docker-cuda/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | services:
 2 |   llamafactory:
 3 |     build:
 4 |       dockerfile: ./docker/docker-cuda/Dockerfile
 5 |       context: ../..
 6 |       args:
 7 |         PIP_INDEX: https://pypi.org/simple
 8 |         EXTRAS: metrics
 9 |     container_name: llamafactory
10 |     volumes:
11 |       - ../../hf_cache:/root/.cache/huggingface
12 |       - ../../ms_cache:/root/.cache/modelscope
13 |       - ../../om_cache:/root/.cache/openmind
14 |       - ../../shared_data:/app/shared_data
15 |       - ../../output:/app/output
16 |     ports:
17 |       - "7860:7860"
18 |       - "8000:8000"
19 |     ipc: host
20 |     tty: true
21 |     # shm_size: "16gb"  # ipc: host is set
22 |     stdin_open: true
23 |     command: bash
24 |     deploy:
25 |       resources:
26 |         reservations:
27 |           devices:
28 |           - driver: nvidia
29 |             count: "all"
30 |             capabilities: [ gpu ]
31 |     restart: unless-stopped
32 | 


--------------------------------------------------------------------------------
/docker/docker-npu/Dockerfile:
--------------------------------------------------------------------------------
 1 | # https://hub.docker.com/r/ascendai/cann/tags
 2 | ARG BASE_IMAGE=ascendai/cann:8.0.0-910b-ubuntu22.04-py3.11
 3 | FROM ${BASE_IMAGE}
 4 | 
 5 | # Installation arguments
 6 | ARG PIP_INDEX=https://pypi.org/simple
 7 | ARG EXTRAS=torch-npu,metrics
 8 | ARG HTTP_PROXY=""
 9 | 
10 | # Define environments
11 | ENV MAX_JOBS=16
12 | ENV FLASH_ATTENTION_FORCE_BUILD=TRUE
13 | ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
14 | ENV DEBIAN_FRONTEND=noninteractive
15 | ENV NODE_OPTIONS=""
16 | ENV PIP_ROOT_USER_ACTION=ignore
17 | ENV http_proxy="${HTTP_PROXY}"
18 | ENV https_proxy="${HTTP_PROXY}"
19 | 
20 | # Use Bash instead of default /bin/sh
21 | SHELL ["/bin/bash", "-c"]
22 | 
23 | # Set the working directory
24 | WORKDIR /app
25 | 
26 | # Change pip source
27 | RUN pip config set global.index-url "${PIP_INDEX}" && \
28 |     pip config set global.extra-index-url "${PIP_INDEX}" && \
29 |     python -m pip install --upgrade pip
30 | 
31 | # Install the requirements
32 | COPY requirements.txt /app
33 | RUN pip install --no-cache-dir -r requirements.txt
34 | 
35 | # Copy the rest of the application into the image
36 | COPY . /app
37 | 
38 | # Install LLaMA Factory
39 | RUN pip install --no-cache-dir -e ".[${EXTRAS}]" --no-build-isolation
40 | 
41 | # Set up volumes
42 | VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/root/.cache/openmind", "/app/shared_data", "/app/output" ]
43 | 
44 | # Expose port 7860 for LLaMA Board
45 | ENV GRADIO_SERVER_PORT=7860
46 | EXPOSE 7860
47 | 
48 | # Expose port 8000 for API service
49 | ENV API_PORT=8000
50 | EXPOSE 8000
51 | 
52 | # unset proxy
53 | ENV http_proxy=
54 | ENV https_proxy=
55 | 
56 | # Reset pip config
57 | RUN pip config unset global.index-url && \
58 |     pip config unset global.extra-index-url
59 | 


--------------------------------------------------------------------------------
/docker/docker-npu/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | services:
 2 |   llamafactory:
 3 |     build:
 4 |       dockerfile: ./docker/docker-npu/Dockerfile
 5 |       context: ../..
 6 |       args:
 7 |         PIP_INDEX: https://pypi.org/simple
 8 |         EXTRAS: torch-npu,metrics
 9 |     container_name: llamafactory
10 |     volumes:
11 |       - ../../hf_cache:/root/.cache/huggingface
12 |       - ../../ms_cache:/root/.cache/modelscope
13 |       - ../../om_cache:/root/.cache/openmind
14 |       - ../../shared_data:/app/shared_data
15 |       - ../../output:/app/output
16 |       - /usr/local/dcmi:/usr/local/dcmi
17 |       - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi
18 |       - /usr/local/Ascend/driver:/usr/local/Ascend/driver
19 |       - /etc/ascend_install.info:/etc/ascend_install.info
20 |     ports:
21 |       - "7860:7860"
22 |       - "8000:8000"
23 |     ipc: host
24 |     tty: true
25 |     # shm_size: "16gb"  # ipc: host is set
26 |     stdin_open: true
27 |     command: bash
28 |     devices:
29 |       - /dev/davinci0
30 |       - /dev/davinci_manager
31 |       - /dev/devmm_svm
32 |       - /dev/hisi_hdc
33 |     restart: unless-stopped
34 | 


--------------------------------------------------------------------------------
/docker/docker-rocm/Dockerfile:
--------------------------------------------------------------------------------
 1 | # https://hub.docker.com/r/rocm/pytorch/tags
 2 | ARG BASE_IMAGE=rocm/pytorch:rocm6.4.1_ubuntu22.04_py3.10_pytorch_release_2.6.0
 3 | FROM ${BASE_IMAGE}
 4 | 
 5 | # Installation arguments
 6 | ARG PIP_INDEX=https://pypi.org/simple
 7 | ARG EXTRAS=metrics
 8 | ARG INSTALL_FLASHATTN=false
 9 | ARG HTTP_PROXY=""
10 | ARG PYTORCH_INDEX=https://download.pytorch.org/whl/rocm6.3
11 | 
12 | # Define environments
13 | ENV MAX_JOBS=16
14 | ENV FLASH_ATTENTION_FORCE_BUILD=TRUE
15 | ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
16 | ENV DEBIAN_FRONTEND=noninteractive
17 | ENV NODE_OPTIONS=""
18 | ENV PIP_ROOT_USER_ACTION=ignore
19 | ENV http_proxy="${HTTP_PROXY}"
20 | ENV https_proxy="${HTTP_PROXY}"
21 | 
22 | # Use Bash instead of default /bin/sh
23 | SHELL ["/bin/bash", "-c"]
24 | 
25 | # Set the working directory
26 | WORKDIR /app
27 | 
28 | # Change pip source
29 | RUN pip config set global.index-url "${PIP_INDEX}" && \
30 |     pip config set global.extra-index-url "${PIP_INDEX}" && \
31 |     python -m pip install --upgrade pip
32 | 
33 | # Reinstall pytorch rocm
34 | RUN pip uninstall -y torch torchvision torchaudio && \
35 |     pip install --pre torch torchvision torchaudio --index-url "${PYTORCH_INDEX}"
36 | 
37 | # Install the requirements
38 | COPY requirements.txt /app
39 | RUN pip install --no-cache-dir -r requirements.txt
40 | 
41 | # Copy the rest of the application into the image
42 | COPY . /app
43 | 
44 | # Install LLaMA Factory
45 | RUN pip install --no-cache-dir -e ".[${EXTRAS}]" --no-build-isolation
46 | 
47 | # Rebuild flash attention
48 | RUN if [ "${INSTALL_FLASHATTN}" == "true" ]; then \
49 |         pip uninstall -y ninja && \
50 |         pip install --no-cache-dir ninja && \
51 |         pip install --no-cache-dir flash-attn --no-build-isolation; \
52 |     fi
53 | 
54 | # Set up volumes
55 | VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/root/.cache/openmind", "/app/shared_data", "/app/output" ]
56 | 
57 | # Expose port 7860 for LLaMA Board
58 | ENV GRADIO_SERVER_PORT=7860
59 | EXPOSE 7860
60 | 
61 | # Expose port 8000 for API service
62 | ENV API_PORT=8000
63 | EXPOSE 8000
64 | 
65 | # unset proxy
66 | ENV http_proxy=
67 | ENV https_proxy=
68 | 
69 | # Reset pip config
70 | RUN pip config unset global.index-url && \
71 |     pip config unset global.extra-index-url
72 | 


--------------------------------------------------------------------------------
/docker/docker-rocm/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | services:
 2 |   llamafactory:
 3 |     build:
 4 |       dockerfile: ./docker/docker-rocm/Dockerfile
 5 |       context: ../..
 6 |       args:
 7 |         PIP_INDEX: https://pypi.org/simple
 8 |         EXTRAS: metrics
 9 |     container_name: llamafactory
10 |     volumes:
11 |       - ../../hf_cache:/root/.cache/huggingface
12 |       - ../../ms_cache:/root/.cache/modelscope
13 |       - ../../om_cache:/root/.cache/openmind
14 |       - ../../shared_data:/app/shared_data
15 |       - ../../output:/app/output
16 |     ports:
17 |       - "7860:7860"
18 |       - "8000:8000"
19 |     ipc: host
20 |     tty: true
21 |     # shm_size: "16gb"  # ipc: host is set
22 |     stdin_open: true
23 |     command: bash
24 |     devices:
25 |       - /dev/kfd:/dev/kfd
26 |       - /dev/dri:/dev/dri
27 |     restart: unless-stopped
28 | 


--------------------------------------------------------------------------------
/evaluation/ceval/ceval.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hiyouga/LLaMA-Factory/65aa86ed3978151eb01bd5ef2c8df20e178a7d30/evaluation/ceval/ceval.zip


--------------------------------------------------------------------------------
/evaluation/cmmlu/cmmlu.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hiyouga/LLaMA-Factory/65aa86ed3978151eb01bd5ef2c8df20e178a7d30/evaluation/cmmlu/cmmlu.zip


--------------------------------------------------------------------------------
/evaluation/mmlu/mmlu.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hiyouga/LLaMA-Factory/65aa86ed3978151eb01bd5ef2c8df20e178a7d30/evaluation/mmlu/mmlu.zip


--------------------------------------------------------------------------------
/examples/accelerate/fsdp_config.yaml:
--------------------------------------------------------------------------------
 1 | compute_environment: LOCAL_MACHINE
 2 | debug: false
 3 | distributed_type: FSDP
 4 | downcast_bf16: 'no'
 5 | fsdp_config:
 6 |   fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP
 7 |   fsdp_backward_prefetch: BACKWARD_PRE
 8 |   fsdp_forward_prefetch: false
 9 |   fsdp_cpu_ram_efficient_loading: true
10 |   fsdp_offload_params: false
11 |   fsdp_sharding_strategy: FULL_SHARD
12 |   fsdp_state_dict_type: FULL_STATE_DICT
13 |   fsdp_sync_module_states: true
14 |   fsdp_use_orig_params: true
15 | machine_rank: 0
16 | main_training_function: main
17 | mixed_precision: bf16  # or fp16
18 | num_machines: 1  # the number of nodes
19 | num_processes: 2  # the number of GPUs in all nodes
20 | rdzv_backend: static
21 | same_network: true
22 | tpu_env: []
23 | tpu_use_cluster: false
24 | tpu_use_sudo: false
25 | use_cpu: false
26 | 


--------------------------------------------------------------------------------
/examples/accelerate/fsdp_config_offload.yaml:
--------------------------------------------------------------------------------
 1 | compute_environment: LOCAL_MACHINE
 2 | debug: false
 3 | distributed_type: FSDP
 4 | downcast_bf16: 'no'
 5 | fsdp_config:
 6 |   fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP
 7 |   fsdp_backward_prefetch: BACKWARD_PRE
 8 |   fsdp_forward_prefetch: false
 9 |   fsdp_cpu_ram_efficient_loading: true
10 |   fsdp_offload_params: true  # offload may affect training speed
11 |   fsdp_sharding_strategy: FULL_SHARD
12 |   fsdp_state_dict_type: FULL_STATE_DICT
13 |   fsdp_sync_module_states: true
14 |   fsdp_use_orig_params: true
15 | machine_rank: 0
16 | main_training_function: main
17 | mixed_precision: bf16  # or fp16
18 | num_machines: 1  # the number of nodes
19 | num_processes: 2  # the number of GPUs in all nodes
20 | rdzv_backend: static
21 | same_network: true
22 | tpu_env: []
23 | tpu_use_cluster: false
24 | tpu_use_sudo: false
25 | use_cpu: false
26 | 


--------------------------------------------------------------------------------
/examples/deepspeed/ds_z0_config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "train_batch_size": "auto",
 3 |   "train_micro_batch_size_per_gpu": "auto",
 4 |   "gradient_accumulation_steps": "auto",
 5 |   "gradient_clipping": "auto",
 6 |   "zero_allow_untested_optimizer": true,
 7 |   "fp16": {
 8 |     "enabled": "auto",
 9 |     "loss_scale": 0,
10 |     "loss_scale_window": 1000,
11 |     "initial_scale_power": 16,
12 |     "hysteresis": 2,
13 |     "min_loss_scale": 1
14 |   },
15 |   "bf16": {
16 |     "enabled": "auto"
17 |   },
18 |   "zero_optimization": {
19 |     "stage": 0,
20 |     "allgather_partitions": true,
21 |     "allgather_bucket_size": 5e8,
22 |     "overlap_comm": false,
23 |     "reduce_scatter": true,
24 |     "reduce_bucket_size": 5e8,
25 |     "contiguous_gradients": true,
26 |     "round_robin_gradients": true
27 |   }
28 | }
29 | 


--------------------------------------------------------------------------------
/examples/deepspeed/ds_z2_config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "train_batch_size": "auto",
 3 |   "train_micro_batch_size_per_gpu": "auto",
 4 |   "gradient_accumulation_steps": "auto",
 5 |   "gradient_clipping": "auto",
 6 |   "zero_allow_untested_optimizer": true,
 7 |   "fp16": {
 8 |     "enabled": "auto",
 9 |     "loss_scale": 0,
10 |     "loss_scale_window": 1000,
11 |     "initial_scale_power": 16,
12 |     "hysteresis": 2,
13 |     "min_loss_scale": 1
14 |   },
15 |   "bf16": {
16 |     "enabled": "auto"
17 |   },
18 |   "zero_optimization": {
19 |     "stage": 2,
20 |     "allgather_partitions": true,
21 |     "allgather_bucket_size": 5e8,
22 |     "overlap_comm": false,
23 |     "reduce_scatter": true,
24 |     "reduce_bucket_size": 5e8,
25 |     "contiguous_gradients": true,
26 |     "round_robin_gradients": true
27 |   }
28 | }
29 | 


--------------------------------------------------------------------------------
/examples/deepspeed/ds_z2_offload_config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "train_batch_size": "auto",
 3 |   "train_micro_batch_size_per_gpu": "auto",
 4 |   "gradient_accumulation_steps": "auto",
 5 |   "gradient_clipping": "auto",
 6 |   "zero_allow_untested_optimizer": true,
 7 |   "fp16": {
 8 |     "enabled": "auto",
 9 |     "loss_scale": 0,
10 |     "loss_scale_window": 1000,
11 |     "initial_scale_power": 16,
12 |     "hysteresis": 2,
13 |     "min_loss_scale": 1
14 |   },
15 |   "bf16": {
16 |     "enabled": "auto"
17 |   },
18 |   "zero_optimization": {
19 |     "stage": 2,
20 |     "offload_optimizer": {
21 |       "device": "cpu",
22 |       "pin_memory": true
23 |     },
24 |     "allgather_partitions": true,
25 |     "allgather_bucket_size": 5e8,
26 |     "overlap_comm": false,
27 |     "reduce_scatter": true,
28 |     "reduce_bucket_size": 5e8,
29 |     "contiguous_gradients": true,
30 |     "round_robin_gradients": true
31 |   }
32 | }
33 | 


--------------------------------------------------------------------------------
/examples/deepspeed/ds_z3_config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "train_batch_size": "auto",
 3 |   "train_micro_batch_size_per_gpu": "auto",
 4 |   "gradient_accumulation_steps": "auto",
 5 |   "gradient_clipping": "auto",
 6 |   "zero_allow_untested_optimizer": true,
 7 |   "fp16": {
 8 |     "enabled": "auto",
 9 |     "loss_scale": 0,
10 |     "loss_scale_window": 1000,
11 |     "initial_scale_power": 16,
12 |     "hysteresis": 2,
13 |     "min_loss_scale": 1
14 |   },
15 |   "bf16": {
16 |     "enabled": "auto"
17 |   },
18 |   "zero_optimization": {
19 |     "stage": 3,
20 |     "overlap_comm": false,
21 |     "contiguous_gradients": true,
22 |     "sub_group_size": 1e9,
23 |     "reduce_bucket_size": "auto",
24 |     "stage3_prefetch_bucket_size": "auto",
25 |     "stage3_param_persistence_threshold": "auto",
26 |     "stage3_max_live_parameters": 1e9,
27 |     "stage3_max_reuse_distance": 1e9,
28 |     "stage3_gather_16bit_weights_on_model_save": true
29 |   }
30 | }
31 | 


--------------------------------------------------------------------------------
/examples/deepspeed/ds_z3_offload_config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "train_batch_size": "auto",
 3 |   "train_micro_batch_size_per_gpu": "auto",
 4 |   "gradient_accumulation_steps": "auto",
 5 |   "gradient_clipping": "auto",
 6 |   "zero_allow_untested_optimizer": true,
 7 |   "fp16": {
 8 |     "enabled": "auto",
 9 |     "loss_scale": 0,
10 |     "loss_scale_window": 1000,
11 |     "initial_scale_power": 16,
12 |     "hysteresis": 2,
13 |     "min_loss_scale": 1
14 |   },
15 |   "bf16": {
16 |     "enabled": "auto"
17 |   },
18 |   "zero_optimization": {
19 |     "stage": 3,
20 |     "offload_optimizer": {
21 |       "device": "cpu",
22 |       "pin_memory": true
23 |     },
24 |     "offload_param": {
25 |       "device": "cpu",
26 |       "pin_memory": true
27 |     },
28 |     "overlap_comm": false,
29 |     "contiguous_gradients": true,
30 |     "sub_group_size": 1e9,
31 |     "reduce_bucket_size": "auto",
32 |     "stage3_prefetch_bucket_size": "auto",
33 |     "stage3_param_persistence_threshold": "auto",
34 |     "stage3_max_live_parameters": 1e9,
35 |     "stage3_max_reuse_distance": 1e9,
36 |     "stage3_gather_16bit_weights_on_model_save": true
37 |   }
38 | }
39 | 


--------------------------------------------------------------------------------
/examples/extras/adam_mini/qwen2_full_sft.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: Qwen/Qwen2-1.5B-Instruct
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: sft
 7 | do_train: true
 8 | finetuning_type: full
 9 | use_adam_mini: true
10 | 
11 | ### dataset
12 | dataset: identity,alpaca_en_demo
13 | template: qwen
14 | cutoff_len: 2048
15 | max_samples: 1000
16 | overwrite_cache: true
17 | preprocessing_num_workers: 16
18 | dataloader_num_workers: 4
19 | 
20 | ### output
21 | output_dir: saves/qwen2-1_5b/full/sft
22 | logging_steps: 10
23 | save_steps: 500
24 | plot_loss: true
25 | overwrite_output_dir: true
26 | save_only_model: false
27 | report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
28 | 
29 | ### train
30 | per_device_train_batch_size: 1
31 | gradient_accumulation_steps: 8
32 | learning_rate: 1.0e-5
33 | num_train_epochs: 3.0
34 | lr_scheduler_type: cosine
35 | warmup_ratio: 0.1
36 | bf16: true
37 | ddp_timeout: 180000000
38 | 
39 | ### eval
40 | # val_size: 0.1
41 | # per_device_eval_batch_size: 1
42 | # eval_strategy: steps
43 | # eval_steps: 500
44 | 


--------------------------------------------------------------------------------
/examples/extras/apollo/llama3_full_sft.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: sft
 7 | do_train: true
 8 | finetuning_type: full
 9 | use_apollo: true
10 | apollo_layerwise: true  # choices: [true, false], use false for DDP training
11 | apollo_target: all
12 | apollo_rank: 128
13 | apollo_scale: 32.0
14 | apollo_scale_type: channel
15 | 
16 | ### dataset
17 | dataset: identity,alpaca_en_demo
18 | template: llama3
19 | cutoff_len: 2048
20 | max_samples: 1000
21 | overwrite_cache: true
22 | preprocessing_num_workers: 16
23 | dataloader_num_workers: 4
24 | 
25 | ### output
26 | output_dir: saves/llama3-8b/full/sft
27 | logging_steps: 10
28 | save_steps: 500
29 | plot_loss: true
30 | overwrite_output_dir: true
31 | save_only_model: false
32 | report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
33 | 
34 | ### train
35 | per_device_train_batch_size: 1
36 | gradient_accumulation_steps: 1  # use 1 for layerwise apollo
37 | learning_rate: 1.0e-5
38 | num_train_epochs: 3.0
39 | lr_scheduler_type: cosine
40 | warmup_ratio: 0.1
41 | pure_bf16: true
42 | ddp_timeout: 180000000
43 | 
44 | ### eval
45 | # val_size: 0.1
46 | # per_device_eval_batch_size: 1
47 | # eval_strategy: steps
48 | # eval_steps: 500
49 | 


--------------------------------------------------------------------------------
/examples/extras/badam/llama3_full_sft.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: sft
 7 | do_train: true
 8 | finetuning_type: full
 9 | use_badam: true
10 | badam_mode: layer
11 | badam_switch_mode: ascending
12 | badam_switch_interval: 50
13 | badam_verbose: 2
14 | # deepspeed: examples/deepspeed/ds_z3_config.json
15 | 
16 | ### dataset
17 | dataset: identity,alpaca_en_demo
18 | template: llama3
19 | cutoff_len: 2048
20 | max_samples: 1000
21 | overwrite_cache: true
22 | preprocessing_num_workers: 16
23 | dataloader_num_workers: 4
24 | 
25 | ### output
26 | output_dir: saves/llama3-8b/full/sft
27 | logging_steps: 10
28 | save_steps: 500
29 | plot_loss: true
30 | overwrite_output_dir: true
31 | save_only_model: false
32 | report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
33 | 
34 | ### train
35 | per_device_train_batch_size: 1
36 | gradient_accumulation_steps: 8
37 | learning_rate: 1.0e-5
38 | num_train_epochs: 3.0
39 | lr_scheduler_type: cosine
40 | warmup_ratio: 0.1
41 | 
42 | ### eval
43 | # val_size: 0.1
44 | # per_device_eval_batch_size: 1
45 | # eval_strategy: steps
46 | # eval_steps: 500
47 | 


--------------------------------------------------------------------------------
/examples/extras/fsdp_qlora/llama3_lora_sft.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | quantization_bit: 4
 4 | trust_remote_code: true
 5 | 
 6 | ### method
 7 | stage: sft
 8 | do_train: true
 9 | finetuning_type: lora
10 | lora_rank: 8
11 | lora_target: all
12 | 
13 | ### dataset
14 | dataset: identity,alpaca_en_demo
15 | template: llama3
16 | cutoff_len: 2048
17 | max_samples: 1000
18 | overwrite_cache: true
19 | preprocessing_num_workers: 16
20 | dataloader_num_workers: 4
21 | 
22 | ### output
23 | output_dir: saves/llama3-8b/lora/sft
24 | logging_steps: 10
25 | save_steps: 500
26 | plot_loss: true
27 | overwrite_output_dir: true
28 | save_only_model: false
29 | report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
30 | 
31 | ### train
32 | per_device_train_batch_size: 1
33 | gradient_accumulation_steps: 8
34 | learning_rate: 1.0e-4
35 | num_train_epochs: 3.0
36 | lr_scheduler_type: cosine
37 | warmup_ratio: 0.1
38 | bf16: true
39 | ddp_timeout: 180000000
40 | 
41 | ### eval
42 | # val_size: 0.1
43 | # per_device_eval_batch_size: 1
44 | # eval_strategy: steps
45 | # eval_steps: 500
46 | 


--------------------------------------------------------------------------------
/examples/extras/fsdp_qlora/train.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # DO NOT use GPTQ/AWQ models with FSDP+QLoRA
3 | 
4 | CUDA_VISIBLE_DEVICES=0,1 accelerate launch \
5 |     --config_file examples/accelerate/fsdp_config.yaml \
6 |     src/train.py examples/extras/fsdp_qlora/llama3_lora_sft.yaml
7 | 


--------------------------------------------------------------------------------
/examples/extras/galore/llama3_full_sft.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: sft
 7 | do_train: true
 8 | finetuning_type: full
 9 | use_galore: true
10 | galore_layerwise: true  # choices: [true, false], use false for DDP training
11 | galore_target: all
12 | galore_rank: 128
13 | galore_scale: 2.0
14 | 
15 | ### dataset
16 | dataset: identity,alpaca_en_demo
17 | template: llama3
18 | cutoff_len: 2048
19 | max_samples: 1000
20 | overwrite_cache: true
21 | preprocessing_num_workers: 16
22 | dataloader_num_workers: 4
23 | 
24 | ### output
25 | output_dir: saves/llama3-8b/full/sft
26 | logging_steps: 10
27 | save_steps: 500
28 | plot_loss: true
29 | overwrite_output_dir: true
30 | save_only_model: false
31 | report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
32 | 
33 | ### train
34 | per_device_train_batch_size: 1
35 | gradient_accumulation_steps: 1  # use 1 for layerwise galore
36 | learning_rate: 1.0e-5
37 | num_train_epochs: 3.0
38 | lr_scheduler_type: cosine
39 | warmup_ratio: 0.1
40 | pure_bf16: true
41 | ddp_timeout: 180000000
42 | 
43 | ### eval
44 | # val_size: 0.1
45 | # per_device_eval_batch_size: 1
46 | # eval_strategy: steps
47 | # eval_steps: 500
48 | 


--------------------------------------------------------------------------------
/examples/extras/llama_pro/expand.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | python scripts/llama_pro.py \
4 |     --model_name_or_path meta-llama/Meta-Llama-3-8B-Instruct \
5 |     --output_dir models/llama3-8b-pro \
6 |     --num_expand 8
7 | 


--------------------------------------------------------------------------------
/examples/extras/llama_pro/llama3_freeze_sft.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: models/llama3-8b-pro
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: sft
 7 | do_train: true
 8 | finetuning_type: freeze
 9 | freeze_trainable_layers: 8
10 | freeze_trainable_modules: all
11 | use_llama_pro: true
12 | 
13 | ### dataset
14 | dataset: identity,alpaca_en_demo
15 | template: llama3
16 | cutoff_len: 2048
17 | max_samples: 1000
18 | overwrite_cache: true
19 | preprocessing_num_workers: 16
20 | dataloader_num_workers: 4
21 | 
22 | ### output
23 | output_dir: saves/llama3-8b-pro/freeze/sft
24 | logging_steps: 10
25 | save_steps: 500
26 | plot_loss: true
27 | overwrite_output_dir: true
28 | save_only_model: false
29 | report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
30 | 
31 | ### train
32 | per_device_train_batch_size: 1
33 | gradient_accumulation_steps: 8
34 | learning_rate: 1.0e-4
35 | num_train_epochs: 3.0
36 | lr_scheduler_type: cosine
37 | warmup_ratio: 0.1
38 | bf16: true
39 | ddp_timeout: 180000000
40 | 
41 | ### eval
42 | # val_size: 0.1
43 | # per_device_eval_batch_size: 1
44 | # eval_strategy: steps
45 | # eval_steps: 500
46 | 


--------------------------------------------------------------------------------
/examples/extras/loraplus/llama3_lora_sft.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: sft
 7 | do_train: true
 8 | finetuning_type: lora
 9 | lora_rank: 8
10 | lora_target: all
11 | loraplus_lr_ratio: 16.0
12 | 
13 | ### dataset
14 | dataset: identity,alpaca_en_demo
15 | template: llama3
16 | cutoff_len: 2048
17 | max_samples: 1000
18 | overwrite_cache: true
19 | preprocessing_num_workers: 16
20 | dataloader_num_workers: 4
21 | 
22 | ### output
23 | output_dir: saves/llama3-8b/lora/sft
24 | logging_steps: 10
25 | save_steps: 500
26 | plot_loss: true
27 | overwrite_output_dir: true
28 | save_only_model: false
29 | report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
30 | 
31 | ### train
32 | per_device_train_batch_size: 1
33 | gradient_accumulation_steps: 8
34 | learning_rate: 1.0e-4
35 | num_train_epochs: 3.0
36 | lr_scheduler_type: cosine
37 | warmup_ratio: 0.1
38 | bf16: true
39 | ddp_timeout: 180000000
40 | 
41 | ### eval
42 | # val_size: 0.1
43 | # per_device_eval_batch_size: 1
44 | # eval_strategy: steps
45 | # eval_steps: 500
46 | 


--------------------------------------------------------------------------------
/examples/extras/mod/llama3_full_sft.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: sft
 7 | do_train: true
 8 | finetuning_type: full
 9 | mixture_of_depths: convert
10 | 
11 | ### dataset
12 | dataset: identity,alpaca_en_demo
13 | template: llama3
14 | cutoff_len: 2048
15 | max_samples: 1000
16 | overwrite_cache: true
17 | preprocessing_num_workers: 16
18 | dataloader_num_workers: 4
19 | 
20 | ### output
21 | output_dir: saves/llama3-8b-mod/full/sft
22 | logging_steps: 10
23 | save_steps: 500
24 | plot_loss: true
25 | overwrite_output_dir: true
26 | save_only_model: false
27 | report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
28 | 
29 | ### train
30 | per_device_train_batch_size: 1
31 | gradient_accumulation_steps: 8
32 | optim: paged_adamw_8bit
33 | learning_rate: 1.0e-5
34 | num_train_epochs: 3.0
35 | lr_scheduler_type: cosine
36 | warmup_ratio: 0.1
37 | pure_bf16: true
38 | ddp_timeout: 180000000
39 | 
40 | ### eval
41 | # val_size: 0.1
42 | # per_device_eval_batch_size: 1
43 | # eval_strategy: steps
44 | # eval_steps: 500
45 | 


--------------------------------------------------------------------------------
/examples/extras/muon/qwen2_full_sft.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: Qwen/Qwen2-1.5B-Instruct
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: sft
 7 | do_train: true
 8 | finetuning_type: full
 9 | use_muon: true
10 | 
11 | ### dataset
12 | dataset: identity,alpaca_en_demo
13 | template: qwen
14 | cutoff_len: 2048
15 | max_samples: 1000
16 | overwrite_cache: true
17 | preprocessing_num_workers: 16
18 | dataloader_num_workers: 4
19 | 
20 | ### output
21 | output_dir: saves/qwen2-1_5b/full/sft
22 | logging_steps: 10
23 | save_steps: 500
24 | plot_loss: true
25 | overwrite_output_dir: true
26 | save_only_model: false
27 | report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
28 | 
29 | ### train
30 | per_device_train_batch_size: 1
31 | gradient_accumulation_steps: 8
32 | learning_rate: 1.0e-5
33 | num_train_epochs: 3.0
34 | lr_scheduler_type: cosine
35 | warmup_ratio: 0.1
36 | bf16: true
37 | ddp_timeout: 180000000
38 | 
39 | ### eval
40 | # val_size: 0.1
41 | # per_device_eval_batch_size: 1
42 | # eval_strategy: steps
43 | # eval_steps: 500
44 | 


--------------------------------------------------------------------------------
/examples/extras/nlg_eval/llama3_lora_predict.yaml:
--------------------------------------------------------------------------------
 1 | # Batch generation can be SLOW with this config.
 2 | # For faster inference, we recommend using `scripts/vllm_infer.py`.
 3 | 
 4 | ### model
 5 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 6 | adapter_name_or_path: saves/llama3-8b/lora/sft
 7 | trust_remote_code: true
 8 | 
 9 | ### method
10 | stage: sft
11 | do_predict: true
12 | finetuning_type: lora
13 | 
14 | ### dataset
15 | eval_dataset: identity,alpaca_en_demo
16 | template: llama3
17 | cutoff_len: 2048
18 | max_samples: 50
19 | overwrite_cache: true
20 | preprocessing_num_workers: 16
21 | dataloader_num_workers: 4
22 | 
23 | ### output
24 | output_dir: saves/llama3-8b/lora/predict
25 | overwrite_output_dir: true
26 | report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
27 | 
28 | ### eval
29 | per_device_eval_batch_size: 1
30 | predict_with_generate: true
31 | ddp_timeout: 180000000
32 | 


--------------------------------------------------------------------------------
/examples/extras/pissa/init.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | python scripts/pissa_init.py \
4 |     --model_name_or_path meta-llama/Meta-Llama-3-8B-Instruct \
5 |     --output_dir models/llama3-8b-pissa
6 | 


--------------------------------------------------------------------------------
/examples/extras/pissa/llama3_lora_sft.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: sft
 7 | do_train: true
 8 | finetuning_type: lora
 9 | lora_rank: 8
10 | lora_target: all
11 | pissa_init: true
12 | pissa_iter: 16
13 | pissa_convert: true
14 | 
15 | ### dataset
16 | dataset: identity,alpaca_en_demo
17 | template: llama3
18 | cutoff_len: 2048
19 | max_samples: 1000
20 | overwrite_cache: true
21 | preprocessing_num_workers: 16
22 | dataloader_num_workers: 4
23 | 
24 | ### output
25 | output_dir: saves/llama3-8b/lora/sft
26 | logging_steps: 10
27 | save_steps: 500
28 | plot_loss: true
29 | overwrite_output_dir: true
30 | save_only_model: false
31 | report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
32 | 
33 | ### train
34 | per_device_train_batch_size: 1
35 | gradient_accumulation_steps: 8
36 | learning_rate: 1.0e-4
37 | num_train_epochs: 3.0
38 | lr_scheduler_type: cosine
39 | warmup_ratio: 0.1
40 | bf16: true
41 | ddp_timeout: 180000000
42 | 
43 | ### eval
44 | # val_size: 0.1
45 | # per_device_eval_batch_size: 1
46 | # eval_strategy: steps
47 | # eval_steps: 500
48 | 


--------------------------------------------------------------------------------
/examples/inference/llama3.yaml:
--------------------------------------------------------------------------------
1 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
2 | template: llama3
3 | infer_backend: huggingface  # choices: [huggingface, vllm, sglang]
4 | trust_remote_code: true
5 | 


--------------------------------------------------------------------------------
/examples/inference/llama3_full_sft.yaml:
--------------------------------------------------------------------------------
1 | model_name_or_path: saves/llama3-8b/full/sft
2 | template: llama3
3 | infer_backend: huggingface  # choices: [huggingface, vllm, sglang]
4 | trust_remote_code: true
5 | 


--------------------------------------------------------------------------------
/examples/inference/llama3_lora_sft.yaml:
--------------------------------------------------------------------------------
1 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
2 | adapter_name_or_path: saves/llama3-8b/lora/sft
3 | template: llama3
4 | infer_backend: huggingface  # choices: [huggingface, vllm, sglang]
5 | trust_remote_code: true
6 | 


--------------------------------------------------------------------------------
/examples/inference/qwen2_5vl.yaml:
--------------------------------------------------------------------------------
1 | model_name_or_path: Qwen/Qwen2.5-VL-7B-Instruct
2 | template: qwen2_vl
3 | infer_backend: huggingface  # choices: [huggingface, vllm, sglang]
4 | trust_remote_code: true
5 | 


--------------------------------------------------------------------------------
/examples/merge_lora/llama3_full_sft.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: saves/llama3-8b/full/sft
 3 | template: llama3
 4 | trust_remote_code: true
 5 | 
 6 | ### export
 7 | export_dir: output/llama3_full_sft
 8 | export_size: 5
 9 | export_device: cpu  # choices: [cpu, auto]
10 | export_legacy_format: false
11 | 


--------------------------------------------------------------------------------
/examples/merge_lora/llama3_gptq.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | template: llama3
 4 | trust_remote_code: true
 5 | 
 6 | ### export
 7 | export_dir: output/llama3_gptq
 8 | export_quantization_bit: 4
 9 | export_quantization_dataset: data/c4_demo.jsonl
10 | export_size: 5
11 | export_device: cpu  # choices: [cpu, auto]
12 | export_legacy_format: false
13 | 


--------------------------------------------------------------------------------
/examples/merge_lora/llama3_lora_sft.yaml:
--------------------------------------------------------------------------------
 1 | ### Note: DO NOT use a quantized model or set quantization_bit when merging LoRA adapters
 2 | 
 3 | ### model
 4 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 5 | adapter_name_or_path: saves/llama3-8b/lora/sft
 6 | template: llama3
 7 | trust_remote_code: true
 8 | 
 9 | ### export
10 | export_dir: output/llama3_lora_sft
11 | export_size: 5
12 | export_device: cpu  # choices: [cpu, auto]
13 | export_legacy_format: false
14 | 


--------------------------------------------------------------------------------
/examples/merge_lora/qwen2_5vl_lora_sft.yaml:
--------------------------------------------------------------------------------
 1 | ### Note: DO NOT use a quantized model or set quantization_bit when merging LoRA adapters
 2 | 
 3 | ### model
 4 | model_name_or_path: Qwen/Qwen2.5-VL-7B-Instruct
 5 | adapter_name_or_path: saves/qwen2_5vl-7b/lora/sft
 6 | template: qwen2_vl
 7 | trust_remote_code: true
 8 | 
 9 | ### export
10 | export_dir: output/qwen2_5vl_lora_sft
11 | export_size: 5
12 | export_device: cpu  # choices: [cpu, auto]
13 | export_legacy_format: false
14 | 


--------------------------------------------------------------------------------
/examples/train_full/llama3_full_sft.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: sft
 7 | do_train: true
 8 | finetuning_type: full
 9 | deepspeed: examples/deepspeed/ds_z3_config.json  # choices: [ds_z0_config.json, ds_z2_config.json, ds_z3_config.json]
10 | 
11 | ### dataset
12 | dataset: identity,alpaca_en_demo
13 | template: llama3
14 | cutoff_len: 2048
15 | max_samples: 1000
16 | overwrite_cache: true
17 | preprocessing_num_workers: 16
18 | dataloader_num_workers: 4
19 | 
20 | ### output
21 | output_dir: saves/llama3-8b/full/sft
22 | logging_steps: 10
23 | save_steps: 500
24 | plot_loss: true
25 | overwrite_output_dir: true
26 | save_only_model: false
27 | report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
28 | 
29 | ### train
30 | per_device_train_batch_size: 1
31 | gradient_accumulation_steps: 2
32 | learning_rate: 1.0e-5
33 | num_train_epochs: 3.0
34 | lr_scheduler_type: cosine
35 | warmup_ratio: 0.1
36 | bf16: true
37 | ddp_timeout: 180000000
38 | resume_from_checkpoint: null
39 | 
40 | ### eval
41 | # eval_dataset: alpaca_en_demo
42 | # val_size: 0.1
43 | # per_device_eval_batch_size: 1
44 | # eval_strategy: steps
45 | # eval_steps: 500
46 | 


--------------------------------------------------------------------------------
/examples/train_full/qwen2_5vl_full_sft.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: Qwen/Qwen2.5-VL-7B-Instruct
 3 | image_max_pixels: 262144
 4 | video_max_pixels: 16384
 5 | trust_remote_code: true
 6 | 
 7 | ### method
 8 | stage: sft
 9 | do_train: true
10 | finetuning_type: full
11 | freeze_vision_tower: true
12 | freeze_multi_modal_projector: true
13 | freeze_language_model: false
14 | deepspeed: examples/deepspeed/ds_z3_config.json
15 | 
16 | ### dataset
17 | dataset: mllm_demo,identity,alpaca_en_demo
18 | template: qwen2_vl
19 | cutoff_len: 2048
20 | max_samples: 1000
21 | overwrite_cache: true
22 | preprocessing_num_workers: 16
23 | dataloader_num_workers: 4
24 | 
25 | ### output
26 | output_dir: saves/qwen2_5vl-7b/full/sft
27 | logging_steps: 10
28 | save_steps: 500
29 | plot_loss: true
30 | overwrite_output_dir: true
31 | save_only_model: false
32 | report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
33 | 
34 | ### train
35 | per_device_train_batch_size: 1
36 | gradient_accumulation_steps: 2
37 | learning_rate: 1.0e-5
38 | num_train_epochs: 3.0
39 | lr_scheduler_type: cosine
40 | warmup_ratio: 0.1
41 | bf16: true
42 | ddp_timeout: 180000000
43 | resume_from_checkpoint: null
44 | 
45 | ### eval
46 | # val_size: 0.1
47 | # per_device_eval_batch_size: 1
48 | # eval_strategy: steps
49 | # eval_steps: 500
50 | 


--------------------------------------------------------------------------------
/examples/train_lora/llama3_lora_dpo.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: dpo
 7 | do_train: true
 8 | finetuning_type: lora
 9 | lora_rank: 8
10 | lora_target: all
11 | pref_beta: 0.1
12 | pref_loss: sigmoid  # choices: [sigmoid (dpo), orpo, simpo]
13 | 
14 | ### dataset
15 | dataset: dpo_en_demo
16 | template: llama3
17 | cutoff_len: 2048
18 | max_samples: 1000
19 | overwrite_cache: true
20 | preprocessing_num_workers: 16
21 | dataloader_num_workers: 4
22 | 
23 | ### output
24 | output_dir: saves/llama3-8b/lora/dpo
25 | logging_steps: 10
26 | save_steps: 500
27 | plot_loss: true
28 | overwrite_output_dir: true
29 | save_only_model: false
30 | report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
31 | 
32 | ### train
33 | per_device_train_batch_size: 1
34 | gradient_accumulation_steps: 8
35 | learning_rate: 5.0e-6
36 | num_train_epochs: 3.0
37 | lr_scheduler_type: cosine
38 | warmup_ratio: 0.1
39 | bf16: true
40 | ddp_timeout: 180000000
41 | resume_from_checkpoint: null
42 | 
43 | ### eval
44 | # eval_dataset: dpo_en_demo
45 | # val_size: 0.1
46 | # per_device_eval_batch_size: 1
47 | # eval_strategy: steps
48 | # eval_steps: 500
49 | 


--------------------------------------------------------------------------------
/examples/train_lora/llama3_lora_eval.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | adapter_name_or_path: saves/llama3-8b/lora/sft
 4 | trust_remote_code: true
 5 | 
 6 | ### method
 7 | finetuning_type: lora
 8 | 
 9 | ### dataset
10 | task: mmlu_test  # choices: [mmlu_test, ceval_validation, cmmlu_test]
11 | template: fewshot
12 | lang: en
13 | n_shot: 5
14 | 
15 | ### output
16 | save_dir: saves/llama3-8b/lora/eval
17 | 
18 | ### eval
19 | batch_size: 4
20 | 


--------------------------------------------------------------------------------
/examples/train_lora/llama3_lora_kto.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: kto
 7 | do_train: true
 8 | finetuning_type: lora
 9 | lora_rank: 8
10 | lora_target: all
11 | pref_beta: 0.1
12 | 
13 | ### dataset
14 | dataset: kto_en_demo
15 | template: llama3
16 | cutoff_len: 2048
17 | max_samples: 1000
18 | overwrite_cache: true
19 | preprocessing_num_workers: 16
20 | dataloader_num_workers: 4
21 | 
22 | ### output
23 | output_dir: saves/llama3-8b/lora/kto
24 | logging_steps: 10
25 | save_steps: 500
26 | plot_loss: true
27 | overwrite_output_dir: true
28 | report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
29 | 
30 | ### train
31 | per_device_train_batch_size: 1
32 | gradient_accumulation_steps: 8
33 | learning_rate: 5.0e-6
34 | num_train_epochs: 3.0
35 | lr_scheduler_type: cosine
36 | warmup_ratio: 0.1
37 | bf16: true
38 | ddp_timeout: 180000000
39 | 
40 | ### eval
41 | # val_size: 0.1
42 | # per_device_eval_batch_size: 1
43 | # eval_strategy: steps
44 | # eval_steps: 500
45 | 


--------------------------------------------------------------------------------
/examples/train_lora/llama3_lora_ppo.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | reward_model: saves/llama3-8b/lora/reward
 4 | trust_remote_code: true
 5 | 
 6 | ### method
 7 | stage: ppo
 8 | do_train: true
 9 | finetuning_type: lora
10 | lora_rank: 8
11 | lora_target: all
12 | 
13 | ### dataset
14 | dataset: identity,alpaca_en_demo
15 | template: llama3
16 | cutoff_len: 2048
17 | max_samples: 1000
18 | overwrite_cache: true
19 | preprocessing_num_workers: 16
20 | dataloader_num_workers: 4
21 | 
22 | ### output
23 | output_dir: saves/llama3-8b/lora/ppo
24 | logging_steps: 10
25 | save_steps: 500
26 | plot_loss: true
27 | overwrite_output_dir: true
28 | report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
29 | 
30 | ### train
31 | per_device_train_batch_size: 1
32 | gradient_accumulation_steps: 8
33 | learning_rate: 1.0e-5
34 | num_train_epochs: 3.0
35 | lr_scheduler_type: cosine
36 | warmup_ratio: 0.1
37 | bf16: true
38 | ddp_timeout: 180000000
39 | 
40 | ### generate
41 | max_new_tokens: 512
42 | top_k: 0
43 | top_p: 0.9
44 | 


--------------------------------------------------------------------------------
/examples/train_lora/llama3_lora_pretrain.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: pt
 7 | do_train: true
 8 | finetuning_type: lora
 9 | lora_rank: 8
10 | lora_target: all
11 | 
12 | ### dataset
13 | dataset: c4_demo
14 | cutoff_len: 2048
15 | max_samples: 1000
16 | overwrite_cache: true
17 | preprocessing_num_workers: 16
18 | dataloader_num_workers: 4
19 | 
20 | ### output
21 | output_dir: saves/llama3-8b/lora/pretrain
22 | logging_steps: 10
23 | save_steps: 500
24 | plot_loss: true
25 | overwrite_output_dir: true
26 | save_only_model: false
27 | report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
28 | 
29 | ### train
30 | per_device_train_batch_size: 1
31 | gradient_accumulation_steps: 8
32 | learning_rate: 1.0e-4
33 | num_train_epochs: 3.0
34 | lr_scheduler_type: cosine
35 | warmup_ratio: 0.1
36 | bf16: true
37 | ddp_timeout: 180000000
38 | resume_from_checkpoint: null
39 | 
40 | ### eval
41 | # eval_dataset: c4_demo
42 | # val_size: 0.1
43 | # per_device_eval_batch_size: 1
44 | # eval_strategy: steps
45 | # eval_steps: 500
46 | 


--------------------------------------------------------------------------------
/examples/train_lora/llama3_lora_reward.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: rm
 7 | do_train: true
 8 | finetuning_type: lora
 9 | lora_rank: 8
10 | lora_target: all
11 | 
12 | ### dataset
13 | dataset: dpo_en_demo
14 | template: llama3
15 | cutoff_len: 2048
16 | max_samples: 1000
17 | overwrite_cache: true
18 | preprocessing_num_workers: 16
19 | dataloader_num_workers: 4
20 | 
21 | ### output
22 | output_dir: saves/llama3-8b/lora/reward
23 | logging_steps: 10
24 | save_steps: 500
25 | plot_loss: true
26 | overwrite_output_dir: true
27 | save_only_model: false
28 | report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
29 | 
30 | ### train
31 | per_device_train_batch_size: 1
32 | gradient_accumulation_steps: 8
33 | learning_rate: 1.0e-4
34 | num_train_epochs: 3.0
35 | lr_scheduler_type: cosine
36 | warmup_ratio: 0.1
37 | bf16: true
38 | ddp_timeout: 180000000
39 | resume_from_checkpoint: null
40 | 
41 | ### eval
42 | # eval_dataset: dpo_en_demo
43 | # val_size: 0.1
44 | # per_device_eval_batch_size: 1
45 | # eval_strategy: steps
46 | # eval_steps: 500
47 | 


--------------------------------------------------------------------------------
/examples/train_lora/llama3_lora_sft.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | set -x
 4 | 
 5 | MODEL_PATH=meta-llama/Meta-Llama-3-8B-Instruct
 6 | 
 7 | llamafactory-cli train \
 8 |     --model_name_or_path ${MODEL_PATH} \
 9 |     --trust_remote_code \
10 |     --stage sft \
11 |     --do_train \
12 |     --finetuning_type lora \
13 |     --lora_rank 8 \
14 |     --lora_target all \
15 |     --dataset identity,alpaca_en_demo \
16 |     --template llama3 \
17 |     --cutoff_len 2048 \
18 |     --max_samples 1000 \
19 |     --overwrite_cache \
20 |     --preprocessing_num_workers 16 \
21 |     --dataloader_num_workers 4 \
22 |     --output_dir saves/llama3-8b/lora/sft \
23 |     --logging_steps 10 \
24 |     --save_steps 500 \
25 |     --plot_loss \
26 |     --overwrite_output_dir \
27 |     --save_only_model false \
28 |     --report_to none \
29 |     --per_device_train_batch_size 1 \
30 |     --gradient_accumulation_steps 8 \
31 |     --learning_rate 1e-4 \
32 |     --num_train_epochs 3.0 \
33 |     --lr_scheduler_type cosine \
34 |     --warmup_ratio 0.1 \
35 |     --bf16 \
36 |     --ddp_timeout 180000000
37 | 


--------------------------------------------------------------------------------
/examples/train_lora/llama3_lora_sft.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: sft
 7 | do_train: true
 8 | finetuning_type: lora
 9 | lora_rank: 8
10 | lora_target: all
11 | 
12 | ### dataset
13 | dataset: identity,alpaca_en_demo
14 | template: llama3
15 | cutoff_len: 2048
16 | max_samples: 1000
17 | overwrite_cache: true
18 | preprocessing_num_workers: 16
19 | dataloader_num_workers: 4
20 | 
21 | ### output
22 | output_dir: saves/llama3-8b/lora/sft
23 | logging_steps: 10
24 | save_steps: 500
25 | plot_loss: true
26 | overwrite_output_dir: true
27 | save_only_model: false
28 | report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
29 | 
30 | ### train
31 | per_device_train_batch_size: 1
32 | gradient_accumulation_steps: 8
33 | learning_rate: 1.0e-4
34 | num_train_epochs: 3.0
35 | lr_scheduler_type: cosine
36 | warmup_ratio: 0.1
37 | bf16: true
38 | ddp_timeout: 180000000
39 | resume_from_checkpoint: null
40 | 
41 | ### eval
42 | # eval_dataset: alpaca_en_demo
43 | # val_size: 0.1
44 | # per_device_eval_batch_size: 1
45 | # eval_strategy: steps
46 | # eval_steps: 500
47 | 


--------------------------------------------------------------------------------
/examples/train_lora/llama3_lora_sft_ds3.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: sft
 7 | do_train: true
 8 | finetuning_type: lora
 9 | lora_rank: 8
10 | lora_target: all
11 | deepspeed: examples/deepspeed/ds_z3_config.json  # choices: [ds_z0_config.json, ds_z2_config.json, ds_z3_config.json]
12 | 
13 | ### dataset
14 | dataset: identity,alpaca_en_demo
15 | template: llama3
16 | cutoff_len: 2048
17 | max_samples: 1000
18 | overwrite_cache: true
19 | preprocessing_num_workers: 16
20 | dataloader_num_workers: 4
21 | 
22 | ### output
23 | output_dir: saves/llama3-8b/lora/sft
24 | logging_steps: 10
25 | save_steps: 500
26 | plot_loss: true
27 | overwrite_output_dir: true
28 | save_only_model: false
29 | report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
30 | 
31 | ### train
32 | per_device_train_batch_size: 1
33 | gradient_accumulation_steps: 2
34 | learning_rate: 1.0e-4
35 | num_train_epochs: 3.0
36 | lr_scheduler_type: cosine
37 | warmup_ratio: 0.1
38 | bf16: true
39 | ddp_timeout: 180000000
40 | resume_from_checkpoint: null
41 | 
42 | ### eval
43 | # eval_dataset: alpaca_en_demo
44 | # val_size: 0.1
45 | # per_device_eval_batch_size: 1
46 | # eval_strategy: steps
47 | # eval_steps: 500
48 | 


--------------------------------------------------------------------------------
/examples/train_lora/llama3_lora_sft_ray.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct  # or use local absolute path
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: sft
 7 | do_train: true
 8 | finetuning_type: lora
 9 | lora_rank: 8
10 | lora_target: all
11 | 
12 | ### dataset
13 | dataset: identity,alpaca_en_demo
14 | dataset_dir: REMOTE:llamafactory/demo_data  # or use local absolute path
15 | template: llama3
16 | cutoff_len: 2048
17 | max_samples: 1000
18 | overwrite_cache: true
19 | preprocessing_num_workers: 16
20 | dataloader_num_workers: 4
21 | 
22 | ### output
23 | output_dir: tmp_dir
24 | logging_steps: 10
25 | save_steps: 500
26 | plot_loss: true
27 | overwrite_output_dir: true
28 | save_only_model: false
29 | report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
30 | 
31 | ### ray
32 | ray_run_name: llama3_8b_sft_lora
33 | ray_storage_path: ./saves
34 | ray_num_workers: 4  # Number of GPUs to use.
35 | placement_strategy: PACK
36 | resources_per_worker:
37 |   GPU: 1
38 | # ray_init_kwargs:
39 | #   runtime_env:
40 | #     env_vars:
41 | #       <YOUR-ENV-VAR-HERE>: "<YOUR-ENV-VAR-HERE>"
42 | #     pip:
43 | #       - emoji
44 | 
45 | ### train
46 | per_device_train_batch_size: 1
47 | gradient_accumulation_steps: 8
48 | learning_rate: 1.0e-4
49 | num_train_epochs: 3.0
50 | lr_scheduler_type: cosine
51 | warmup_ratio: 0.1
52 | bf16: true
53 | ddp_timeout: 180000000
54 | resume_from_checkpoint: null
55 | 
56 | ### eval
57 | # eval_dataset: alpaca_en_demo
58 | # val_size: 0.1
59 | # per_device_eval_batch_size: 1
60 | # eval_strategy: steps
61 | # eval_steps: 500
62 | 


--------------------------------------------------------------------------------
/examples/train_lora/llama3_preprocess.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: sft
 7 | do_train: true
 8 | finetuning_type: lora
 9 | lora_rank: 8
10 | lora_target: all
11 | 
12 | ### dataset
13 | dataset: identity,alpaca_en_demo
14 | template: llama3
15 | cutoff_len: 2048
16 | max_samples: 1000
17 | overwrite_cache: true
18 | preprocessing_num_workers: 16
19 | tokenized_path: saves/llama3-8b/dataset/sft
20 | 
21 | ### output
22 | output_dir: saves/llama3-8b/lora/sft
23 | overwrite_output_dir: true
24 | 


--------------------------------------------------------------------------------
/examples/train_lora/llama4_lora_sft_ds3.yaml:
--------------------------------------------------------------------------------
 1 | # pip install git+https://github.com/hiyouga/transformers.git@llama4_train
 2 | 
 3 | ### model
 4 | model_name_or_path: meta-llama/Llama-4-Scout-17B-16E-Instruct
 5 | trust_remote_code: true
 6 | 
 7 | ### method
 8 | stage: sft
 9 | do_train: true
10 | finetuning_type: lora
11 | lora_rank: 8
12 | lora_target: all
13 | deepspeed: examples/deepspeed/ds_z3_config.json  # choices: [ds_z0_config.json, ds_z2_config.json, ds_z3_config.json]
14 | 
15 | ### dataset
16 | dataset: mllm_demo,identity,alpaca_en_demo
17 | template: llama4
18 | cutoff_len: 2048
19 | max_samples: 1000
20 | overwrite_cache: true
21 | preprocessing_num_workers: 16
22 | dataloader_num_workers: 4
23 | 
24 | ### output
25 | output_dir: saves/llama4-8b/lora/sft
26 | logging_steps: 10
27 | save_steps: 500
28 | plot_loss: true
29 | overwrite_output_dir: true
30 | save_only_model: false
31 | report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
32 | 
33 | ### train
34 | per_device_train_batch_size: 1
35 | gradient_accumulation_steps: 2
36 | learning_rate: 1.0e-4
37 | num_train_epochs: 3.0
38 | lr_scheduler_type: cosine
39 | warmup_ratio: 0.1
40 | bf16: true
41 | ddp_timeout: 180000000
42 | resume_from_checkpoint: null
43 | 
44 | ### eval
45 | # eval_dataset: alpaca_en_demo
46 | # val_size: 0.1
47 | # per_device_eval_batch_size: 1
48 | # eval_strategy: steps
49 | # eval_steps: 500
50 | 


--------------------------------------------------------------------------------
/examples/train_lora/qwen2_5vl_lora_dpo.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: Qwen/Qwen2.5-VL-7B-Instruct
 3 | image_max_pixels: 262144
 4 | video_max_pixels: 16384
 5 | trust_remote_code: true
 6 | 
 7 | ### method
 8 | stage: dpo
 9 | do_train: true
10 | finetuning_type: lora
11 | lora_rank: 8
12 | lora_target: all
13 | pref_beta: 0.1
14 | pref_loss: sigmoid  # choices: [sigmoid (dpo), orpo, simpo]
15 | 
16 | ### dataset
17 | dataset: rlhf_v
18 | template: qwen2_vl
19 | cutoff_len: 2048
20 | max_samples: 1000
21 | overwrite_cache: true
22 | preprocessing_num_workers: 16
23 | dataloader_num_workers: 4
24 | 
25 | ### output
26 | output_dir: saves/qwen2_5vl-7b/lora/dpo
27 | logging_steps: 10
28 | save_steps: 500
29 | plot_loss: true
30 | overwrite_output_dir: true
31 | save_only_model: false
32 | report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
33 | 
34 | ### train
35 | per_device_train_batch_size: 1
36 | gradient_accumulation_steps: 8
37 | learning_rate: 5.0e-6
38 | num_train_epochs: 3.0
39 | lr_scheduler_type: cosine
40 | warmup_ratio: 0.1
41 | bf16: true
42 | ddp_timeout: 180000000
43 | resume_from_checkpoint: null
44 | 
45 | ### eval
46 | # val_size: 0.1
47 | # per_device_eval_batch_size: 1
48 | # eval_strategy: steps
49 | # eval_steps: 500
50 | 


--------------------------------------------------------------------------------
/examples/train_lora/qwen2_5vl_lora_sft.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: Qwen/Qwen2.5-VL-7B-Instruct
 3 | image_max_pixels: 262144
 4 | video_max_pixels: 16384
 5 | trust_remote_code: true
 6 | 
 7 | ### method
 8 | stage: sft
 9 | do_train: true
10 | finetuning_type: lora
11 | lora_rank: 8
12 | lora_target: all
13 | 
14 | ### dataset
15 | dataset: mllm_demo,identity,alpaca_en_demo  # video: mllm_video_demo
16 | template: qwen2_vl
17 | cutoff_len: 2048
18 | max_samples: 1000
19 | overwrite_cache: true
20 | preprocessing_num_workers: 16
21 | dataloader_num_workers: 4
22 | 
23 | ### output
24 | output_dir: saves/qwen2_5vl-7b/lora/sft
25 | logging_steps: 10
26 | save_steps: 500
27 | plot_loss: true
28 | overwrite_output_dir: true
29 | save_only_model: false
30 | report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
31 | 
32 | ### train
33 | per_device_train_batch_size: 1
34 | gradient_accumulation_steps: 8
35 | learning_rate: 1.0e-4
36 | num_train_epochs: 3.0
37 | lr_scheduler_type: cosine
38 | warmup_ratio: 0.1
39 | bf16: true
40 | ddp_timeout: 180000000
41 | resume_from_checkpoint: null
42 | 
43 | ### eval
44 | # val_size: 0.1
45 | # per_device_eval_batch_size: 1
46 | # eval_strategy: steps
47 | # eval_steps: 500
48 | 


--------------------------------------------------------------------------------
/examples/train_qlora/llama3_lora_sft_aqlm.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: ISTA-DASLab/Meta-Llama-3-8B-Instruct-AQLM-2Bit-1x16
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: sft
 7 | do_train: true
 8 | finetuning_type: lora
 9 | lora_rank: 8
10 | lora_target: all
11 | 
12 | ### dataset
13 | dataset: identity,alpaca_en_demo
14 | template: llama3
15 | cutoff_len: 2048
16 | max_samples: 1000
17 | overwrite_cache: true
18 | preprocessing_num_workers: 16
19 | dataloader_num_workers: 4
20 | 
21 | ### output
22 | output_dir: saves/llama3-8b/lora/sft
23 | logging_steps: 10
24 | save_steps: 500
25 | plot_loss: true
26 | overwrite_output_dir: true
27 | save_only_model: false
28 | report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
29 | 
30 | ### train
31 | per_device_train_batch_size: 1
32 | gradient_accumulation_steps: 8
33 | learning_rate: 1.0e-4
34 | num_train_epochs: 3.0
35 | lr_scheduler_type: cosine
36 | warmup_ratio: 0.1
37 | bf16: true
38 | ddp_timeout: 180000000
39 | 
40 | ### eval
41 | # val_size: 0.1
42 | # per_device_eval_batch_size: 1
43 | # eval_strategy: steps
44 | # eval_steps: 500
45 | 


--------------------------------------------------------------------------------
/examples/train_qlora/llama3_lora_sft_awq.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: TechxGenus/Meta-Llama-3-8B-Instruct-AWQ
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: sft
 7 | do_train: true
 8 | finetuning_type: lora
 9 | lora_rank: 8
10 | lora_target: all
11 | 
12 | ### dataset
13 | dataset: identity,alpaca_en_demo
14 | template: llama3
15 | cutoff_len: 2048
16 | max_samples: 1000
17 | overwrite_cache: true
18 | preprocessing_num_workers: 16
19 | dataloader_num_workers: 4
20 | 
21 | ### output
22 | output_dir: saves/llama3-8b/lora/sft
23 | logging_steps: 10
24 | save_steps: 500
25 | plot_loss: true
26 | overwrite_output_dir: true
27 | save_only_model: false
28 | report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
29 | 
30 | ### train
31 | per_device_train_batch_size: 1
32 | gradient_accumulation_steps: 8
33 | learning_rate: 1.0e-4
34 | num_train_epochs: 3.0
35 | lr_scheduler_type: cosine
36 | warmup_ratio: 0.1
37 | bf16: true
38 | ddp_timeout: 180000000
39 | 
40 | ### eval
41 | # val_size: 0.1
42 | # per_device_eval_batch_size: 1
43 | # eval_strategy: steps
44 | # eval_steps: 500
45 | 


--------------------------------------------------------------------------------
/examples/train_qlora/llama3_lora_sft_bnb_npu.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | quantization_bit: 4
 4 | quantization_method: bnb
 5 | double_quantization: false
 6 | trust_remote_code: true
 7 | 
 8 | ### method
 9 | stage: sft
10 | do_train: true
11 | finetuning_type: lora
12 | lora_rank: 8
13 | lora_target: all
14 | 
15 | ### dataset
16 | dataset: identity,alpaca_en_demo
17 | template: llama3
18 | cutoff_len: 2048
19 | max_samples: 1000
20 | overwrite_cache: true
21 | preprocessing_num_workers: 16
22 | dataloader_num_workers: 4
23 | 
24 | ### output
25 | output_dir: saves/llama3-8b/lora/sft
26 | logging_steps: 10
27 | save_steps: 500
28 | plot_loss: true
29 | overwrite_output_dir: true
30 | save_only_model: false
31 | report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
32 | 
33 | ### train
34 | per_device_train_batch_size: 1
35 | gradient_accumulation_steps: 8
36 | learning_rate: 1.0e-4
37 | num_train_epochs: 3.0
38 | lr_scheduler_type: cosine
39 | warmup_ratio: 0.1
40 | bf16: true
41 | ddp_timeout: 180000000
42 | 
43 | ### eval
44 | # val_size: 0.1
45 | # per_device_eval_batch_size: 1
46 | # eval_strategy: steps
47 | # eval_steps: 500
48 | 


--------------------------------------------------------------------------------
/examples/train_qlora/llama3_lora_sft_gptq.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: TechxGenus/Meta-Llama-3-8B-Instruct-GPTQ
 3 | trust_remote_code: true
 4 | 
 5 | ### method
 6 | stage: sft
 7 | do_train: true
 8 | finetuning_type: lora
 9 | lora_rank: 8
10 | lora_target: all
11 | 
12 | ### dataset
13 | dataset: identity,alpaca_en_demo
14 | template: llama3
15 | cutoff_len: 2048
16 | max_samples: 1000
17 | overwrite_cache: true
18 | preprocessing_num_workers: 16
19 | dataloader_num_workers: 4
20 | 
21 | ### output
22 | output_dir: saves/llama3-8b/lora/sft
23 | logging_steps: 10
24 | save_steps: 500
25 | plot_loss: true
26 | overwrite_output_dir: true
27 | save_only_model: false
28 | report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
29 | 
30 | ### train
31 | per_device_train_batch_size: 1
32 | gradient_accumulation_steps: 8
33 | learning_rate: 1.0e-4
34 | num_train_epochs: 3.0
35 | lr_scheduler_type: cosine
36 | warmup_ratio: 0.1
37 | bf16: true
38 | ddp_timeout: 180000000
39 | 
40 | ### eval
41 | # val_size: 0.1
42 | # per_device_eval_batch_size: 1
43 | # eval_strategy: steps
44 | # eval_steps: 500
45 | 


--------------------------------------------------------------------------------
/examples/train_qlora/llama3_lora_sft_otfq.yaml:
--------------------------------------------------------------------------------
 1 | ### model
 2 | model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 3 | quantization_bit: 4  # choices: [8 (bnb/hqq/eetq), 4 (bnb/hqq), 3 (hqq), 2 (hqq)]
 4 | quantization_method: bnb  # choices: [bnb, hqq, eetq]
 5 | trust_remote_code: true
 6 | 
 7 | ### method
 8 | stage: sft
 9 | do_train: true
10 | finetuning_type: lora
11 | lora_rank: 8
12 | lora_target: all
13 | 
14 | ### dataset
15 | dataset: identity,alpaca_en_demo
16 | template: llama3
17 | cutoff_len: 2048
18 | max_samples: 1000
19 | overwrite_cache: true
20 | preprocessing_num_workers: 16
21 | dataloader_num_workers: 4
22 | 
23 | ### output
24 | output_dir: saves/llama3-8b/lora/sft
25 | logging_steps: 10
26 | save_steps: 500
27 | plot_loss: true
28 | overwrite_output_dir: true
29 | save_only_model: false
30 | report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
31 | 
32 | ### train
33 | per_device_train_batch_size: 1
34 | gradient_accumulation_steps: 8
35 | learning_rate: 1.0e-4
36 | num_train_epochs: 3.0
37 | lr_scheduler_type: cosine
38 | warmup_ratio: 0.1
39 | bf16: true
40 | ddp_timeout: 180000000
41 | 
42 | ### eval
43 | # val_size: 0.1
44 | # per_device_eval_batch_size: 1
45 | # eval_strategy: steps
46 | # eval_steps: 500
47 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["setuptools>=61.0"]
 3 | build-backend = "setuptools.build_meta"
 4 | 
 5 | [project]
 6 | name = "llamafactory"
 7 | dynamic = [
 8 |     "version",
 9 |     "dependencies",
10 |     "optional-dependencies",
11 |     "requires-python",
12 |     "scripts",
13 |     "authors",
14 |     "description",
15 |     "readme",
16 |     "license",
17 |     "keywords",
18 |     "classifiers"
19 | ]
20 | 
21 | [tool.ruff]
22 | target-version = "py39"
23 | line-length = 119
24 | indent-width = 4
25 | 
26 | [tool.ruff.lint]
27 | ignore = [
28 |     "C408", # collection
29 |     "C901", # complex
30 |     "E501", # line too long
31 |     "E731", # lambda function
32 |     "E741", # ambiguous var name
33 |     "D100", # no doc public module
34 |     "D101", # no doc public class
35 |     "D102", # no doc public method
36 |     "D103", # no doc public function
37 |     "D104", # no doc public package
38 |     "D105", # no doc magic method
39 |     "D107", # no doc __init__
40 | ]
41 | extend-select = [
42 |     "C",      # complexity
43 |     "E",      # error
44 |     "F",      # pyflakes
45 |     "I",      # isort
46 |     "W",      # warning
47 |     "UP",     # pyupgrade
48 |     "D",      # pydocstyle
49 |     "PT009",  # pytest assert
50 |     "RUF022", # sort __all__
51 | ]
52 | 
53 | [tool.ruff.lint.isort]
54 | lines-after-imports = 2
55 | known-first-party = ["llamafactory"]
56 | known-third-party = [
57 |     "accelerate",
58 |     "datasets",
59 |     "gradio",
60 |     "numpy",
61 |     "peft",
62 |     "torch",
63 |     "transformers",
64 |     "trl",
65 | ]
66 | 
67 | [tool.ruff.lint.pydocstyle]
68 | convention = "google"
69 | 
70 | [tool.ruff.format]
71 | quote-style = "double"
72 | indent-style = "space"
73 | docstring-code-format = true
74 | skip-magic-trailing-comma = false
75 | line-ending = "auto"
76 | 
77 | [tool.uv]
78 | conflicts = [
79 |     [
80 |         { extra = "torch-npu" },
81 |         { extra = "aqlm" },
82 |     ],
83 |     [
84 |         { extra = "torch-npu" },
85 |         { extra = "liger-kernel" },
86 |     ],
87 |     [
88 |         { extra = "torch-npu" },
89 |         { extra = "vllm" },
90 |     ],
91 |     [
92 |         { extra = "torch-npu" },
93 |         { extra = "sglang" },
94 |     ],
95 | ]
96 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | transformers>=4.45.0,<=4.52.4,!=4.46.*,!=4.47.*,!=4.48.0,!=4.52.0; sys_platform != 'darwin'
 2 | transformers>=4.45.0,<=4.51.3,!=4.46.*,!=4.47.*,!=4.48.0,!=4.52.0; sys_platform == 'darwin'
 3 | datasets>=2.16.0,<=3.6.0
 4 | accelerate>=0.34.0,<=1.7.0
 5 | peft>=0.14.0,<=0.15.2
 6 | trl>=0.8.6,<=0.9.6
 7 | tokenizers>=0.19.0,<=0.21.1
 8 | gradio>=4.38.0,<=5.31.0
 9 | scipy
10 | einops
11 | sentencepiece
12 | tiktoken
13 | protobuf
14 | uvicorn
15 | fastapi
16 | sse-starlette
17 | matplotlib>=3.7.0
18 | fire
19 | omegaconf
20 | packaging
21 | pyyaml
22 | numpy<2.0.0
23 | pydantic<=2.10.6
24 | pandas>=2.0.0
25 | av
26 | librosa
27 | tyro<0.9.0
28 | 


--------------------------------------------------------------------------------
/scripts/api_example/test_image.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | 
17 | from openai import OpenAI
18 | from transformers.utils.versions import require_version
19 | 
20 | 
21 | require_version("openai>=1.5.0", "To fix: pip install openai>=1.5.0")
22 | 
23 | 
def main():
    r"""Hold a two-round image conversation with a local OpenAI-compatible server.

    The API key and port come from the ``API_KEY`` and ``API_PORT`` environment variables
    (falling back to "0" and 8000). Every reply is appended to the running history so the
    second request carries the first round as context.
    """
    port = os.getenv("API_PORT", 8000)
    client = OpenAI(
        api_key=str(os.getenv("API_KEY", "0")),
        base_url=f"http://localhost:{port}/v1",
    )

    # (question, image url) for each round; the comment shows an example reply.
    rounds = [
        # The image shows a pyramid of colored blocks with numbers on them. Here are the colors and numbers of ...
        (
            "Output the color and number of each box.",
            "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2-VL/boxes.png",
        ),
        # The image shows a cluster of forget-me-not flowers. Forget-me-nots are small ...
        (
            "What kind of flower is this?",
            "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2-VL/flowers.jpg",
        ),
    ]

    history = []
    for round_idx, (question, image_url) in enumerate(rounds, start=1):
        user_turn = {
            "role": "user",
            "content": [
                {"type": "text", "text": question},
                {"type": "image_url", "image_url": {"url": image_url}},
            ],
        }
        history.append(user_turn)
        response = client.chat.completions.create(messages=history, model="test")
        reply = response.choices[0].message
        # Keep the assistant reply in the history so the next round sees it.
        history.append(reply)
        print(f"Round {round_idx}:", reply.content)
62 | 
63 | 
64 | if __name__ == "__main__":
65 |     main()
66 | 


--------------------------------------------------------------------------------
/scripts/api_example/test_toolcall.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import json
16 | import os
17 | 
18 | from openai import OpenAI
19 | from transformers.utils.versions import require_version
20 | 
21 | 
22 | require_version("openai>=1.5.0", "To fix: pip install openai>=1.5.0")
23 | 
24 | 
def calculate_gpa(grades: list[str], hours: list[int]) -> float:
    r"""Calculate the credit-hour-weighted grade point average.

    Args:
        grades: Letter grades ("A", "B", "C", "D" or "F"), one per course.
        hours: Credit hours of each course, in the same order as ``grades``.

    Returns:
        The weighted GPA on a 4-point scale, rounded to two decimals.

    Raises:
        KeyError: If a grade is not one of the known letter grades.
        ValueError: If the two lists differ in length, or the total credit hours are zero.
    """
    grade_to_score = {"A": 4, "B": 3, "C": 2, "D": 1, "F": 0}

    # zip() would silently drop the unmatched tail, yielding a plausible but wrong GPA.
    if len(grades) != len(hours):
        raise ValueError(f"Got {len(grades)} grades but {len(hours)} credit hours.")

    total_score, total_hour = 0, 0
    for grade, hour in zip(grades, hours):
        total_score += grade_to_score[grade] * hour
        total_hour += hour

    # Covers empty input as well; avoids an opaque ZeroDivisionError below.
    if total_hour == 0:
        raise ValueError("Total credit hours must be non-zero.")

    return round(total_score / total_hour, 2)
32 | 
33 | 
def main():
    r"""Demonstrate one tool-calling round trip against a local OpenAI-compatible server.

    The model is asked a GPA question with the ``calculate_gpa`` tool advertised, the
    returned function call is executed locally, and its JSON result is sent back as a
    ``tool`` message so the model can phrase the final answer.

    Raises:
        ValueError: If the first response contains no tool call.
    """
    port = os.getenv("API_PORT", 8000)
    client = OpenAI(
        api_key=str(os.getenv("API_KEY", "0")),
        base_url=f"http://localhost:{port}/v1",
    )

    # JSON schema describing the arguments of calculate_gpa.
    gpa_parameters = {
        "type": "object",
        "properties": {
            "grades": {"type": "array", "items": {"type": "string"}, "description": "The grades"},
            "hours": {"type": "array", "items": {"type": "integer"}, "description": "The credit hours"},
        },
        "required": ["grades", "hours"],
    }
    tools = [
        {
            "type": "function",
            "function": {
                "name": "calculate_gpa",
                "description": "Calculate the Grade Point Average (GPA) based on grades and credit hours",
                "parameters": gpa_parameters,
            },
        }
    ]
    # Maps the tool name reported by the model to the local callable.
    tool_map = {"calculate_gpa": calculate_gpa}

    messages = [{"role": "user", "content": "My grades are A, A, B, and C. The credit hours are 3, 4, 3, and 2."}]
    first_response = client.chat.completions.create(messages=messages, model="test", tools=tools)
    assistant_message = first_response.choices[0].message
    if assistant_message.tool_calls is None:
        raise ValueError("Cannot retrieve function call from the response.")

    messages.append(assistant_message)
    function_call = assistant_message.tool_calls[0].function
    print(function_call)
    # Function(arguments='{"grades": ["A", "A", "B", "C"], "hours": [3, 4, 3, 2]}', name='calculate_gpa')
    call_name = function_call.name
    call_kwargs = json.loads(function_call.arguments)
    tool_result = tool_map[call_name](**call_kwargs)
    tool_message = {"role": "tool", "content": json.dumps({"gpa": tool_result}, ensure_ascii=False)}
    messages.append(tool_message)

    final_response = client.chat.completions.create(messages=messages, model="test", tools=tools)
    print(final_response.choices[0].message.content)
    # Based on the grades and credit hours you provided, your Grade Point Average (GPA) is 3.42.
74 | 
75 | 
76 | if __name__ == "__main__":
77 |     main()
78 | 


--------------------------------------------------------------------------------
/scripts/convert_ckpt/tiny_llama4.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from transformers import Llama4Config, Llama4ForConditionalGeneration, Llama4TextConfig, Llama4VisionConfig
16 | 
17 | 
if __name__ == "__main__":
    # Assemble a small Llama4 vision+text model directly from configs (no checkpoint is
    # loaded in this script) and write it to the "tiny-llama4" directory.
    text_hidden_size = 512
    text_num_heads = 8

    vision_kwargs = dict(
        hidden_size=1408,
        image_size=336,
        intermediate_size=5632,
        num_attention_heads=16,
        num_hidden_layers=4,
        vision_output_dim=4096,
    )
    text_kwargs = dict(
        hidden_size=text_hidden_size,
        intermediate_size=1024,
        intermediate_size_mlp=1024,
        num_hidden_layers=4,
        num_attention_heads=text_num_heads,
        num_key_value_heads=2,
        # Per-head width is the hidden size split evenly across the attention heads.
        head_dim=text_hidden_size // text_num_heads,
        num_local_experts=2,
    )

    vision_config = Llama4VisionConfig(**vision_kwargs)
    text_config = Llama4TextConfig(**text_kwargs)
    tiny_config = Llama4Config(vision_config=vision_config, text_config=text_config)

    tiny_model = Llama4ForConditionalGeneration._from_config(tiny_config)
    tiny_model.save_pretrained("tiny-llama4")
40 | 


--------------------------------------------------------------------------------
/scripts/eval_bleu_rouge.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import json
16 | import logging
17 | import time
18 | 
19 | import fire
20 | from datasets import load_dataset
21 | 
22 | 
23 | try:
24 |     import jieba  # type: ignore
25 |     from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu  # type: ignore
26 |     from rouge_chinese import Rouge  # type: ignore
27 | 
28 |     jieba.setLogLevel(logging.CRITICAL)
29 |     jieba.initialize()
30 | except ImportError:
31 |     print("Please install llamafactory with `pip install -e .[metrics]`.")
32 |     raise
33 | 
34 | 
def compute_metrics(sample):
    r"""Score one prediction against its label with ROUGE and BLEU-4.

    Args:
        sample: A mapping with the string fields "predict" and "label".

    Returns:
        A dict with the keys "rouge-1", "rouge-2", "rouge-l" and "bleu-4", each holding a
        percentage rounded to four decimals. ROUGE is computed on jieba tokens, BLEU on
        individual characters.
    """
    predict_text, label_text = sample["predict"], sample["label"]

    # ROUGE operates on space-joined jieba tokens.
    hypothesis_str = " ".join(jieba.cut(predict_text))
    reference_str = " ".join(jieba.cut(label_text))

    # BLEU is evaluated on the raw character sequences.
    bleu_score = sentence_bleu(
        [list(label_text)],
        list(predict_text),
        smoothing_function=SmoothingFunction().method3,
    )

    if hypothesis_str.split() and reference_str.split():
        rouge_scores = Rouge().get_scores(hypothesis_str, reference_str)[0]
    else:
        # One side has no tokens at all: report zeros instead of calling Rouge.
        rouge_scores = {"rouge-1": {"f": 0.0}, "rouge-2": {"f": 0.0}, "rouge-l": {"f": 0.0}}

    metric_result = {name: round(score["f"] * 100, 4) for name, score in rouge_scores.items()}
    metric_result["bleu-4"] = round(bleu_score * 100, 4)
    return metric_result
59 | 
60 | 
def main(filename: str):
    r"""Average the BLEU and ROUGE scores over a JSON predictions file.

    Each record is scored by ``compute_metrics``; the per-metric means are printed and
    written to ``predictions_score.json`` in the current working directory.

    Args:
        filename: Path of the JSON/JSON-lines file passed to ``load_dataset``.
    """
    start_time = time.time()
    predictions = load_dataset("json", data_files=filename, split="train")
    # Drop the input columns so that only the metric columns remain.
    scored = predictions.map(compute_metrics, num_proc=8, remove_columns=predictions.column_names)
    score_dict = scored.to_dict()

    average_score = {}
    for task in sorted(score_dict):
        scores = score_dict[task]
        mean_score = sum(scores) / len(scores)
        print(f"{task}: {mean_score:.4f}")
        average_score[task] = mean_score

    with open("predictions_score.json", "w", encoding="utf-8") as score_file:
        json.dump(average_score, score_file, indent=4)

    print(f"\nDone in {time.time() - start_time:.3f}s.\nScore file saved to predictions_score.json")
76 | 
77 | 
if __name__ == "__main__":
    # Expose `main` as a command-line interface through python-fire.
    fire.Fire(main)
80 | 


--------------------------------------------------------------------------------
/scripts/stat_utils/cal_flops.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 Microsoft Corporation and the LlamaFactory team.
 2 | #
 3 | # This code is inspired by the Microsoft's DeepSpeed library.
 4 | # https://www.deepspeed.ai/tutorials/flops-profiler/
 5 | #
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | #
10 | #     http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | 
18 | import fire
19 | import torch
20 | from deepspeed.accelerator import get_accelerator  # type: ignore
21 | from deepspeed.profiling.flops_profiler import get_model_profile  # type: ignore
22 | 
23 | from llamafactory.chat import ChatModel
24 | 
25 | 
def calculate_flops(
    model_name_or_path: str,
    batch_size: int = 1,
    seq_length: int = 512,
    flash_attn: str = "auto",
):
    r"""Calculate the flops of pre-trained models.

    Usage: python cal_flops.py --model_name_or_path path_to_model --batch_size 1 --seq_length 512

    The model is loaded through ``ChatModel`` and profiled with DeepSpeed's flops profiler on a
    dummy batch of ones shaped ``(batch_size, seq_length)``; FLOPs, MACs and parameter count are printed.
    """
    with get_accelerator().device(0):
        chat_model = ChatModel(
            {"model_name_or_path": model_name_or_path, "template": "empty", "flash_attn": flash_attn}
        )
        model = chat_model.engine.model
        fake_input = torch.ones((batch_size, seq_length), dtype=torch.long, device=model.device)
        # the same dummy ids are supplied as labels (cloned so the two tensors do not share storage)
        flops, macs, params = get_model_profile(
            model,
            kwargs={"input_ids": fake_input, "labels": fake_input.clone()},
            print_profile=True,
            detailed=True,
        )
        for label, value in (("FLOPs:", flops), ("MACs:", macs), ("Params:", params)):
            print(label, value)
46 | 
47 | 
if __name__ == "__main__":
    # Expose `calculate_flops` as a command-line interface through python-fire.
    fire.Fire(calculate_flops)
50 | 


--------------------------------------------------------------------------------
/scripts/stat_utils/length_cdf.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from collections import defaultdict
16 | 
17 | import fire
18 | from tqdm import tqdm
19 | 
20 | from llamafactory.data import get_dataset, get_template_and_fix_tokenizer
21 | from llamafactory.hparams import get_train_args
22 | from llamafactory.model import load_tokenizer
23 | 
24 | 
def length_cdf(
    model_name_or_path: str,
    dataset: str = "alpaca_en_demo",
    dataset_dir: str = "data",
    template: str = "default",
    interval: int = 1000,
):
    r"""Calculate the distribution of the input lengths in the dataset.

    Usage: export CUDA_VISIBLE_DEVICES=0
    python length_cdf.py --model_name_or_path path_to_model --dataset alpaca_en_demo --template default

    Lengths of ``input_ids`` are grouped into buckets of width ``interval`` and the cumulative
    sample count and percentage are printed per bucket, in ascending order.
    """
    train_kwargs = {
        "stage": "sft",
        "model_name_or_path": model_name_or_path,
        "dataset": dataset,
        "dataset_dir": dataset_dir,
        "template": template,
        "cutoff_len": 1_000_000,  # very large cutoff so that lengths are not capped at a small value
        "preprocessing_num_workers": 16,
        "output_dir": "dummy_dir",
        "overwrite_cache": True,
        "do_train": True,
    }
    model_args, data_args, training_args, _, _ = get_train_args(train_kwargs)
    tokenizer_module = load_tokenizer(model_args)
    # from here on `template` refers to the template object, not to the name passed in
    template = get_template_and_fix_tokenizer(tokenizer_module["tokenizer"], data_args)
    dataset_module = get_dataset(template, model_args, data_args, training_args, "sft", **tokenizer_module)
    trainset = dataset_module["train_dataset"]
    total_num = len(trainset)

    length_dict = defaultdict(int)
    for sample in tqdm(trainset["input_ids"], desc="Collecting lengths"):
        bucket_start = len(sample) // interval * interval
        length_dict[bucket_start] += 1

    count_accu, prob_accu = 0, 0
    for length in sorted(length_dict):
        count = length_dict[length]
        count_accu += count
        prob_accu += count / total_num * 100
        print(f"{count_accu:d} ({prob_accu:.2f}%) samples have length < {length + interval}.")
66 | 
67 | 
if __name__ == "__main__":
    # Expose `length_cdf` as a command-line interface through python-fire.
    fire.Fire(length_cdf)
70 | 


--------------------------------------------------------------------------------
/src/api.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | 
17 | import uvicorn
18 | 
19 | from llamafactory.api.app import create_app
20 | from llamafactory.chat import ChatModel
21 | 
22 | 
def main():
    r"""Start the API server with uvicorn.

    The bind address comes from the ``API_HOST`` environment variable (default ``0.0.0.0``)
    and the port from ``API_PORT`` (default ``8000``).
    """
    app = create_app(ChatModel())
    env = os.environ
    api_host = env.get("API_HOST", "0.0.0.0")
    api_port = int(env.get("API_PORT", "8000"))
    print(f"Visit http://localhost:{api_port}/docs for API document.")
    uvicorn.run(app, host=api_host, port=api_port)
30 | 
31 | 
if __name__ == "__main__":
    # Run the API server when this file is executed as a script.
    main()
34 | 


--------------------------------------------------------------------------------
/src/llamafactory/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | r"""Efficient fine-tuning of large language models.
16 | 
17 | Level:
18 |   api, webui > chat, eval, train > data, model > hparams > extras
19 | 
20 | Disable version checking: DISABLE_VERSION_CHECK=1
21 | Enable VRAM recording: RECORD_VRAM=1
22 | Force using torchrun: FORCE_TORCHRUN=1
23 | Set logging verbosity: LLAMAFACTORY_VERBOSITY=WARN
24 | Use modelscope: USE_MODELSCOPE_HUB=1
25 | Use openmind: USE_OPENMIND_HUB=1
26 | """
27 | 
28 | from .extras.env import VERSION
29 | 
30 | 
# Expose the version string imported from `.extras.env` as the package version.
__version__ = VERSION
32 | 


--------------------------------------------------------------------------------
/src/llamafactory/api/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hiyouga/LLaMA-Factory/65aa86ed3978151eb01bd5ef2c8df20e178a7d30/src/llamafactory/api/__init__.py


--------------------------------------------------------------------------------
/src/llamafactory/api/common.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import json
16 | from typing import TYPE_CHECKING, Any
17 | 
18 | 
19 | if TYPE_CHECKING:
20 |     from pydantic import BaseModel
21 | 
22 | 
def dictify(data: "BaseModel") -> dict[str, Any]:
    r"""Convert a pydantic model to a dict that contains only the explicitly set fields.

    Works with both pydantic v2 (``model_dump``) and pydantic v1 (``dict``). The method is
    looked up first and called afterwards, so an ``AttributeError`` raised *inside* the dump
    is propagated instead of being mistaken for "this is a pydantic v1 model".

    Raises:
        AttributeError: if ``data`` offers neither ``model_dump`` nor ``dict``.
    """
    model_dump = getattr(data, "model_dump", None)
    if model_dump is not None:  # pydantic v2
        return model_dump(exclude_unset=True)

    return data.dict(exclude_unset=True)  # pydantic v1
28 | 
29 | 
def jsonify(data: "BaseModel") -> str:
    r"""Serialize a pydantic model to a JSON string that contains only the explicitly set fields.

    Non-ASCII characters are kept as-is (``ensure_ascii=False``). Works with both pydantic v2
    (``model_dump`` + ``json.dumps``) and pydantic v1 (``json``). The method is looked up first
    and called afterwards, so an ``AttributeError`` raised *inside* the dump is propagated
    instead of being mistaken for "this is a pydantic v1 model".

    Raises:
        AttributeError: if ``data`` offers neither ``model_dump`` nor ``json``.
    """
    model_dump = getattr(data, "model_dump", None)
    if model_dump is not None:  # pydantic v2
        return json.dumps(model_dump(exclude_unset=True), ensure_ascii=False)

    return data.json(exclude_unset=True, ensure_ascii=False)  # pydantic v1
35 | 


--------------------------------------------------------------------------------
/src/llamafactory/chat/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .base_engine import BaseEngine
16 | from .chat_model import ChatModel
17 | 
18 | 
# Public names re-exported by this package.
__all__ = ["BaseEngine", "ChatModel"]
20 | 


--------------------------------------------------------------------------------
/src/llamafactory/chat/base_engine.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from abc import ABC, abstractmethod
16 | from collections.abc import AsyncGenerator
17 | from dataclasses import dataclass
18 | from typing import TYPE_CHECKING, Any, Literal, Optional, Union
19 | 
20 | 
21 | if TYPE_CHECKING:
22 |     from transformers import PreTrainedModel, PreTrainedTokenizer
23 |     from vllm import AsyncLLMEngine
24 | 
25 |     from ..data import Template
26 |     from ..data.mm_plugin import AudioInput, ImageInput, VideoInput
27 |     from ..extras.constants import EngineName
28 |     from ..hparams import DataArguments, FinetuningArguments, GeneratingArguments, ModelArguments
29 | 
30 | 
@dataclass
class Response:
    r"""Record describing a single response produced by an inference engine.

    NOTE(review): the two length fields are presumably token counts -- confirm against
    the engine implementations.
    """

    response_text: str
    response_length: int
    prompt_length: int
    # the annotation only admits the values "stop" and "length"
    finish_reason: Literal["stop", "length"]
37 | 
38 | 
class BaseEngine(ABC):
    r"""Base class for inference engines of chat models.

    Subclasses must implement ``__init__`` and the async methods chat(), stream_chat() and get_scores().
    """

    # attributes every concrete engine is expected to provide
    name: "EngineName"
    model: Union["PreTrainedModel", "AsyncLLMEngine"]
    tokenizer: "PreTrainedTokenizer"
    can_generate: bool
    template: "Template"
    generating_args: dict[str, Any]

    @abstractmethod
    def __init__(
        self,
        model_args: "ModelArguments",
        data_args: "DataArguments",
        finetuning_args: "FinetuningArguments",
        generating_args: "GeneratingArguments",
    ) -> None:
        r"""Initialize an inference engine from the four argument groups."""
        ...

    @abstractmethod
    async def chat(
        self,
        messages: list[dict[str, str]],
        system: Optional[str] = None,
        tools: Optional[str] = None,
        images: Optional[list["ImageInput"]] = None,
        videos: Optional[list["VideoInput"]] = None,
        audios: Optional[list["AudioInput"]] = None,
        **input_kwargs,
    ) -> list["Response"]:
        r"""Get a list of responses of the chat model.

        Takes the conversation ``messages`` plus optional ``system``/``tools`` strings and optional
        image, video and audio inputs; any further keyword arguments arrive in ``input_kwargs``.
        """
        ...

    @abstractmethod
    async def stream_chat(
        self,
        messages: list[dict[str, str]],
        system: Optional[str] = None,
        tools: Optional[str] = None,
        images: Optional[list["ImageInput"]] = None,
        videos: Optional[list["VideoInput"]] = None,
        audios: Optional[list["AudioInput"]] = None,
        **input_kwargs,
    ) -> AsyncGenerator[str, None]:
        r"""Get the response token-by-token of the chat model.

        Accepts the same arguments as chat() and is annotated to yield strings.
        """
        ...

    @abstractmethod
    async def get_scores(
        self,
        batch_input: list[str],
        **input_kwargs,
    ) -> list[float]:
        r"""Get a list of scores of the reward model for a batch of input strings."""
        ...
99 | 


--------------------------------------------------------------------------------
/src/llamafactory/data/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .collator import (
16 |     KTODataCollatorWithPadding,
17 |     MultiModalDataCollatorForSeq2Seq,
18 |     PairwiseDataCollatorWithPadding,
19 |     SFTDataCollatorWith4DAttentionMask,
20 | )
21 | from .data_utils import Role, split_dataset
22 | from .loader import get_dataset
23 | from .template import TEMPLATES, Template, get_template_and_fix_tokenizer
24 | 
25 | 
# Public names re-exported by this package.
__all__ = [
    "TEMPLATES",
    "KTODataCollatorWithPadding",
    "MultiModalDataCollatorForSeq2Seq",
    "PairwiseDataCollatorWithPadding",
    "Role",
    "SFTDataCollatorWith4DAttentionMask",
    "Template",
    "get_dataset",
    "get_template_and_fix_tokenizer",
    "split_dataset",
]
38 | 


--------------------------------------------------------------------------------
/src/llamafactory/data/processor/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .feedback import FeedbackDatasetProcessor
16 | from .pairwise import PairwiseDatasetProcessor
17 | from .pretrain import PretrainDatasetProcessor
18 | from .processor_utils import DatasetProcessor
19 | from .supervised import PackedSupervisedDatasetProcessor, SupervisedDatasetProcessor
20 | from .unsupervised import UnsupervisedDatasetProcessor
21 | 
22 | 
# Public names re-exported by this package.
__all__ = [
    "DatasetProcessor",
    "FeedbackDatasetProcessor",
    "PackedSupervisedDatasetProcessor",
    "PairwiseDatasetProcessor",
    "PretrainDatasetProcessor",
    "SupervisedDatasetProcessor",
    "UnsupervisedDatasetProcessor",
]
32 | 


--------------------------------------------------------------------------------
/src/llamafactory/data/processor/pretrain.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 HuggingFace Inc. and the LlamaFactory team.
 2 | #
 3 | # This code is inspired by the HuggingFace's transformers library.
 4 | # https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py
 5 | #
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | #
10 | #     http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | 
18 | from dataclasses import dataclass
19 | from itertools import chain
20 | from typing import Any
21 | 
22 | from .processor_utils import DatasetProcessor
23 | 
24 | 
@dataclass
class PretrainDatasetProcessor(DatasetProcessor):
    r"""Dataset processor that tokenizes plain text for pre-training, optionally packed into fixed-size blocks."""

    def preprocess_dataset(self, examples: dict[str, list[Any]]) -> dict[str, list[Any]]:
        r"""Tokenize the first message of every prompt in ``examples["_prompt"]``.

        Without packing, each text is tokenized on its own and truncated to ``cutoff_len``.
        With packing, all token sequences are concatenated (format ``X1 X2 X3 ...``) and cut
        into blocks of exactly ``cutoff_len`` items; the incomplete trailing block is dropped.
        """
        tokenizer, data_args = self.tokenizer, self.data_args
        # the llama3 template gets a fixed end-of-text marker instead of the tokenizer's EOS token
        if data_args.template == "llama3":
            eos_token = "<|end_of_text|>"
        else:
            eos_token = tokenizer.eos_token

        texts = [messages[0]["content"] + eos_token for messages in examples["_prompt"]]
        prepend_bos = getattr(tokenizer, "add_bos_token", False)

        if not data_args.packing:
            if prepend_bos:
                # special tokens are disabled in the tokenizer call below, so BOS is added as text
                texts = [tokenizer.bos_token + text for text in texts]

            return tokenizer(texts, add_special_tokens=False, truncation=True, max_length=data_args.cutoff_len)

        encoded = tokenizer(texts, add_special_tokens=False)
        merged = {key: list(chain.from_iterable(encoded[key])) for key in encoded.keys()}
        first_key = list(merged.keys())[0]
        block_size = data_args.cutoff_len
        usable_length = (len(merged[first_key]) // block_size) * block_size
        result = {
            key: [tokens[start : start + block_size] for start in range(0, usable_length, block_size)]
            for key, tokens in merged.items()
        }
        if prepend_bos:
            # the first token of every block is overwritten with BOS (not inserted in front of it)
            for block in result["input_ids"]:
                block[0] = tokenizer.bos_token_id

        return result

    def print_data_example(self, example: dict[str, list[int]]) -> None:
        r"""Print the token ids of ``example`` followed by their decoded text (special tokens kept)."""
        input_ids = example["input_ids"]
        print(f"input_ids:\n{input_ids}")
        decoded = self.tokenizer.decode(input_ids, skip_special_tokens=False)
        print(f"inputs:\n{decoded}")
58 | 


--------------------------------------------------------------------------------
/src/llamafactory/eval/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hiyouga/LLaMA-Factory/65aa86ed3978151eb01bd5ef2c8df20e178a7d30/src/llamafactory/eval/__init__.py


--------------------------------------------------------------------------------
/src/llamafactory/eval/template.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from dataclasses import dataclass
16 | 
17 | from ..data import Role
18 | from ..extras.constants import CHOICES
19 | 
20 | 
@dataclass
class EvalTemplate:
    r"""Prompt template for multiple-choice evaluation.

    ``system`` is formatted with ``subject``, ``choice`` with ``choice`` and ``content``;
    ``answer`` is appended verbatim after the choices.
    """

    system: str
    choice: str
    answer: str

    def _parse_example(self, example: dict[str, str]) -> tuple[str, str]:
        r"""Parse eval example.

        input: a dict with keys {"question", "A", "B", "C", "D", "answer"}
        output: a tuple of (prompt, response).
        """
        # only the choice labels that are actually present in the example are rendered
        options = [self.choice.format(choice=label, content=example[label]) for label in CHOICES if label in example]
        prompt = "".join([example["question"], *options, self.answer])
        return prompt, example["answer"]

    def format_example(
        self, target_data: dict[str, str], support_set: list[dict[str, str]], subject_name: str
    ) -> list[dict[str, str]]:
        r"""Convert dataset examples to messages.

        Every support example and finally the target example contribute one user message
        (the prompt) and one assistant message (the answer). The formatted system text is
        prepended to the content of the very first message.
        """
        messages = []

        def _append_turn(example: dict[str, str]) -> None:
            prompt, response = self._parse_example(example)
            messages.append({"role": Role.USER.value, "content": prompt})
            messages.append({"role": Role.ASSISTANT.value, "content": response})

        for shot in support_set:
            _append_turn(shot)

        _append_turn(target_data)
        first_message = messages[0]
        first_message["content"] = self.system.format(subject=subject_name) + first_message["content"]
        return messages
51 | 
52 | 
# Registry that maps a template name to its EvalTemplate instance.
eval_templates: dict[str, "EvalTemplate"] = {}
54 | 
55 | 
def _register_eval_template(name: str, system: str, choice: str, answer: str) -> None:
    r"""Create an EvalTemplate and store it in ``eval_templates`` under ``name``.

    An existing entry with the same name is replaced.
    """
    eval_templates[name] = EvalTemplate(system=system, choice=choice, answer=answer)
58 | 
59 | 
def get_eval_template(name: str) -> "EvalTemplate":
    r"""Return the eval template registered under ``name``.

    Raises an AssertionError if no such template exists.
    NOTE: the check is an ``assert`` statement, so when Python runs with ``-O`` it is
    skipped and ``None`` is returned for an unknown name.
    """
    eval_template = eval_templates.get(name, None)
    assert eval_template is not None, f"Template {name} does not exist."
    return eval_template
64 | 
65 | 
# English template.
_register_eval_template(
    name="en",
    system="The following are multiple choice questions (with answers) about {subject}.\n\n",
    choice="\n{choice}. {content}",
    answer="\nAnswer:",
)


# Chinese template. The system text reads roughly: "The following are single-choice questions
# from Chinese exams about {subject}; please select the correct answer." The answer cue means "Answer:".
_register_eval_template(
    name="zh",
    system="以下是中国关于{subject}考试的单项选择题，请选出其中的正确答案。\n\n",
    choice="\n{choice}. {content}",
    answer="\n答案：",
)
80 | 


--------------------------------------------------------------------------------
/src/llamafactory/extras/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hiyouga/LLaMA-Factory/65aa86ed3978151eb01bd5ef2c8df20e178a7d30/src/llamafactory/extras/__init__.py


--------------------------------------------------------------------------------
/src/llamafactory/extras/env.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 HuggingFace Inc. and the LlamaFactory team.
 2 | #
 3 | # This code is inspired by the HuggingFace's transformers library.
 4 | # https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/commands/env.py
 5 | #
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | #
10 | #     http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | 
18 | import os
19 | import platform
20 | 
21 | import accelerate
22 | import datasets
23 | import peft
24 | import torch
25 | import transformers
26 | import trl
27 | from transformers.utils import is_torch_cuda_available, is_torch_npu_available
28 | 
29 | 
# Version string of the package; reported by print_env().
VERSION = "0.9.3.dev0"
31 | 
32 | 
def print_env() -> None:
    r"""Print a bullet list that describes the runtime environment.

    Always reported: package, platform, Python and core library versions. CUDA / NPU details
    are added when the respective device is available. DeepSpeed, bitsandbytes, vLLM and the
    current git commit are reported on a best-effort basis: any failure while querying them
    is skipped silently. The last entry tells whether a ``data`` path exists in the current
    working directory.
    """
    info = {
        "`llamafactory` version": VERSION,
        "Platform": platform.platform(),
        "Python version": platform.python_version(),
        "PyTorch version": torch.__version__,
        "Transformers version": transformers.__version__,
        "Datasets version": datasets.__version__,
        "Accelerate version": accelerate.__version__,
        "PEFT version": peft.__version__,
        "TRL version": trl.__version__,
    }

    if is_torch_cuda_available():
        info["PyTorch version"] += " (GPU)"
        info["GPU type"] = torch.cuda.get_device_name()
        info["GPU number"] = torch.cuda.device_count()
        total_memory = torch.cuda.mem_get_info()[1]
        info["GPU memory"] = f"{total_memory / (1024**3):.2f}GB"

    if is_torch_npu_available():
        info["PyTorch version"] += " (NPU)"
        info["NPU type"] = torch.npu.get_device_name()
        info["CANN version"] = torch.version.cann

    # optional dependencies: report the version when the import works, otherwise leave the entry out
    optional_packages = (
        ("DeepSpeed version", "deepspeed"),
        ("Bitsandbytes version", "bitsandbytes"),
        ("vLLM version", "vllm"),
    )
    for label, module_name in optional_packages:
        try:
            info[label] = __import__(module_name).__version__
        except Exception:
            pass

    try:
        import subprocess

        completed = subprocess.run(["git", "rev-parse", "HEAD"], capture_output=True, text=True, check=True)
        info["Git commit"] = completed.stdout.strip()
    except Exception:
        pass

    info["Default data directory"] = "detected" if os.path.exists("data") else "not detected"

    lines = [f"- {key}: {value}" for key, value in info.items()]
    print("\n" + "\n".join(lines) + "\n")
93 | 


--------------------------------------------------------------------------------
/src/llamafactory/extras/packages.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2025 HuggingFace Inc. and the LlamaFactory team.
  2 | #
  3 | # This code is inspired by the HuggingFace's transformers library.
  4 | # https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/utils/import_utils.py
  5 | #
  6 | # Licensed under the Apache License, Version 2.0 (the "License");
  7 | # you may not use this file except in compliance with the License.
  8 | # You may obtain a copy of the License at
  9 | #
 10 | #     http://www.apache.org/licenses/LICENSE-2.0
 11 | #
 12 | # Unless required by applicable law or agreed to in writing, software
 13 | # distributed under the License is distributed on an "AS IS" BASIS,
 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15 | # See the License for the specific language governing permissions and
 16 | # limitations under the License.
 17 | 
 18 | import importlib.metadata
 19 | import importlib.util
 20 | from functools import lru_cache
 21 | from typing import TYPE_CHECKING
 22 | 
 23 | from packaging import version
 24 | 
 25 | 
 26 | if TYPE_CHECKING:
 27 |     from packaging.version import Version
 28 | 
 29 | 
 30 | def _is_package_available(name: str) -> bool:
 31 |     return importlib.util.find_spec(name) is not None
 32 | 
 33 | 
 34 | def _get_package_version(name: str) -> "Version":
 35 |     try:
 36 |         return version.parse(importlib.metadata.version(name))
 37 |     except Exception:
 38 |         return version.parse("0.0.0")
 39 | 
 40 | 
def is_pyav_available() -> bool:
    r"""Return True if the ``av`` module can be located."""
    return _is_package_available("av")
 43 | 
 44 | 
def is_librosa_available() -> bool:
    r"""Return True if the ``librosa`` module can be located."""
    return _is_package_available("librosa")
 47 | 
 48 | 
def is_fastapi_available() -> bool:
    r"""Return True if the ``fastapi`` module can be located."""
    return _is_package_available("fastapi")
 51 | 
 52 | 
def is_galore_available() -> bool:
    r"""Return True if the ``galore_torch`` module can be located."""
    return _is_package_available("galore_torch")
 55 | 
 56 | 
def is_apollo_available() -> bool:
    r"""Return True if the ``apollo_torch`` module can be located."""
    return _is_package_available("apollo_torch")
 59 | 
 60 | 
def is_gradio_available() -> bool:
    r"""Return True if the ``gradio`` module can be located."""
    return _is_package_available("gradio")
 63 | 
 64 | 
def is_matplotlib_available() -> bool:
    r"""Return True if the ``matplotlib`` module can be located."""
    return _is_package_available("matplotlib")
 67 | 
 68 | 
def is_pillow_available() -> bool:
    r"""Return True if the ``PIL`` module (the import name checked for Pillow) can be located."""
    return _is_package_available("PIL")
 71 | 
 72 | 
def is_ray_available() -> bool:
    r"""Return True if the ``ray`` module can be located."""
    return _is_package_available("ray")
 75 | 
 76 | 
def is_requests_available() -> bool:
    r"""Return True if the ``requests`` module can be located."""
    return _is_package_available("requests")
 79 | 
 80 | 
def is_rouge_available() -> bool:
    r"""Return True if the ``rouge_chinese`` module can be located."""
    return _is_package_available("rouge_chinese")
 83 | 
 84 | 
def is_starlette_available() -> bool:
    r"""Return True if the ``sse_starlette`` module can be located.

    NOTE: the module that is checked is ``sse_starlette``, not ``starlette`` itself.
    """
    return _is_package_available("sse_starlette")
 87 | 
 88 | 
@lru_cache
def is_transformers_version_greater_than(content: str) -> bool:
    r"""Return True if the installed transformers version is at least ``content``.

    NOTE: despite the function name the comparison is inclusive (``>=``).
    The result is memoized per ``content`` string by ``lru_cache``.
    """
    return _get_package_version("transformers") >= version.parse(content)
 92 | 
 93 | 
def is_uvicorn_available() -> bool:
    r"""Return True if the ``uvicorn`` module can be located."""
    return _is_package_available("uvicorn")
 96 | 
 97 | 
def is_vllm_available() -> bool:
    r"""Return whether the `vllm` module can be located by the import system."""
    return _is_package_available("vllm")
100 | 
101 | 
def is_sglang_available() -> bool:
    r"""Return whether the `sglang` module can be located by the import system."""
    return _is_package_available("sglang")
104 | 


--------------------------------------------------------------------------------
/src/llamafactory/hparams/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .data_args import DataArguments
16 | from .evaluation_args import EvaluationArguments
17 | from .finetuning_args import FinetuningArguments
18 | from .generating_args import GeneratingArguments
19 | from .model_args import ModelArguments
20 | from .parser import get_eval_args, get_infer_args, get_ray_args, get_train_args, read_args
21 | from .training_args import RayArguments, TrainingArguments
22 | 
23 | 
24 | __all__ = [
25 |     "DataArguments",
26 |     "EvaluationArguments",
27 |     "FinetuningArguments",
28 |     "GeneratingArguments",
29 |     "ModelArguments",
30 |     "RayArguments",
31 |     "TrainingArguments",
32 |     "get_eval_args",
33 |     "get_infer_args",
34 |     "get_ray_args",
35 |     "get_train_args",
36 |     "read_args",
37 | ]
38 | 


--------------------------------------------------------------------------------
/src/llamafactory/hparams/evaluation_args.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | from dataclasses import dataclass, field
17 | from typing import Literal, Optional
18 | 
19 | from datasets import DownloadMode
20 | 
21 | 
@dataclass
class EvaluationArguments:
    r"""Arguments that specify the evaluation parameters.

    Raises:
        ValueError: On construction, if `save_dir` is given and already exists on disk.
    """

    # required: there is no default task
    task: str = field(
        metadata={"help": "Name of the evaluation task."},
    )
    task_dir: str = field(
        default="evaluation",
        metadata={"help": "Path to the folder containing the evaluation datasets."},
    )
    batch_size: int = field(
        default=4,
        metadata={"help": "The batch size per GPU for evaluation."},
    )
    seed: int = field(
        default=42,
        metadata={"help": "Random seed to be used with data loaders."},
    )
    lang: Literal["en", "zh"] = field(
        default="en",
        metadata={"help": "Language used at evaluation."},
    )
    n_shot: int = field(
        default=5,
        metadata={"help": "Number of exemplars for few-shot learning."},
    )
    save_dir: Optional[str] = field(
        default=None,
        metadata={"help": "Path to save the evaluation results."},
    )
    download_mode: DownloadMode = field(
        default=DownloadMode.REUSE_DATASET_IF_EXISTS,
        metadata={"help": "Download mode used for the evaluation datasets."},
    )

    def __post_init__(self) -> None:
        r"""Refuse to reuse an existing `save_dir`."""
        if self.save_dir is not None and os.path.exists(self.save_dir):
            raise ValueError("`save_dir` already exists, use another one.")
61 | 


--------------------------------------------------------------------------------
/src/llamafactory/hparams/generating_args.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from dataclasses import asdict, dataclass, field
16 | from typing import Any
17 | 
18 | from transformers import GenerationConfig
19 | 
20 | 
@dataclass
class GeneratingArguments:
    r"""Decoding parameters used for text generation."""

    do_sample: bool = field(
        default=True,
        metadata={"help": "Whether or not to use sampling, use greedy decoding otherwise."},
    )
    temperature: float = field(
        default=0.95,
        metadata={"help": "The value used to modulate the next token probabilities."},
    )
    top_p: float = field(
        default=0.7,
        metadata={
            "help": (
                "The smallest set of most probable tokens with probabilities that add up to top_p or higher are kept."
            )
        },
    )
    top_k: int = field(
        default=50,
        metadata={"help": "The number of highest probability vocabulary tokens to keep for top-k filtering."},
    )
    num_beams: int = field(
        default=1,
        metadata={"help": "Number of beams for beam search. 1 means no beam search."},
    )
    max_length: int = field(
        default=1024,
        metadata={"help": "The maximum length the generated tokens can have. It can be overridden by max_new_tokens."},
    )
    max_new_tokens: int = field(
        default=1024,
        metadata={"help": "The maximum numbers of tokens to generate, ignoring the number of tokens in the prompt."},
    )
    repetition_penalty: float = field(
        default=1.0,
        metadata={"help": "The parameter for repetition penalty. 1.0 means no penalty."},
    )
    length_penalty: float = field(
        default=1.0,
        metadata={"help": "Exponential penalty to the length that is used with beam-based generation."},
    )
    skip_special_tokens: bool = field(
        default=True,
        metadata={"help": "Whether or not to remove special tokens in the decoding."},
    )

    def to_dict(self, obey_generation_config: bool = False) -> dict[str, Any]:
        r"""Export the arguments as a plain dict of generation kwargs.

        Only one length limit is kept: `max_new_tokens` when it is positive, `max_length` otherwise.
        With `obey_generation_config=True`, keys that are not attributes of a default
        `GenerationConfig` instance are dropped as well.
        """
        params = asdict(self)

        # exactly one of the two length limits survives
        dropped_key = "max_length" if params.get("max_new_tokens", -1) > 0 else "max_new_tokens"
        params.pop(dropped_key, None)

        if not obey_generation_config:
            return params

        reference_config = GenerationConfig()
        return {name: value for name, value in params.items() if hasattr(reference_config, name)}
84 | 


--------------------------------------------------------------------------------
/src/llamafactory/launcher.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from llamafactory.train.tuner import run_exp  # use absolute import
16 | 
17 | 
def launch() -> None:
    r"""Entry point of the launcher: delegate to `run_exp` with no arguments."""
    run_exp()
20 | 
21 | 
# start the experiment only when this module is executed as a script, not when it is imported
if __name__ == "__main__":
    launch()
24 | 


--------------------------------------------------------------------------------
/src/llamafactory/model/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .loader import load_config, load_model, load_tokenizer
16 | from .model_utils.misc import find_all_linear_modules
17 | from .model_utils.quantization import QuantizationMethod
18 | from .model_utils.valuehead import load_valuehead_params
19 | 
20 | 
21 | __all__ = [
22 |     "QuantizationMethod",
23 |     "find_all_linear_modules",
24 |     "load_config",
25 |     "load_model",
26 |     "load_tokenizer",
27 |     "load_valuehead_params",
28 | ]
29 | 


--------------------------------------------------------------------------------
/src/llamafactory/model/model_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hiyouga/LLaMA-Factory/65aa86ed3978151eb01bd5ef2c8df20e178a7d30/src/llamafactory/model/model_utils/__init__.py


--------------------------------------------------------------------------------
/src/llamafactory/model/model_utils/embedding.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import math
16 | from contextlib import nullcontext
17 | from typing import TYPE_CHECKING
18 | 
19 | import torch
20 | from transformers.integrations import is_deepspeed_zero3_enabled
21 | 
22 | from ...extras import logging
23 | 
24 | 
25 | if TYPE_CHECKING:
26 |     from transformers import PreTrainedModel, PreTrainedTokenizer
27 | 
28 | 
29 | logger = logging.get_logger(__name__)
30 | 
31 | 
32 | def _noisy_mean_initialization(embed_weight: "torch.Tensor", num_new_tokens: int) -> None:
33 |     embedding_dim = embed_weight.size(1)
34 |     avg_weight = embed_weight[:-num_new_tokens].mean(dim=0, keepdim=True)
35 |     noise_weight = torch.empty_like(embed_weight[-num_new_tokens:])
36 |     noise_weight.normal_(mean=0, std=(1.0 / math.sqrt(embedding_dim)))
37 |     embed_weight[-num_new_tokens:] = avg_weight + noise_weight
38 | 
39 | 
def resize_embedding_layer(model: "PreTrainedModel", tokenizer: "PreTrainedTokenizer") -> None:
    r"""Grow the model's token embeddings when the tokenizer has more tokens than the model.

    Nothing happens if the tokenizer already fits. Otherwise the embeddings are resized
    (padded to a multiple of 64) and the added rows of the input and output embeddings
    get a noisy-mean initialization.

    Raises:
        ValueError: If the model is quantized or its output embeddings are not a `torch.nn.Linear`.
    """
    if not is_deepspeed_zero3_enabled():
        gather_context = nullcontext()
    else:
        import deepspeed  # type: ignore

        gathered_params = [model.get_input_embeddings().weight]
        if model.get_output_embeddings() is not None and not model.config.tie_word_embeddings:
            gathered_params.append(model.get_output_embeddings().weight)

        gather_context = deepspeed.zero.GatheredParameters(gathered_params, modifier_rank=0)

    with gather_context:
        old_size = model.get_input_embeddings().weight.size(0)

    if len(tokenizer) <= old_size:
        return

    if getattr(model, "quantization_method", None):
        raise ValueError("Cannot resize embedding layers of a quantized model.")

    if not isinstance(model.get_output_embeddings(), torch.nn.Linear):
        raise ValueError("Current model does not support resizing embedding layers.")

    model.resize_token_embeddings(len(tokenizer), pad_to_multiple_of=64)
    with gather_context:
        new_size = model.get_input_embeddings().weight.size(0)
        added_rows = new_size - old_size
        _noisy_mean_initialization(model.get_input_embeddings().weight.data, added_rows)
        _noisy_mean_initialization(model.get_output_embeddings().weight.data, added_rows)

    logger.info_rank0(f"Resized token embeddings from {old_size} to {new_size}.")
71 | 


--------------------------------------------------------------------------------
/src/llamafactory/model/model_utils/kv_cache.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from typing import TYPE_CHECKING
16 | 
17 | from ...extras import logging
18 | 
19 | 
20 | logger = logging.get_logger(__name__)
21 | 
22 | 
23 | if TYPE_CHECKING:
24 |     from transformers import PretrainedConfig
25 | 
26 |     from ...hparams import ModelArguments
27 | 
28 | 
def configure_kv_cache(config: "PretrainedConfig", model_args: "ModelArguments", is_trainable: bool) -> None:
    r"""Set `use_cache` on `config` (and on `config.text_config` when present).

    The cache is always turned off for training; otherwise `model_args.use_cache` decides.
    """
    use_cache = False if is_trainable else model_args.use_cache

    setattr(config, "use_cache", use_cache)
    if hasattr(config, "text_config"):
        setattr(config.text_config, "use_cache", use_cache)

    if is_trainable:
        message = "KV cache is disabled during training."
    elif use_cache:
        message = "KV cache is enabled for faster generation."
    else:
        message = "KV cache is disabled."

    logger.info_rank0(message)
45 | 


--------------------------------------------------------------------------------
/src/llamafactory/model/model_utils/mod.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from typing import TYPE_CHECKING
16 | 
17 | from ...extras.constants import MOD_SUPPORTED_MODELS
18 | 
19 | 
20 | if TYPE_CHECKING:
21 |     from transformers import PretrainedConfig, PreTrainedModel
22 | 
23 |     from ...hparams import ModelArguments
24 | 
25 | 
def load_mod_pretrained_model(**init_kwargs) -> "PreTrainedModel":
    r"""Load a model through `MoD.AutoMoDModelForCausalLM.from_pretrained`.

    `init_kwargs` are forwarded unchanged. `MoD` is imported lazily, so it is only
    required when this function is actually called.
    """
    from MoD import AutoMoDModelForCausalLM

    return AutoMoDModelForCausalLM.from_pretrained(**init_kwargs)
30 | 
31 | 
def convert_pretrained_model_to_mod(
    model: "PreTrainedModel", config: "PretrainedConfig", model_args: "ModelArguments"
) -> "PreTrainedModel":
    r"""Apply `MoD.apply_mod_to_hf` to `model` and cast the result to `model_args.compute_dtype`.

    Raises:
        ValueError: If `config.model_type` is not listed in `MOD_SUPPORTED_MODELS`.
    """
    from MoD import apply_mod_to_hf

    model_type = getattr(config, "model_type", None)
    if model_type not in MOD_SUPPORTED_MODELS:
        raise ValueError("Current model is not supported by mixture-of-depth.")

    return apply_mod_to_hf(model).to(model_args.compute_dtype)
43 | 


--------------------------------------------------------------------------------
/src/llamafactory/model/model_utils/rope.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 LMSYS and the LlamaFactory team.
 2 | # Copyright 2023 Rohan Taori, Ishaan Gulrajani, Tianyi Zhang, Yann Dubois, Xuechen Li
 3 | #
 4 | # This code is inspired by the LMSYS's FastChat library.
 5 | # https://github.com/lm-sys/FastChat/blob/v0.2.30/fastchat/train/train.py
 6 | #
 7 | # Licensed under the Apache License, Version 2.0 (the "License");
 8 | # you may not use this file except in compliance with the License.
 9 | # You may obtain a copy of the License at
10 | #
11 | #     http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 | 
19 | import math
20 | from typing import TYPE_CHECKING
21 | 
22 | from ...extras import logging
23 | from ...extras.constants import RopeScaling
24 | 
25 | 
26 | if TYPE_CHECKING:
27 |     from transformers import PretrainedConfig
28 | 
29 |     from ...hparams import ModelArguments
30 | 
31 | 
32 | logger = logging.get_logger(__name__)
33 | 
34 | 
def configure_rope(config: "PretrainedConfig", model_args: "ModelArguments") -> None:
    r"""Enable RoPE scaling on `config` according to `model_args.rope_scaling`.

    The function returns early (with a warning where applicable) when no scaling strategy is
    requested, when the config has no `rope_scaling` or no `max_position_embeddings`, or when the
    requested `model_max_length` already fits in the original context window. Otherwise it enlarges
    `config.max_position_embeddings` and writes the scaling kwargs to `config.rope_scaling`.

    The scaling factor is `ceil(model_max_length / max_position_embeddings)` when
    `model_max_length` is set (training) and `2.0` otherwise (inference).
    """
    if model_args.rope_scaling is None:
        return

    if not hasattr(config, "rope_scaling"):
        logger.warning_rank0("Current model does not support RoPE scaling.")
        return

    # a missing attribute and an explicit `None` are handled alike, the latter would
    # otherwise crash in the length comparison below
    old_max_length = getattr(config, "max_position_embeddings", None)
    if old_max_length is None:
        logger.warning_rank0("Cannot find the max position embeddings in the config.")
        return

    if model_args.model_max_length is not None:  # training
        if model_args.model_max_length <= old_max_length:
            logger.warning_rank0("Input length is smaller than max length. Disabling rope scaling.")
            return

        if model_args.rope_scaling == RopeScaling.DYNAMIC:
            logger.warning_rank0(
                "Dynamic NTK scaling may not work well with fine-tuning. "
                "See: https://github.com/huggingface/transformers/pull/24653"
            )

        rope_factor = float(math.ceil(model_args.model_max_length / old_max_length))
    else:  # inference
        rope_factor = 2.0

    # `rope_factor` is an integer-valued float, so the product is exact: keep the length an int
    # instead of storing (and logging) a float such as 8192.0
    new_max_length = int(old_max_length * rope_factor)

    rope_kwargs = {
        "rope_type": getattr(model_args.rope_scaling, "value", model_args.rope_scaling),  # handle enum
        "factor": rope_factor,
    }
    setattr(config, "max_position_embeddings", new_max_length)
    logger.info_rank0(f"Enlarge max model length from {old_max_length} to {new_max_length}.")

    if model_args.rope_scaling in [RopeScaling.DYNAMIC, RopeScaling.YARN]:
        rope_kwargs["original_max_position_embeddings"] = old_max_length
    elif model_args.rope_scaling == RopeScaling.LLAMA3:
        rope_kwargs["original_max_position_embeddings"] = old_max_length
        rope_kwargs["low_freq_factor"] = 1.0
        rope_kwargs["high_freq_factor"] = 4.0

    setattr(config, "rope_scaling", rope_kwargs)
    logger.info_rank0(
        f"Using {rope_kwargs['rope_type']} scaling strategy and setting scaling factor to {rope_kwargs['factor']}."
    )
82 | 


--------------------------------------------------------------------------------
/src/llamafactory/model/model_utils/valuehead.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from typing import TYPE_CHECKING
16 | 
17 | import torch
18 | from transformers.utils import cached_file
19 | 
20 | from ...extras import logging
21 | from ...extras.constants import V_HEAD_SAFE_WEIGHTS_NAME, V_HEAD_WEIGHTS_NAME
22 | 
23 | 
24 | if TYPE_CHECKING:
25 |     from transformers import PreTrainedModel
26 | 
27 |     from ...hparams import ModelArguments
28 | 
29 | 
30 | logger = logging.get_logger(__name__)
31 | 
32 | 
def load_valuehead_params(path_or_repo_id: str, model_args: "ModelArguments") -> dict[str, torch.Tensor]:
    r"""Load value head parameters from Hugging Face Hub or local disk.

    The safetensors file is tried first, then the PyTorch checkpoint.

    Returns: dict with keys `v_head.summary.weight` and `v_head.summary.bias`,
        or `None` when neither weight file could be loaded (the reasons are logged).
    """
    kwargs = {"path_or_repo_id": path_or_repo_id, "cache_dir": model_args.cache_dir, "token": model_args.hf_hub_token}
    errors = []  # keep the failure of every attempt, not only the last one

    try:
        from safetensors import safe_open

        vhead_file = cached_file(filename=V_HEAD_SAFE_WEIGHTS_NAME, **kwargs)
        with safe_open(vhead_file, framework="pt", device="cpu") as f:
            return {key: f.get_tensor(key) for key in f.keys()}
    except Exception as err:
        errors.append(str(err))

    try:
        vhead_file = cached_file(filename=V_HEAD_WEIGHTS_NAME, **kwargs)
        # the checkpoint may come from a remote repo: `weights_only=True` restricts unpickling
        # to tensors and plain containers instead of executing arbitrary pickled code
        return torch.load(vhead_file, map_location="cpu", weights_only=True)
    except Exception as err:
        errors.append(str(err))

    err_text = "; ".join(errors)
    logger.info_rank0(f"Provided path ({path_or_repo_id}) does not contain value head weights: {err_text}.")
    logger.info_rank0("Ignore the above message if you are not resuming the training of a value head model.")
    return None
59 | 
60 | 
def prepare_valuehead_model(model: "PreTrainedModel") -> None:
    r"""Expose the output projection of selected architectures as `model.lm_head`.

    For the `llava`, `chatglm` and `internlm2` model types the existing output layer is aliased
    to `lm_head` and `lm_head.weight` is listed in `_keys_to_ignore_on_save`. Other model types
    are left untouched.
    """
    model_type = getattr(model.config, "model_type", None)

    if model_type == "llava":
        output_head = model.language_model.get_output_embeddings()
    elif model_type == "chatglm":
        output_head = model.transformer.output_layer
    elif model_type == "internlm2":
        output_head = model.output
    else:
        return

    setattr(model, "lm_head", output_head)
    setattr(model, "_keys_to_ignore_on_save", ["lm_head.weight"])
73 | 


--------------------------------------------------------------------------------
/src/llamafactory/third_party/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hiyouga/LLaMA-Factory/65aa86ed3978151eb01bd5ef2c8df20e178a7d30/src/llamafactory/third_party/__init__.py


--------------------------------------------------------------------------------
/src/llamafactory/third_party/muon/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .muon import Muon
16 | 
17 | 
18 | __all__ = ["Muon"]
19 | 


--------------------------------------------------------------------------------
/src/llamafactory/train/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hiyouga/LLaMA-Factory/65aa86ed3978151eb01bd5ef2c8df20e178a7d30/src/llamafactory/train/__init__.py


--------------------------------------------------------------------------------
/src/llamafactory/train/dpo/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .workflow import run_dpo
16 | 
17 | 
18 | __all__ = ["run_dpo"]
19 | 


--------------------------------------------------------------------------------
/src/llamafactory/train/kto/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .workflow import run_kto
16 | 
17 | 
18 | __all__ = ["run_kto"]
19 | 


--------------------------------------------------------------------------------
/src/llamafactory/train/ppo/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .workflow import run_ppo
16 | 
17 | 
18 | __all__ = ["run_ppo"]
19 | 


--------------------------------------------------------------------------------
/src/llamafactory/train/pt/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .workflow import run_pt
16 | 
17 | 
18 | __all__ = ["run_pt"]
19 | 


--------------------------------------------------------------------------------
/src/llamafactory/train/pt/trainer.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from types import MethodType
16 | from typing import TYPE_CHECKING, Optional
17 | 
18 | import torch
19 | from transformers import Trainer
20 | from typing_extensions import override
21 | 
22 | from ...extras.packages import is_transformers_version_greater_than
23 | from ..callbacks import SaveProcessorCallback
24 | from ..trainer_utils import create_custom_optimizer, create_custom_scheduler
25 | 
26 | 
27 | if TYPE_CHECKING:
28 |     from transformers import ProcessorMixin
29 | 
30 |     from ...hparams import FinetuningArguments
31 | 
32 | 
class CustomTrainer(Trainer):
    r"""Trainer subclass that routes optimizer and scheduler creation through the project helpers."""

    def __init__(
        self, finetuning_args: "FinetuningArguments", processor: Optional["ProcessorMixin"], **kwargs
    ) -> None:
        # When the "4.46" version check passes, the tokenizer is handed to the
        # parent class under the `processing_class` keyword instead.
        if is_transformers_version_greater_than("4.46"):
            tokenizer = kwargs.pop("tokenizer")
            kwargs["processing_class"] = tokenizer

        super().__init__(**kwargs)
        self.finetuning_args = finetuning_args

        if processor is not None:
            # avoid wrong loss under gradient accumulation
            # https://github.com/huggingface/transformers/pull/36044#issuecomment-2746657112
            self.model_accepts_loss_kwargs = False
            # Register a callback that receives the processor.
            self.add_callback(SaveProcessorCallback(processor))

        if finetuning_args.use_badam:
            # Imported lazily so that `badam` is only required when the option is enabled.
            from badam import BAdamCallback, clip_grad_norm_old_version  # type: ignore

            # Rebind the accelerator's gradient clipping to the implementation shipped with badam.
            self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_old_version, self.accelerator)
            self.add_callback(BAdamCallback)

    @override
    def create_optimizer(self) -> "torch.optim.Optimizer":
        r"""Give the custom optimizer factory the first chance, then defer to the parent."""
        if self.optimizer is not None:
            return super().create_optimizer()

        self.optimizer = create_custom_optimizer(self.model, self.args, self.finetuning_args)
        return super().create_optimizer()

    @override
    def create_scheduler(
        self, num_training_steps: int, optimizer: Optional["torch.optim.Optimizer"] = None
    ) -> "torch.optim.lr_scheduler.LRScheduler":
        r"""Run the custom scheduler hook, then let the parent build and return the scheduler."""
        create_custom_scheduler(self.args, num_training_steps, optimizer)
        return super().create_scheduler(num_training_steps, optimizer)

    @override
    def _get_train_sampler(self, *args, **kwargs) -> Optional["torch.utils.data.Sampler"]:
        r"""Return a sequential sampler when shuffling is disabled, else the parent's sampler."""
        if not self.finetuning_args.disable_shuffling:
            return super()._get_train_sampler(*args, **kwargs)

        return torch.utils.data.SequentialSampler(self.train_dataset)

    @override
    def compute_loss(self, model, inputs, *args, **kwargs):
        r"""Forward to the parent implementation unchanged."""
        return super().compute_loss(model, inputs, *args, **kwargs)
82 | 


--------------------------------------------------------------------------------
/src/llamafactory/train/rm/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .workflow import run_rm
16 | 
17 | 
# Names exported by a star-import of this package; `run_rm` comes from `.workflow`.
__all__ = ["run_rm"]
19 | 


--------------------------------------------------------------------------------
/src/llamafactory/train/rm/metric.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from dataclasses import dataclass
16 | from typing import TYPE_CHECKING, Optional
17 | 
18 | import numpy as np
19 | 
20 | from ...extras.misc import numpify
21 | 
22 | 
23 | if TYPE_CHECKING:
24 |     from transformers import EvalPrediction
25 | 
26 | 
@dataclass
class ComputeAccuracy:
    r"""Accumulate chosen-vs-rejected reward accuracy; compatible with `batch_eval_metrics`."""

    def _dump(self) -> Optional[dict[str, float]]:
        r"""Return the mean of every buffered metric (if a buffer exists) and start a fresh buffer."""
        # On the very first call (from `__post_init__`) there is no buffer yet, so nothing is reported.
        summary = (
            {name: float(np.mean(values)) for name, values in self.score_dict.items()}
            if hasattr(self, "score_dict")
            else None
        )
        self.score_dict = {"accuracy": []}
        return summary

    def __post_init__(self):
        # Creates the empty buffer; the returned value is None here and is discarded.
        self._dump()

    def __call__(self, eval_preds: "EvalPrediction", compute_result: bool = True) -> Optional[dict[str, float]]:
        r"""Record whether each chosen score beats its rejected score.

        Returns the averaged metrics (and resets the buffer) when `compute_result`
        is true; otherwise returns None and keeps accumulating.
        """
        chosen_scores = numpify(eval_preds.predictions[0])
        rejected_scores = numpify(eval_preds.predictions[1])

        hits = self.score_dict["accuracy"]
        if chosen_scores.shape:
            # Compare element by element along the first axis.
            hits.extend(chosen_scores[idx] > rejected_scores[idx] for idx in range(len(chosen_scores)))
        else:
            # Zero-dimensional input: a single comparison.
            hits.append(chosen_scores > rejected_scores)

        return self._dump() if compute_result else None
52 | 


--------------------------------------------------------------------------------
/src/llamafactory/train/sft/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .workflow import run_sft
16 | 
17 | 
# Names exported by a star-import of this package; `run_sft` comes from `.workflow`.
__all__ = ["run_sft"]
19 | 


--------------------------------------------------------------------------------
/src/llamafactory/webui/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hiyouga/LLaMA-Factory/65aa86ed3978151eb01bd5ef2c8df20e178a7d30/src/llamafactory/webui/__init__.py


--------------------------------------------------------------------------------
/src/llamafactory/webui/components/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .chatbot import create_chat_box
16 | from .eval import create_eval_tab
17 | from .export import create_export_tab
18 | from .infer import create_infer_tab
19 | from .top import create_top
20 | from .train import create_train_tab
21 | 
22 | 
# Names exported by a star-import of this package; one entry per factory imported above.
__all__ = [
    "create_chat_box",
    "create_eval_tab",
    "create_export_tab",
    "create_infer_tab",
    "create_top",
    "create_train_tab",
]
31 | 


--------------------------------------------------------------------------------
/src/llamafactory/webui/components/infer.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from typing import TYPE_CHECKING
16 | 
17 | from ...extras.packages import is_gradio_available
18 | from ..common import is_multimodal
19 | from .chatbot import create_chat_box
20 | 
21 | 
22 | if is_gradio_available():
23 |     import gradio as gr
24 | 
25 | 
26 | if TYPE_CHECKING:
27 |     from gradio.components import Component
28 | 
29 |     from ..engine import Engine
30 | 
31 | 
def create_infer_tab(engine: "Engine") -> dict[str, "Component"]:
    r"""Create the inference tab's components, wire their events, and return them keyed by name."""
    input_elems = engine.manager.get_base_elems()

    with gr.Row():
        infer_backend = gr.Dropdown(choices=["huggingface", "vllm", "sglang"], value="huggingface")
        infer_dtype = gr.Dropdown(choices=["auto", "float16", "bfloat16", "float32"], value="auto")
        extra_args = gr.Textbox(value='{"vllm_enforce_eager": true}')

    with gr.Row():
        load_btn = gr.Button()
        unload_btn = gr.Button()

    info_box = gr.Textbox(show_label=False, interactive=False)

    # The inference settings are passed to the load/unload handlers alongside the base elements.
    input_elems.update((infer_backend, infer_dtype, extra_args))
    elem_dict = {
        "infer_backend": infer_backend,
        "infer_dtype": infer_dtype,
        "extra_args": extra_args,
        "load_btn": load_btn,
        "unload_btn": unload_btn,
        "info_box": info_box,
    }

    chatbot, messages, chat_elems = create_chat_box(engine, visible=False)
    elem_dict.update(chat_elems)

    # After loading, show or hide the chat box according to `engine.chatter.loaded`.
    load_event = load_btn.click(engine.chatter.load_model, input_elems, [info_box])
    load_event.then(lambda: gr.Column(visible=engine.chatter.loaded), outputs=[chat_elems["chat_box"]])

    # After unloading, clear the chatbot and message outputs, then refresh the chat box visibility.
    unload_event = unload_btn.click(engine.chatter.unload_model, input_elems, [info_box])
    cleared_event = unload_event.then(lambda: ([], []), outputs=[chatbot, messages])
    cleared_event.then(lambda: gr.Column(visible=engine.chatter.loaded), outputs=[chat_elems["chat_box"]])

    # Toggle `mm_box` whenever the selected model name changes.
    model_name_elem = engine.manager.get_elem_by_id("top.model_name")
    model_name_elem.change(
        lambda model_name: gr.Column(visible=is_multimodal(model_name)),
        [model_name_elem],
        [chat_elems["mm_box"]],
    )

    return elem_dict
77 | 


--------------------------------------------------------------------------------
/src/llamafactory/webui/css.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
# Stylesheet text for the web UI package, kept as a raw string (where it is applied is outside this file).
# NOTE(review): the embedded CSS comment on `.modal-box` says "center horizontally", but
# `top: 50%; left: 50%` combined with `translate(-50%, -50%)` centers the box on both axes.
CSS = r"""
.duplicate-button {
  margin: auto !important;
  color: white !important;
  background: black !important;
  border-radius: 100vh !important;
}

.thinking-summary {
  padding: 8px !important;
}

.thinking-summary span {
  border-radius: 4px !important;
  padding: 4px !important;
  cursor: pointer !important;
  font-size: 14px !important;
  background: rgb(245, 245, 245) !important;
}

.dark .thinking-summary span {
  background: rgb(73, 73, 73) !important;
}

.thinking-container {
  border-left: 2px solid #a6a6a6 !important;
  padding-left: 10px !important;
  margin: 4px 0 !important;
}

.thinking-container p {
  color: #a6a6a6 !important;
}

.modal-box {
  position: fixed !important;
  top: 50%;
  left: 50%;
  transform: translate(-50%, -50%); /* center horizontally */
  max-width: 1000px;
  max-height: 750px;
  overflow-y: auto;
  background-color: var(--input-background-fill);
  flex-wrap: nowrap !important;
  border: 2px solid black !important;
  z-index: 1000;
  padding: 10px;
}

.dark .modal-box {
  border: 2px solid white !important;
}
"""
68 | 


--------------------------------------------------------------------------------
/src/llamafactory/webui/manager.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from collections.abc import Generator
16 | from typing import TYPE_CHECKING
17 | 
18 | 
19 | if TYPE_CHECKING:
20 |     from gradio.components import Component
21 | 
22 | 
class Manager:
    r"""Registry mapping dotted element ids ("<tab>.<name>") to gradio components and back."""

    def __init__(self) -> None:
        # Two mirrored lookup tables, filled together by `add_elems`.
        self._id_to_elem: dict[str, "Component"] = {}
        self._elem_to_id: dict["Component", str] = {}

    def add_elems(self, tab_name: str, elem_dict: dict[str, "Component"]) -> None:
        r"""Register every component of a tab under the id `<tab_name>.<element name>`."""
        for name, component in elem_dict.items():
            full_id = f"{tab_name}.{name}"
            self._elem_to_id[component] = full_id
            self._id_to_elem[full_id] = component

    def get_elem_list(self) -> list["Component"]:
        r"""Return all registered components as a list, in registration order."""
        return [*self._id_to_elem.values()]

    def get_elem_iter(self) -> Generator[tuple[str, "Component"], None, None]:
        r"""Yield `(name, component)` pairs, where name is the id part after the last dot."""
        for full_id, component in self._id_to_elem.items():
            yield full_id.rpartition(".")[2], component

    def get_elem_by_id(self, elem_id: str) -> "Component":
        r"""Look up a component by its full id.

        Example: top.lang, train.dataset
        """
        return self._id_to_elem[elem_id]

    def get_id_by_elem(self, elem: "Component") -> str:
        r"""Look up the full id a component was registered under."""
        return self._elem_to_id[elem]

    def get_base_elems(self) -> set["Component"]:
        r"""Return the set of commonly used components from the `top` tab."""
        base_ids = (
            "top.lang",
            "top.model_name",
            "top.model_path",
            "top.finetuning_type",
            "top.checkpoint_path",
            "top.quantization_bit",
            "top.quantization_method",
            "top.template",
            "top.rope_scaling",
            "top.booster",
        )
        return {self._id_to_elem[elem_id] for elem_id in base_ids}
71 | 


--------------------------------------------------------------------------------
/src/train.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from llamafactory.train.tuner import run_exp
16 | 
17 | 
def main():
    """Script entry point: delegates to `run_exp` with no arguments."""
    run_exp()
20 | 
21 | 
def _mp_fn(index):
    """Entry point for xla_spawn; `index` is accepted but not used by this function."""
    # For xla_spawn (TPUs)
    run_exp()
25 | 
26 | 
# Run only when the file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
29 | 


--------------------------------------------------------------------------------
/src/webui.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | 
17 | from llamafactory.extras.misc import fix_proxy, is_env_enabled
18 | from llamafactory.webui.interface import create_ui
19 | 
20 | 
def main():
    """Launch the Web UI using the GRADIO_IPV6, GRADIO_SHARE and GRADIO_SERVER_NAME settings."""
    use_ipv6 = is_env_enabled("GRADIO_IPV6")
    share_link = is_env_enabled("GRADIO_SHARE")

    # Without an explicit GRADIO_SERVER_NAME, fall back to the wildcard address of the chosen IP family.
    fallback_host = "[::]" if use_ipv6 else "0.0.0.0"
    host = os.getenv("GRADIO_SERVER_NAME", fallback_host)

    print("Visit http://ip:port for Web UI, e.g., http://127.0.0.1:7860")
    fix_proxy(ipv6_enabled=use_ipv6)

    app = create_ui().queue()
    app.launch(share=share_link, server_name=host, inbrowser=True)
28 | 
29 | 
# Run only when the file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
32 | 


--------------------------------------------------------------------------------
/tests/check_license.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import sys
16 | from pathlib import Path
17 | 
18 | 
# Every one of these substrings must appear in the first line of a checked file.
KEYWORDS = ("Copyright", "2025", "LlamaFactory")


def main():
    """Check that each non-empty ``*.py`` file starts with the license line.

    The directories to scan are taken from ``sys.argv[1:]`` and searched
    recursively. Files that are empty after stripping whitespace are skipped.

    Raises:
        AssertionError: If the first line of a non-empty file lacks any of
            ``KEYWORDS``. The exception is raised explicitly rather than via
            an ``assert`` statement so the check still runs under ``python -O``.
    """
    path_list: list[Path] = []
    for check_dir in sys.argv[1:]:
        path_list.extend(Path(check_dir).glob("**/*.py"))

    for path in path_list:
        content = path.read_text(encoding="utf-8").strip()
        if not content:
            continue

        print(f"Check license: {path}")
        # After stripping, the first line is the first non-blank line of the file.
        first_line = content.partition("\n")[0]
        if not all(keyword in first_line for keyword in KEYWORDS):
            raise AssertionError(f"File {path} does not contain license.")
35 | 
36 | 
# Run only when the file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
39 | 


--------------------------------------------------------------------------------
/tests/data/processor/test_feedback.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | import random
17 | 
18 | import pytest
19 | from datasets import load_dataset
20 | from transformers import AutoTokenizer
21 | 
22 | from llamafactory.extras.constants import IGNORE_INDEX
23 | from llamafactory.train.test_utils import load_dataset_module
24 | 
25 | 
# Dataset identifier; the DEMO_DATA environment variable overrides the default.
DEMO_DATA = os.getenv("DEMO_DATA", "llamafactory/demo_data")

# Model identifier used for the training args and the reference tokenizer; overridable via TINY_LLAMA3.
TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")

# Keyword arguments passed to `load_dataset_module` by the test in this file.
TRAIN_ARGS = {
    "model_name_or_path": TINY_LLAMA3,
    "stage": "kto",
    "do_train": True,
    "finetuning_type": "full",
    "dataset": "kto_en_demo",
    "dataset_dir": "REMOTE:" + DEMO_DATA,
    "template": "llama3",
    "cutoff_len": 8192,
    "output_dir": "dummy_dir",
    "overwrite_output_dir": True,
    "fp16": True,
}
43 | 
44 | 
@pytest.mark.parametrize("num_samples", [16])
def test_feedback_data(num_samples: int):
    """Compare processed KTO samples with references built from the tokenizer's chat template.

    For `num_samples` randomly drawn indexes (drawn with replacement), the
    processed `input_ids`, `labels` and `kto_tags` must match the reference
    tokenization, the prompt-masked labels and the original `label` field.
    """
    train_dataset = load_dataset_module(**TRAIN_ARGS)["train_dataset"]
    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3)
    original_data = load_dataset(DEMO_DATA, name="kto_en_demo", split="train")
    indexes = random.choices(range(len(original_data)), k=num_samples)
    for index in indexes:
        # Fetch the row first: `dataset[column][index]` can read the whole column
        # on every iteration (depending on the installed `datasets` version).
        original_row = original_data[index]
        processed_row = train_dataset[index]

        messages = original_row["messages"]
        ref_input_ids = ref_tokenizer.apply_chat_template(messages)
        prompt_len = len(ref_tokenizer.apply_chat_template(messages[:-1], add_generation_prompt=True))
        # Prompt positions are masked with IGNORE_INDEX; only the response tokens remain as labels.
        ref_labels = [IGNORE_INDEX] * prompt_len + ref_input_ids[prompt_len:]
        assert processed_row["input_ids"] == ref_input_ids
        assert processed_row["labels"] == ref_labels
        assert processed_row["kto_tags"] == original_row["label"]
59 | 


--------------------------------------------------------------------------------
/tests/data/processor/test_processor_utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | import pytest
17 | 
18 | from llamafactory.data.processor.processor_utils import infer_seqlen
19 | 
20 | 
@pytest.mark.parametrize(
    "test_input,test_output",
    [
        ((3000, 2000, 1000), (600, 400)),
        ((2000, 3000, 1000), (400, 600)),
        ((1000, 100, 1000), (900, 100)),
        ((100, 1000, 1000), (100, 900)),
        ((100, 500, 1000), (100, 500)),
        ((500, 100, 1000), (500, 100)),
        ((10, 10, 1000), (10, 10)),
    ],
)
def test_infer_seqlen(test_input: tuple[int, int, int], test_output: tuple[int, int]):
    """Each case unpacks a 3-tuple into `infer_seqlen` and expects the given 2-tuple back."""
    inferred = infer_seqlen(*test_input)
    assert test_output == inferred
35 | 


--------------------------------------------------------------------------------
/tests/data/processor/test_unsupervised.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | import random
17 | 
18 | import pytest
19 | from datasets import load_dataset
20 | from transformers import AutoTokenizer
21 | 
22 | from llamafactory.train.test_utils import load_dataset_module
23 | 
24 | 
# Dataset identifier; the DEMO_DATA environment variable overrides the default.
DEMO_DATA = os.getenv("DEMO_DATA", "llamafactory/demo_data")

# Model identifier used for the training args and the reference tokenizer; overridable via TINY_LLAMA3.
TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")

# Second dataset identifier; overridable via TINY_DATA.
TINY_DATA = os.getenv("TINY_DATA", "llamafactory/tiny-supervised-dataset")

# Keyword arguments passed to `load_dataset_module` by the test in this file.
TRAIN_ARGS = {
    "model_name_or_path": TINY_LLAMA3,
    "stage": "ppo",
    "do_train": True,
    "finetuning_type": "full",
    "reward_model": "",
    "reward_model_type": "full",
    "dataset": "system_chat",
    "dataset_dir": "REMOTE:" + DEMO_DATA,
    "template": "llama3",
    "cutoff_len": 8192,
    "output_dir": "dummy_dir",
    "overwrite_output_dir": True,
    "fp16": True,
}
46 | 
47 | 
@pytest.mark.parametrize("num_samples", [16])
def test_unsupervised_data(num_samples: int):
    """Compare processed samples with references built from the tokenizer's chat template.

    For `num_samples` randomly drawn indexes (drawn with replacement), the
    processed `input_ids` must equal the tokenized prompt (all messages but the
    last, with the generation prompt added) and `labels` must equal the
    remaining tokens of the full conversation.
    """
    train_dataset = load_dataset_module(**TRAIN_ARGS)["train_dataset"]
    ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA3)
    original_data = load_dataset(DEMO_DATA, name="system_chat", split="train")
    indexes = random.choices(range(len(original_data)), k=num_samples)
    for index in indexes:
        # Fetch the row first: `dataset[column][index]` can read the whole column
        # on every iteration (depending on the installed `datasets` version).
        messages = original_data[index]["messages"]
        processed_row = train_dataset[index]

        ref_ids = ref_tokenizer.apply_chat_template(messages)
        ref_input_ids = ref_tokenizer.apply_chat_template(messages[:-1], add_generation_prompt=True)
        # Everything after the prompt tokens is the expected label sequence.
        ref_labels = ref_ids[len(ref_input_ids) :]
        assert processed_row["input_ids"] == ref_input_ids
        assert processed_row["labels"] == ref_labels
61 | 


--------------------------------------------------------------------------------
/tests/data/test_converter.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from llamafactory.data import Role
16 | from llamafactory.data.converter import get_dataset_converter
17 | from llamafactory.data.parser import DatasetAttr
18 | from llamafactory.hparams import DataArguments
19 | 
20 | 
def test_alpaca_converter():
    """An alpaca record becomes one user prompt (instruction and input joined by a newline) plus one response."""
    attr = DatasetAttr("hf_hub", "llamafactory/tiny-supervised-dataset")
    converter = get_dataset_converter("alpaca", attr, DataArguments())

    record = {
        "instruction": "Solve the math problem.",
        "input": "3 + 4",
        "output": "The answer is 7.",
    }
    expected = {
        "_prompt": [{"role": Role.USER.value, "content": "Solve the math problem.\n3 + 4"}],
        "_response": [{"role": Role.ASSISTANT.value, "content": "The answer is 7."}],
        "_system": "",
        "_tools": "",
        "_images": None,
        "_videos": None,
        "_audios": None,
    }
    assert converter(record) == expected
39 | 
40 | 
def test_sharegpt_converter():
    """A sharegpt conversation is split into system text, a user prompt and an assistant response."""
    attr = DatasetAttr("hf_hub", "llamafactory/tiny-supervised-dataset")
    converter = get_dataset_converter("sharegpt", attr, DataArguments())

    record = {
        "conversations": [
            {"from": "system", "value": "You are a helpful assistant."},
            {"from": "human", "value": "Solve the math problem.\n3 + 4"},
            {"from": "gpt", "value": "The answer is 7."},
        ]
    }
    expected = {
        "_prompt": [{"role": Role.USER.value, "content": "Solve the math problem.\n3 + 4"}],
        "_response": [{"role": Role.ASSISTANT.value, "content": "The answer is 7."}],
        "_system": "You are a helpful assistant.",
        "_tools": "",
        "_images": None,
        "_videos": None,
        "_audios": None,
    }
    assert converter(record) == expected
61 | 


--------------------------------------------------------------------------------
/tests/data/test_loader.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | 
17 | from llamafactory.train.test_utils import load_dataset_module
18 | 
19 | 
# Dataset identifier; the DEMO_DATA environment variable overrides the default.
DEMO_DATA = os.getenv("DEMO_DATA", "llamafactory/demo_data")

# Model identifier used in the training args; overridable via TINY_LLAMA3.
TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")

# Dataset used as both the training and the evaluation source in these tests; overridable via TINY_DATA.
TINY_DATA = os.getenv("TINY_DATA", "llamafactory/tiny-supervised-dataset")

# Base keyword arguments passed to `load_dataset_module` by every test in this file.
TRAIN_ARGS = {
    "model_name_or_path": TINY_LLAMA3,
    "stage": "sft",
    "do_train": True,
    "finetuning_type": "full",
    "template": "llama3",
    "dataset": TINY_DATA,
    "dataset_dir": "ONLINE",
    "cutoff_len": 8192,
    "output_dir": "dummy_dir",
    "overwrite_output_dir": True,
    "fp16": True,
}
39 | 
40 | 
def test_load_train_only():
    r"""Load with the baseline arguments and expect a train split but no eval split."""
    loaded = load_dataset_module(**TRAIN_ARGS)
    train_split = loaded.get("train_dataset")
    eval_split = loaded.get("eval_dataset")
    assert train_split is not None
    assert eval_split is None
45 | 
46 | 
def test_load_val_size():
    r"""Load with ``val_size=0.1`` and expect both a train and an eval split."""
    loaded = load_dataset_module(val_size=0.1, **TRAIN_ARGS)
    train_split = loaded.get("train_dataset")
    eval_split = loaded.get("eval_dataset")
    assert train_split is not None
    assert eval_split is not None
51 | 
52 | 
def test_load_eval_data():
    r"""Load with an explicit ``eval_dataset`` and expect both a train and an eval split."""
    loaded = load_dataset_module(eval_dataset=TINY_DATA, **TRAIN_ARGS)
    train_split = loaded.get("train_dataset")
    eval_split = loaded.get("eval_dataset")
    assert train_split is not None
    assert eval_split is not None
57 | 


--------------------------------------------------------------------------------
/tests/e2e/test_chat.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | 
17 | from llamafactory.chat import ChatModel
18 | 
19 | 
# Model identifier; override through the TINY_LLAMA3 environment variable.
TINY_LLAMA3 = os.environ.get("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")

# Sampling is disabled and generation is capped at one new token.
INFER_ARGS = {
    "model_name_or_path": TINY_LLAMA3,
    "finetuning_type": "lora",
    "template": "llama3",
    "infer_dtype": "float16",
    "do_sample": False,
    "max_new_tokens": 1,
}

# Single-turn conversation sent to the model by both tests.
MESSAGES = [{"role": "user", "content": "Hi"}]

# Text both tests expect back for MESSAGES.
EXPECTED_RESPONSE = "_rho"
36 | 
37 | 
def test_chat():
    r"""Check that the first chat response for MESSAGES equals EXPECTED_RESPONSE."""
    engine = ChatModel(INFER_ARGS)
    first_reply = engine.chat(MESSAGES)[0]
    assert first_reply.response_text == EXPECTED_RESPONSE
41 | 
42 | 
def test_stream_chat():
    r"""Check that the concatenated streamed tokens equal EXPECTED_RESPONSE."""
    engine = ChatModel(INFER_ARGS)
    streamed_text = "".join(engine.stream_chat(MESSAGES))
    assert streamed_text == EXPECTED_RESPONSE
50 | 


--------------------------------------------------------------------------------
/tests/e2e/test_sglang.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import sys
16 | 
17 | import pytest
18 | 
19 | from llamafactory.chat import ChatModel
20 | from llamafactory.extras.packages import is_sglang_available
21 | 
22 | 
# Model used for the SGLang end-to-end tests.
MODEL_NAME = "Qwen/Qwen2.5-0.5B"


# Selects the "sglang" backend; sampling is disabled and generation is capped
# at one new token.
INFER_ARGS = {
    "model_name_or_path": MODEL_NAME,
    "finetuning_type": "lora",
    "template": "llama3",
    "infer_dtype": "float16",
    "infer_backend": "sglang",
    "do_sample": False,
    "max_new_tokens": 1,
}


# Single-turn conversation sent to the model by both tests.
MESSAGES = [{"role": "user", "content": "Hi"}]
40 | 
41 | 
@pytest.mark.skipif(not is_sglang_available(), reason="SGLang is not installed")
def test_chat():
    r"""Test the SGLang engine's basic chat functionality.

    Sends MESSAGES through ``ChatModel.chat`` and requires the first response
    to carry non-empty text.
    """
    chat_model = ChatModel(INFER_ARGS)
    response = chat_model.chat(MESSAGES)[0]
    # TODO: Change to EXPECTED_RESPONSE
    print(response.response_text)
    # Until an exact expected string is pinned, require at least a non-empty
    # reply (the same check test_stream_chat performs) so this test can fail
    # on something other than an exception.
    assert response.response_text, "Should receive a non-empty response"
49 | 
50 | 
@pytest.mark.skipif(not is_sglang_available(), reason="SGLang is not installed")
def test_stream_chat():
    r"""Test that streaming through the SGLang engine yields non-empty text."""
    engine = ChatModel(INFER_ARGS)

    # Collect every streamed piece and join them into the full reply.
    response = "".join(engine.stream_chat(MESSAGES))

    print("Complete response:", response)
    assert response, "Should receive a non-empty response"
62 | 
63 | 
# Script entry point: run both tests directly, or exit with status 1 when
# SGLang is missing.
if __name__ == "__main__":
    if is_sglang_available():
        test_chat()
        test_stream_chat()
    else:
        print("SGLang is not available. Please install it.")
        sys.exit(1)
72 | 


--------------------------------------------------------------------------------
/tests/e2e/test_train.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | 
17 | import pytest
18 | 
19 | from llamafactory.train.tuner import export_model, run_exp
20 | 
21 | 
# Fixture identifiers; each default can be overridden through the environment
# variable of the same name.
DEMO_DATA = os.environ.get("DEMO_DATA", "llamafactory/demo_data")

TINY_LLAMA3 = os.environ.get("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")

TINY_LLAMA_ADAPTER = os.environ.get("TINY_LLAMA_ADAPTER", "llamafactory/tiny-random-Llama-3-lora")

# Shared training arguments: one step, batch size 1, cutoff length 1, no reporting.
TRAIN_ARGS = {
    "model_name_or_path": TINY_LLAMA3,
    "do_train": True,
    "finetuning_type": "lora",
    "dataset_dir": f"REMOTE:{DEMO_DATA}",
    "template": "llama3",
    "cutoff_len": 1,
    "overwrite_output_dir": True,
    "per_device_train_batch_size": 1,
    "max_steps": 1,
    "report_to": "none",
}

# Arguments for the export test: base model plus LoRA adapter.
INFER_ARGS = {
    "model_name_or_path": TINY_LLAMA3,
    "adapter_name_or_path": TINY_LLAMA_ADAPTER,
    "finetuning_type": "lora",
    "template": "llama3",
    "infer_dtype": "float16",
}

# Read from the environment; used below to mark the "rm" stage as an expected
# failure when the value starts with "windows".
OS_NAME = os.environ.get("OS_NAME", "")
50 | 
51 | 
@pytest.mark.parametrize(
    "stage,dataset",
    [
        ("pt", "c4_demo"),
        ("sft", "alpaca_en_demo"),
        ("dpo", "dpo_en_demo"),
        ("kto", "kto_en_demo"),
        pytest.param(
            "rm",
            "dpo_en_demo",
            marks=pytest.mark.xfail(OS_NAME.startswith("windows"), reason="OS error."),
        ),
    ],
)
def test_run_exp(stage: str, dataset: str):
    r"""Run one experiment for the given stage/dataset and check the output directory exists."""
    output_dir = os.path.join("output", "train_" + stage)
    # Per-case keys come first; TRAIN_ARGS is merged afterwards, as in a ``**`` unpack.
    exp_args = {"stage": stage, "dataset": dataset, "output_dir": output_dir}
    exp_args.update(TRAIN_ARGS)
    run_exp(exp_args)
    assert os.path.exists(output_dir)
66 | 
67 | 
def test_export():
    r"""Export the model described by INFER_ARGS and check the export directory exists."""
    export_dir = os.path.join("output", "llama3_export")
    export_args = {"export_dir": export_dir}
    export_args.update(INFER_ARGS)
    export_model(export_args)
    assert os.path.exists(export_dir)
72 | 


--------------------------------------------------------------------------------
/tests/eval/test_eval_template.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from llamafactory.eval.template import get_eval_template
16 | 
17 | 
def test_eval_template_en():
    r"""Check the messages the "en" eval template builds from one few-shot example and one target."""
    fewshot = {
        "question": "Fewshot question",
        "A": "Fewshot1",
        "B": "Fewshot2",
        "C": "Fewshot3",
        "D": "Fewshot4",
        "answer": "B",
    }
    target = {
        "question": "Target question",
        "A": "Target1",
        "B": "Target2",
        "C": "Target3",
        "D": "Target4",
        "answer": "C",
    }
    # The subject header appears only in the first user turn.
    expected_messages = [
        {
            "role": "user",
            "content": (
                "The following are multiple choice questions (with answers) about SubName.\n\n"
                "Fewshot question\nA. Fewshot1\nB. Fewshot2\nC. Fewshot3\nD. Fewshot4\nAnswer:"
            ),
        },
        {"role": "assistant", "content": "B"},
        {
            "role": "user",
            "content": "Target question\nA. Target1\nB. Target2\nC. Target3\nD. Target4\nAnswer:",
        },
        {"role": "assistant", "content": "C"},
    ]

    eval_template = get_eval_template(name="en")
    built = eval_template.format_example(target, support_set=[fewshot], subject_name="SubName")
    assert built == expected_messages
54 | 
55 | 
def test_eval_template_zh():
    r"""Check the messages the "zh" eval template builds from one few-shot example and one target."""
    fewshot = {
        "question": "示例问题",
        "A": "示例答案1",
        "B": "示例答案2",
        "C": "示例答案3",
        "D": "示例答案4",
        "answer": "B",
    }
    target = {
        "question": "目标问题",
        "A": "目标答案1",
        "B": "目标答案2",
        "C": "目标答案3",
        "D": "目标答案4",
        "answer": "C",
    }
    # The subject header appears only in the first user turn.
    expected_messages = [
        {
            "role": "user",
            "content": (
                "以下是中国关于主题考试的单项选择题，请选出其中的正确答案。\n\n"
                "示例问题\nA. 示例答案1\nB. 示例答案2\nC. 示例答案3\nD. 示例答案4\n答案："
            ),
        },
        {"role": "assistant", "content": "B"},
        {
            "role": "user",
            "content": "目标问题\nA. 目标答案1\nB. 目标答案2\nC. 目标答案3\nD. 目标答案4\n答案：",
        },
        {"role": "assistant", "content": "C"},
    ]

    eval_template = get_eval_template(name="zh")
    built = eval_template.format_example(target, support_set=[fewshot], subject_name="主题")
    assert built == expected_messages
92 | 


--------------------------------------------------------------------------------
/tests/model/model_utils/test_add_tokens.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | 
17 | import pytest
18 | 
19 | from llamafactory.hparams import ModelArguments
20 | from llamafactory.model import load_tokenizer
21 | 
22 | 
# Model identifier; override through the TINY_LLAMA3 environment variable.
TINY_LLAMA3 = os.environ.get("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")

# Token string that the test registers with the tokenizer.
UNUSED_TOKEN = "<|UNUSED_TOKEN|>"
26 | 
27 | 
@pytest.mark.parametrize("special_tokens", [False, True])
def test_add_tokens(special_tokens: bool):
    r"""Register UNUSED_TOKEN as a regular or special token and check its round trip.

    The token must encode to exactly one id. Decoding with
    ``skip_special_tokens=True`` must return an empty string for the special
    variant and the token itself for the regular variant.
    """
    # Choose which ModelArguments field receives the token.
    token_field = "add_special_tokens" if special_tokens else "add_tokens"
    model_args = ModelArguments(model_name_or_path=TINY_LLAMA3, **{token_field: UNUSED_TOKEN})

    tokenizer = load_tokenizer(model_args)["tokenizer"]
    token_ids = tokenizer.encode(UNUSED_TOKEN, add_special_tokens=False)
    assert len(token_ids) == 1

    round_trip = tokenizer.decode(token_ids, skip_special_tokens=True)
    expected_text = "" if special_tokens else UNUSED_TOKEN
    assert round_trip == expected_text
43 | 
44 | 
# Script entry point: hand this file to pytest when executed directly.
if __name__ == "__main__":
    pytest.main(args=[__file__])
47 | 


--------------------------------------------------------------------------------
/tests/model/model_utils/test_attention.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | 
17 | import pytest
18 | from transformers.utils import is_flash_attn_2_available, is_torch_sdpa_available
19 | 
20 | from llamafactory.extras.packages import is_transformers_version_greater_than
21 | from llamafactory.train.test_utils import load_infer_model
22 | 
23 | 
# Model identifier; override through the TINY_LLAMA3 environment variable.
TINY_LLAMA3 = os.environ.get("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")

# Arguments shared by every attention variant loaded in the test.
INFER_ARGS = {"model_name_or_path": TINY_LLAMA3, "template": "llama3"}
30 | 
31 | 
@pytest.mark.xfail(is_transformers_version_greater_than("4.48"), reason="Attention refactor.")
def test_attention():
    r"""Check that each available ``flash_attn`` setting yields the expected attention class.

    "disabled" is always tested; "sdpa" and "fa2" are added only when the
    corresponding availability check passes.
    """
    expected_class_name = {
        "disabled": "LlamaAttention",
        "sdpa": "LlamaSdpaAttention",
        "fa2": "LlamaFlashAttention2",
    }

    backends = ["disabled"]
    if is_torch_sdpa_available():
        backends.append("sdpa")

    if is_flash_attn_2_available():
        backends.append("fa2")

    for backend in backends:
        model = load_infer_model(flash_attn=backend, **INFER_ARGS)
        # Only modules whose class name mentions "Attention" are checked.
        attention_names = [
            module.__class__.__name__ for module in model.modules() if "Attention" in module.__class__.__name__
        ]
        for class_name in attention_names:
            assert class_name == expected_class_name[backend]
51 | 


--------------------------------------------------------------------------------
/tests/model/model_utils/test_checkpointing.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | 
17 | import pytest
18 | import torch
19 | 
20 | from llamafactory.extras.misc import get_current_device
21 | from llamafactory.train.test_utils import load_train_model
22 | 
23 | 
# Model identifier; override through the TINY_LLAMA3 environment variable.
TINY_LLAMA3 = os.environ.get("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")

# Baseline LoRA training arguments; each test adds its own flag on top.
TRAIN_ARGS = {
    "model_name_or_path": TINY_LLAMA3,
    "stage": "sft",
    "do_train": True,
    "finetuning_type": "lora",
    "lora_target": "all",
    "dataset": "llamafactory/tiny-supervised-dataset",
    "dataset_dir": "ONLINE",
    "template": "llama3",
    "cutoff_len": 1024,
    "output_dir": "dummy_dir",
    "overwrite_output_dir": True,
    "fp16": True,
}
40 | 
41 | 
@pytest.mark.parametrize("disable_gradient_checkpointing", [False, True])
def test_vanilla_checkpointing(disable_gradient_checkpointing: bool):
    r"""Check that each module's ``gradient_checkpointing`` flag differs from the disable switch."""
    model = load_train_model(disable_gradient_checkpointing=disable_gradient_checkpointing, **TRAIN_ARGS)
    for module in model.modules():
        # Modules without the attribute are not checked.
        if not hasattr(module, "gradient_checkpointing"):
            continue

        assert module.gradient_checkpointing != disable_gradient_checkpointing
47 | 
48 | 
def test_unsloth_gradient_checkpointing():
    r"""Check that ``use_unsloth_gc=True`` binds the checkpointing function to UnslothGradientCheckpointing."""
    model = load_train_model(use_unsloth_gc=True, **TRAIN_ARGS)
    for module in model.modules():
        if hasattr(module, "gradient_checkpointing"):
            # The function is a bound callable; inspect the object it is bound to.
            bound_owner = module._gradient_checkpointing_func.__self__
            assert bound_owner.__name__ == "UnslothGradientCheckpointing"
53 | 
54 | 
def test_upcast_layernorm():
    r"""Check that 1-D parameters with "norm" in their name are float32 under ``upcast_layernorm=True``."""
    model = load_train_model(upcast_layernorm=True, **TRAIN_ARGS)
    norm_params = [param for name, param in model.named_parameters() if param.ndim == 1 and "norm" in name]
    for param in norm_params:
        assert param.dtype == torch.float32
60 | 
61 | 
def test_upcast_lmhead_output():
    r"""Check that the output embedding returns float32 for a float16 input under ``upcast_lmhead_output=True``."""
    model = load_train_model(upcast_lmhead_output=True, **TRAIN_ARGS)
    lm_head = model.get_output_embeddings()
    half_inputs = torch.randn((1, 16), dtype=torch.float16, device=get_current_device())
    outputs: torch.Tensor = lm_head(half_inputs)
    assert outputs.dtype == torch.float32
67 | 


--------------------------------------------------------------------------------
/tests/model/model_utils/test_misc.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | 
17 | import pytest
18 | import torch
19 | from transformers import AutoConfig, AutoModelForCausalLM
20 | 
21 | from llamafactory.model.model_utils.misc import find_expanded_modules
22 | 
23 | 
# Read from the environment; the gated-model test below is skipped when this is unset or empty.
HF_TOKEN = os.environ.get("HF_TOKEN")
25 | 
26 | 
@pytest.mark.skipif(not HF_TOKEN, reason="Gated model.")
def test_expanded_modules():
    r"""Check which q_proj/v_proj modules are selected with ``num_layer_trainable=4``.

    The model is built from its config under the "meta" device.
    """
    config = AutoConfig.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
    with torch.device("meta"):
        model = AutoModelForCausalLM.from_config(config)

    # Expected: q_proj then v_proj for layers 7, 15, 23 and 31, in that order.
    expected_modules = [
        f"model.layers.{layer_idx}.self_attn.{proj_name}"
        for layer_idx in (7, 15, 23, 31)
        for proj_name in ("q_proj", "v_proj")
    ]
    found_modules = find_expanded_modules(model, ["q_proj", "v_proj"], num_layer_trainable=4)
    assert found_modules == expected_modules
44 | 


--------------------------------------------------------------------------------
/tests/model/model_utils/test_packing.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import pytest
16 | import torch
17 | 
18 | from llamafactory.model.model_utils.packing import get_seqlens_in_batch, get_unpad_data
19 | 
20 | 
@pytest.mark.parametrize(
    "attention_mask,golden_seq_lens",
    [
        (
            [
                [1, 1, 2, 2, 2, 0],
                [1, 2, 2, 3, 3, 3],
            ],
            [2, 3, 1, 2, 3],
        ),
        ([[1]], [1]),
    ],
)
def test_get_seqlens_in_batch(attention_mask, golden_seq_lens):
    r"""Check the sequence lengths computed from an index-valued attention mask."""
    mask_tensor = torch.tensor(attention_mask)
    expected_lens = torch.tensor(golden_seq_lens)
    computed_lens = get_seqlens_in_batch(mask_tensor)
    assert (computed_lens == expected_lens).all()
41 | 
42 | 
@pytest.mark.parametrize(
    "attention_mask,golden_indices,golden_cu_seqlens,golden_max_seqlen",
    [
        (
            [
                [1, 1, 2, 2, 2, 0],
                [1, 2, 2, 3, 3, 3],
            ],
            [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11],
            [0, 2, 5, 6, 8, 11],
            3,
        ),
        ([[1]], [0], [0, 1], 1),
    ],
)
def test_get_unpad_data(attention_mask, golden_indices, golden_cu_seqlens, golden_max_seqlen):
    r"""Check the indices, cumulative sequence lengths and max length returned by ``get_unpad_data``."""
    mask_tensor = torch.tensor(attention_mask)
    expected_indices = torch.tensor(golden_indices)
    # The cumulative lengths are compared against an int32 tensor.
    expected_cu_seqlens = torch.tensor(golden_cu_seqlens, dtype=torch.int32)

    indices, cu_seqlens, max_seqlen_in_batch = get_unpad_data(mask_tensor)
    assert (indices == expected_indices).all()
    assert (cu_seqlens == expected_cu_seqlens).all()
    assert max_seqlen_in_batch == golden_max_seqlen
69 | 


--------------------------------------------------------------------------------
/tests/model/test_base.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | 
17 | import pytest
18 | 
19 | from llamafactory.train.test_utils import compare_model, load_infer_model, load_reference_model, patch_valuehead_model
20 | 
21 | 
# Model identifiers; each default can be overridden through the environment
# variable of the same name.
TINY_LLAMA3 = os.environ.get("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")

TINY_LLAMA_VALUEHEAD = os.environ.get("TINY_LLAMA_VALUEHEAD", "llamafactory/tiny-random-Llama-3-valuehead")

# Inference arguments shared by both tests.
INFER_ARGS = {"model_name_or_path": TINY_LLAMA3, "template": "llama3", "infer_dtype": "float16"}
31 | 
32 | 
@pytest.fixture
def fix_valuehead_cpu_loading():
    r"""Call ``patch_valuehead_model`` before a test that requests this fixture."""
    # Used purely for its side effect; the fixture provides no value.
    patch_valuehead_model()
    return None
36 | 
37 | 
def test_base():
    r"""Compare the model loaded for inference against the reference load of the same checkpoint."""
    compare_model(load_infer_model(**INFER_ARGS), load_reference_model(TINY_LLAMA3))
42 | 
43 | 
@pytest.mark.usefixtures("fix_valuehead_cpu_loading")
def test_valuehead():
    r"""Compare a model loaded with ``add_valuehead=True`` against the value-head reference checkpoint."""
    compare_model(
        load_infer_model(add_valuehead=True, **INFER_ARGS),
        load_reference_model(TINY_LLAMA_VALUEHEAD, add_valuehead=True),
    )
49 | 


--------------------------------------------------------------------------------
/tests/model/test_freeze.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | 
17 | import torch
18 | 
19 | from llamafactory.train.test_utils import load_infer_model, load_train_model
20 | 
21 | 
# Model identifier; override through the TINY_LLAMA3 environment variable.
TINY_LLAMA3 = os.environ.get("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")

# Baseline arguments for loading the model in "freeze" training mode.
TRAIN_ARGS = {
    "model_name_or_path": TINY_LLAMA3,
    "stage": "sft",
    "do_train": True,
    "finetuning_type": "freeze",
    "dataset": "llamafactory/tiny-supervised-dataset",
    "dataset_dir": "ONLINE",
    "template": "llama3",
    "cutoff_len": 1024,
    "output_dir": "dummy_dir",
    "overwrite_output_dir": True,
    "fp16": True,
}

# Arguments for loading the same model for inference.
INFER_ARGS = {
    "model_name_or_path": TINY_LLAMA3,
    "finetuning_type": "freeze",
    "template": "llama3",
    "infer_dtype": "float16",
}
44 | 
45 | 
def test_freeze_train_all_modules():
    r"""Check that ``freeze_trainable_layers=1`` trains only ``model.layers.1.*``.

    Those parameters must require grad and be float32; all others must be
    frozen and float16.
    """
    model = load_train_model(freeze_trainable_layers=1, **TRAIN_ARGS)
    for name, param in model.named_parameters():
        should_train = name.startswith("model.layers.1.")
        expected_dtype = torch.float32 if should_train else torch.float16
        assert param.requires_grad is should_train
        assert param.dtype == expected_dtype
55 | 
56 | 
def test_freeze_train_extra_modules():
    r"""Check that ``freeze_extra_modules`` also makes ``embed_tokens`` and ``lm_head`` trainable.

    Parameters under ``model.layers.1.`` or naming one of the extra modules
    must require grad and be float32; all others must be frozen and float16.
    """
    extra_modules = ["embed_tokens", "lm_head"]
    model = load_train_model(freeze_trainable_layers=1, freeze_extra_modules="embed_tokens,lm_head", **TRAIN_ARGS)
    for name, param in model.named_parameters():
        should_train = name.startswith("model.layers.1.") or any(module in name for module in extra_modules)
        expected_dtype = torch.float32 if should_train else torch.float16
        assert param.requires_grad is should_train
        assert param.dtype == expected_dtype
66 | 
67 | 
def test_freeze_inference():
    r"""Check that every parameter of the inference model is frozen and float16."""
    infer_model = load_infer_model(**INFER_ARGS)
    for weight in infer_model.parameters():
        assert weight.requires_grad is False
        assert weight.dtype == torch.float16
73 | 


--------------------------------------------------------------------------------
/tests/model/test_full.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | 
17 | import torch
18 | 
19 | from llamafactory.train.test_utils import load_infer_model, load_train_model
20 | 
21 | 
# Model identifier; override through the TINY_LLAMA3 environment variable.
TINY_LLAMA3 = os.environ.get("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")

# Baseline arguments for loading the model in "full" training mode.
TRAIN_ARGS = {
    "model_name_or_path": TINY_LLAMA3,
    "stage": "sft",
    "do_train": True,
    "finetuning_type": "full",
    "dataset": "llamafactory/tiny-supervised-dataset",
    "dataset_dir": "ONLINE",
    "template": "llama3",
    "cutoff_len": 1024,
    "output_dir": "dummy_dir",
    "overwrite_output_dir": True,
    "fp16": True,
}

# Arguments for loading the same model for inference.
INFER_ARGS = {
    "model_name_or_path": TINY_LLAMA3,
    "finetuning_type": "full",
    "template": "llama3",
    "infer_dtype": "float16",
}
44 | 
45 | 
def test_full_train():
    r"""Check that every parameter of the training model requires grad and is float32."""
    train_model = load_train_model(**TRAIN_ARGS)
    for weight in train_model.parameters():
        assert weight.requires_grad is True
        assert weight.dtype == torch.float32
51 | 
52 | 
def test_full_inference():
    r"""Check that every parameter of the inference model is frozen and float16."""
    infer_model = load_infer_model(**INFER_ARGS)
    for weight in infer_model.parameters():
        assert weight.requires_grad is False
        assert weight.dtype == torch.float16
58 | 


--------------------------------------------------------------------------------
/tests/model/test_pissa.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2025 the LlamaFactory team.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | 
17 | import pytest
18 | 
19 | from llamafactory.train.test_utils import compare_model, load_infer_model, load_reference_model, load_train_model
20 | 
21 | 
# Model identifier; override through the TINY_LLAMA3 environment variable.
TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")

# The PiSSA checkpoint is read from its own TINY_LLAMA_PISSA variable.
# TINY_LLAMA_ADAPTER is already used by tests/e2e/test_train.py for the LoRA
# adapter (default "llamafactory/tiny-random-Llama-3-lora"); sharing it would
# point these tests at a non-PiSSA checkpoint whenever it is set.
TINY_LLAMA_PISSA = os.getenv("TINY_LLAMA_PISSA", "llamafactory/tiny-random-Llama-3-pissa")

# Arguments for loading a trainable model with PiSSA initialization enabled.
TRAIN_ARGS = {
    "model_name_or_path": TINY_LLAMA3,
    "stage": "sft",
    "do_train": True,
    "finetuning_type": "lora",
    "pissa_init": True,
    "pissa_iter": -1,
    "dataset": "llamafactory/tiny-supervised-dataset",
    "dataset_dir": "ONLINE",
    "template": "llama3",
    "cutoff_len": 1024,
    "output_dir": "dummy_dir",
    "overwrite_output_dir": True,
    "fp16": True,
}

# Inference takes both the base weights and the adapter (from the "pissa_init"
# folder) out of the PiSSA checkpoint.
INFER_ARGS = {
    "model_name_or_path": TINY_LLAMA_PISSA,
    "adapter_name_or_path": TINY_LLAMA_PISSA,
    "adapter_folder": "pissa_init",
    "finetuning_type": "lora",
    "template": "llama3",
    "infer_dtype": "float16",
}
50 | 
51 | 
@pytest.mark.xfail(reason="PiSSA initialization is not stable in different platform.")
def test_pissa_train():
    r"""Compare a freshly PiSSA-initialized training model against the trainable reference checkpoint."""
    compare_model(
        load_train_model(**TRAIN_ARGS),
        load_reference_model(TINY_LLAMA_PISSA, TINY_LLAMA_PISSA, use_pissa=True, is_trainable=True),
    )
57 | 
58 | 
@pytest.mark.xfail(reason="Known connection error.")
def test_pissa_inference():
    r"""Compare the PiSSA inference model against the reference checkpoint after merging its adapter."""
    infer_model = load_infer_model(**INFER_ARGS)
    adapter_reference = load_reference_model(TINY_LLAMA_PISSA, TINY_LLAMA_PISSA, use_pissa=True, is_trainable=False)
    # The reference is merged and unloaded before the comparison.
    merged_reference = adapter_reference.merge_and_unload()
    compare_model(infer_model, merged_reference)
65 | 


--------------------------------------------------------------------------------
/tests/version.txt:
--------------------------------------------------------------------------------
1 | # change if test fails or cache is outdated
2 | 0.9.3.107
3 | 


--------------------------------------------------------------------------------